From 0ef932afbc033abf97e4e3e288c144571f3154c3 Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Thu, 25 Mar 2021 19:49:18 -0500 Subject: [PATCH 1/7] Create extract_obs_data function --- pymc3/aesaraf.py | 26 +++++++++++++++++++- pymc3/tests/test_aesaraf.py | 49 ++++++++++++++++++++++++++++++++++++- 2 files changed, 73 insertions(+), 2 deletions(-) diff --git a/pymc3/aesaraf.py b/pymc3/aesaraf.py index 61ec4a9da6..a41e173a14 100644 --- a/pymc3/aesaraf.py +++ b/pymc3/aesaraf.py @@ -19,10 +19,12 @@ from aesara import scalar from aesara import tensor as aet from aesara.gradient import grad -from aesara.graph.basic import Apply, graph_inputs +from aesara.graph.basic import Apply, Constant, graph_inputs from aesara.graph.op import Op from aesara.sandbox.rng_mrg import MRG_RandomStream as RandomStream from aesara.tensor.elemwise import Elemwise +from aesara.tensor.sharedvar import SharedVariable +from aesara.tensor.subtensor import AdvancedIncSubtensor, AdvancedIncSubtensor1 from aesara.tensor.var import TensorVariable from pymc3.data import GeneratorAdapter @@ -48,6 +50,28 @@ ] +def extract_obs_data(x: TensorVariable) -> np.ndarray: + """Extract data observed symbolic variables. + + Raises + ------ + TypeError + + """ + if isinstance(x, Constant): + return x.data + if isinstance(x, SharedVariable): + return x.get_value() + if x.owner and isinstance(x.owner.op, (AdvancedIncSubtensor, AdvancedIncSubtensor1)): + array_data = extract_obs_data(x.owner.inputs[0]) + mask_idx = tuple(extract_obs_data(i) for i in x.owner.inputs[2:]) + mask = np.zeros_like(array_data) + mask[mask_idx] = 1 + return np.ma.MaskedArray(array_data, mask) + + raise TypeError(f"Data cannot be extracted from {x}") + + def inputvars(a): """ Get the inputs into a aesara variables diff --git a/pymc3/tests/test_aesaraf.py b/pymc3/tests/test_aesaraf.py index 1b591e0a85..da1fe29689 100644 --- a/pymc3/tests/test_aesaraf.py +++ b/pymc3/tests/test_aesaraf.py @@ -19,9 +19,10 @@ import numpy as np import pytest +from aesara.tensor.subtensor import AdvancedIncSubtensor, AdvancedIncSubtensor1 from aesara.tensor.type import TensorType -from pymc3.aesaraf import _conversion_map, take_along_axis +from pymc3.aesaraf import _conversion_map, extract_obs_data, take_along_axis from pymc3.vartypes import int_types FLOATX = str(aesara.config.floatX) @@ -225,3 +226,49 @@ def test_dtype_failure(self): indices.tag.test_value = np.zeros((1,) * indices.ndim, dtype=FLOATX) with pytest.raises(IndexError): take_along_axis(arr, indices) + + +def test_extract_obs_data(): + + with pytest.raises(TypeError): + extract_obs_data(aet.matrix()) + + data = np.random.normal(size=(2, 3)) + data_at = aet.as_tensor(data) + mask = np.random.binomial(1, 0.5, size=(2, 3)).astype(bool) + + for val_at in (data_at, aesara.shared(data)): + res = extract_obs_data(val_at) + + assert isinstance(res, np.ndarray) + assert np.array_equal(res, data) + + # AdvancedIncSubtensor check + data_m = np.ma.MaskedArray(data, mask) + missing_values = data_at.type()[mask] + constant = aet.as_tensor(data_m.filled()) + z_at = aet.set_subtensor(constant[mask.nonzero()], missing_values) + + assert isinstance(z_at.owner.op, AdvancedIncSubtensor) + + res = extract_obs_data(z_at) + + assert isinstance(res, np.ndarray) + assert np.ma.allequal(res, data_m) + + # AdvancedIncSubtensor1 check + data = np.random.normal(size=(3,)) + data_at = aet.as_tensor(data) + mask = np.random.binomial(1, 0.5, size=(3,)).astype(bool) + + data_m = np.ma.MaskedArray(data, mask) + missing_values = data_at.type()[mask] + 
constant = aet.as_tensor(data_m.filled()) + z_at = aet.set_subtensor(constant[mask.nonzero()], missing_values) + + assert isinstance(z_at.owner.op, AdvancedIncSubtensor1) + + res = extract_obs_data(z_at) + + assert isinstance(res, np.ndarray) + assert np.ma.allequal(res, data_m) From 371393685cde4091c436f3b448ab56d4046cb7a4 Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Thu, 25 Mar 2021 21:50:49 -0500 Subject: [PATCH 2/7] Do not use shared variables as inputs during prior/posterior sampling --- pymc3/sampling.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index b6cb03bd44..0b016a674e 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -32,6 +32,7 @@ import packaging import xarray +from aesara.tensor.sharedvar import SharedVariable from arviz import InferenceData from fastprogress.fastprogress import progress_bar @@ -1730,7 +1731,9 @@ def sample_posterior_predictive( inputs_and_names = [ (rv, rv.name) for rv in rv_ancestors(vars_to_sample, walk_past_rvs=True) - if rv not in vars_to_sample and rv in model.named_vars.values() + if rv not in vars_to_sample + and rv in model.named_vars.values() + and not isinstance(rv, SharedVariable) ] if inputs_and_names: inputs, input_names = zip(*inputs_and_names) @@ -1738,7 +1741,11 @@ def sample_posterior_predictive( inputs, input_names = [], [] else: output_names = [v.name for v in vars_to_sample if v.name is not None] - input_names = [n for n in _trace.varnames if n not in output_names] + input_names = [ + n + for n in _trace.varnames + if n not in output_names and not isinstance(model[n], SharedVariable) + ] inputs = [model[n] for n in input_names] if size is not None: @@ -1987,7 +1994,7 @@ def sample_prior_predictive( names = get_default_varnames(vars_, include_transformed=False) vars_to_sample = [model[name] for name in names] - inputs = [i for i in inputvars(vars_to_sample)] + inputs = [i for i in inputvars(vars_to_sample) if not isinstance(i, SharedVariable)] sampler_fn = aesara.function( inputs, vars_to_sample, From 78ff887ec19bfb42acb9d40c7ac5bc839a663771 Mon Sep 17 00:00:00 2001 From: "Oriol (ZBook)" Date: Sat, 27 Feb 2021 00:57:26 +0200 Subject: [PATCH 3/7] Port InferenceData conversion code --- pymc3/__init__.py | 7 +- pymc3/backends/__init__.py | 1 + pymc3/backends/arviz.py | 674 +++++++++++++++++++++++++++ pymc3/sampling.py | 65 +-- pymc3/tests/test_idata_conversion.py | 653 ++++++++++++++++++++++++++ pymc3/tests/test_sampling.py | 12 +- requirements.txt | 2 +- 7 files changed, 1345 insertions(+), 69 deletions(-) create mode 100644 pymc3/backends/arviz.py create mode 100644 pymc3/tests/test_idata_conversion.py diff --git a/pymc3/__init__.py b/pymc3/__init__.py index 8f33feef09..d47e0fc692 100644 --- a/pymc3/__init__.py +++ b/pymc3/__init__.py @@ -40,7 +40,12 @@ def __set_compiler_flags(): from pymc3 import gp, ode, sampling from pymc3.aesaraf import * -from pymc3.backends import load_trace, save_trace +from pymc3.backends import ( + load_trace, + predictions_to_inference_data, + save_trace, + to_inference_data, +) from pymc3.backends.tracetab import * from pymc3.blocking import * from pymc3.data import * diff --git a/pymc3/backends/__init__.py b/pymc3/backends/__init__.py index 535e800ec0..f42dc5975e 100644 --- a/pymc3/backends/__init__.py +++ b/pymc3/backends/__init__.py @@ -60,6 +60,7 @@ Saved backends can be loaded using `arviz.from_netcdf` """ +from pymc3.backends.arviz import predictions_to_inference_data, to_inference_data from 
pymc3.backends.ndarray import ( NDArray, load_trace, diff --git a/pymc3/backends/arviz.py b/pymc3/backends/arviz.py new file mode 100644 index 0000000000..1073a0b930 --- /dev/null +++ b/pymc3/backends/arviz.py @@ -0,0 +1,674 @@ +"""PyMC3-ArviZ conversion code.""" +import logging +import warnings + +from typing import ( # pylint: disable=unused-import + TYPE_CHECKING, + Any, + Dict, + Iterable, + List, + Mapping, + Optional, + Tuple, + Union, +) + +import numpy as np +import xarray as xr + +from aesara.graph.basic import Constant +from aesara.tensor.sharedvar import SharedVariable +from arviz import InferenceData, concat, rcParams +from arviz.data.base import CoordSpec, DimSpec, dict_to_dataset, requires + +import pymc3 + +from pymc3.aesaraf import extract_obs_data +from pymc3.distributions import logpt +from pymc3.model import modelcontext +from pymc3.util import get_default_varnames + +if TYPE_CHECKING: + from typing import Set # pylint: disable=ungrouped-imports + + from pymc3.backends.base import MultiTrace # pylint: disable=invalid-name + from pymc3.model import Model + +___all__ = [""] + +_log = logging.getLogger("pymc3") + +# random variable object ... +Var = Any # pylint: disable=invalid-name + + +class _DefaultTrace: + """ + Utility for collecting samples into a dictionary. + + Name comes from its similarity to ``defaultdict``: + entries are lazily created. + + Parameters + ---------- + samples : int + The number of samples that will be collected, per variable, + into the trace. + + Attributes + ---------- + trace_dict : Dict[str, np.ndarray] + A dictionary constituting a trace. Should be extracted + after a procedure has filled the `_DefaultTrace` using the + `insert()` method + """ + + trace_dict: Dict[str, np.ndarray] = {} + _len: Optional[int] = None + + def __init__(self, samples: int): + self._len = samples + self.trace_dict = {} + + def insert(self, k: str, v, idx: int): + """ + Insert `v` as the value of the `idx`th sample for the variable `k`. + + Parameters + ---------- + k: str + Name of the variable. + v: anything that can go into a numpy array (including a numpy array) + The value of the `idx`th sample from variable `k` + ids: int + The index of the sample we are inserting into the trace. 
+ """ + value_shape = np.shape(v) + + # initialize if necessary + if k not in self.trace_dict: + array_shape = (self._len,) + value_shape + self.trace_dict[k] = np.empty(array_shape, dtype=np.array(v).dtype) + + # do the actual insertion + if value_shape == (): + self.trace_dict[k][idx] = v + else: + self.trace_dict[k][idx, :] = v + + +class InferenceDataConverter: # pylint: disable=too-many-instance-attributes + """Encapsulate InferenceData specific logic.""" + + model = None # type: Optional[Model] + nchains = None # type: int + ndraws = None # type: int + posterior_predictive = None # Type: Optional[Mapping[str, np.ndarray]] + predictions = None # Type: Optional[Mapping[str, np.ndarray]] + prior = None # Type: Optional[Mapping[str, np.ndarray]] + + def __init__( + self, + *, + trace=None, + prior=None, + posterior_predictive=None, + log_likelihood=True, + predictions=None, + coords: Optional[CoordSpec] = None, + dims: Optional[DimSpec] = None, + model=None, + save_warmup: Optional[bool] = None, + density_dist_obs: bool = True, + index_origin: Optional[int] = None, + ): + + self.save_warmup = rcParams["data.save_warmup"] if save_warmup is None else save_warmup + self.trace = trace + + # this permits us to get the model from command-line argument or from with model: + try: + self.model = modelcontext(model) + except TypeError: + self.model = None + + self.attrs = None + if trace is not None: + self.nchains = trace.nchains if hasattr(trace, "nchains") else 1 + if hasattr(trace.report, "n_draws") and trace.report.n_draws is not None: + self.ndraws = trace.report.n_draws + self.attrs = { + "sampling_time": trace.report.t_sampling, + "tuning_steps": trace.report.n_tune, + } + else: + self.ndraws = len(trace) + if self.save_warmup: + warnings.warn( + "Warmup samples will be stored in posterior group and will not be" + " excluded from stats and diagnostics." + " Do not slice the trace manually before conversion", + UserWarning, + ) + self.ntune = len(self.trace) - self.ndraws + self.posterior_trace, self.warmup_trace = self.split_trace() + else: + self.nchains = self.ndraws = 0 + + self.prior = prior + self.posterior_predictive = posterior_predictive + self.log_likelihood = log_likelihood + self.predictions = predictions + self.index_origin = rcParams["data.index_origin"] if index_origin is None else index_origin + + def arbitrary_element(dct: Dict[Any, np.ndarray]) -> np.ndarray: + return next(iter(dct.values())) + + if trace is None: + # if you have a posterior_predictive built with keep_dims, + # you'll lose here, but there's nothing I can do about that. + self.nchains = 1 + get_from = None + if predictions is not None: + get_from = predictions + elif posterior_predictive is not None: + get_from = posterior_predictive + elif prior is not None: + get_from = prior + if get_from is None: + # pylint: disable=line-too-long + raise ValueError( + "When constructing InferenceData must have at least" + " one of trace, prior, posterior_predictive or predictions." 
+ ) + + aelem = arbitrary_element(get_from) + self.ndraws = aelem.shape[0] + + self.coords = {} if coords is None else coords + if hasattr(self.model, "coords"): + self.coords = {**self.model.coords, **self.coords} + + self.dims = {} if dims is None else dims + if hasattr(self.model, "RV_dims"): + model_dims = {k: list(v) for k, v in self.model.RV_dims.items()} + self.dims = {**model_dims, **self.dims} + + self.density_dist_obs = density_dist_obs + self.observations, self.multi_observations = self.find_observations() + + def find_observations(self) -> Tuple[Optional[Dict[str, Var]], Optional[Dict[str, Var]]]: + """If there are observations available, return them as a dictionary.""" + if self.model is None: + return (None, None) + observations = {} + multi_observations = {} + for obs in self.model.observed_RVs: + aux_obs = getattr(obs.tag, "observations", None) + if aux_obs is not None: + try: + obs_data = extract_obs_data(aux_obs) + observations[obs.name] = obs_data + except TypeError: + warnings.warn(f"Could not extract data from symbolic observation {obs}") + else: + warnings.warn(f"No data for observation {obs}") + + return observations, multi_observations + + def split_trace(self) -> Tuple[Union[None, "MultiTrace"], Union[None, "MultiTrace"]]: + """Split MultiTrace object into posterior and warmup. + + Returns + ------- + trace_posterior: MultiTrace or None + The slice of the trace corresponding to the posterior. If the posterior + trace is empty, None is returned + trace_warmup: MultiTrace or None + The slice of the trace corresponding to the warmup. If the warmup trace is + empty or ``save_warmup=False``, None is returned + """ + trace_posterior = None + trace_warmup = None + if self.save_warmup and self.ntune > 0: + trace_warmup = self.trace[: self.ntune] + if self.ndraws > 0: + trace_posterior = self.trace[self.ntune :] + return trace_posterior, trace_warmup + + def log_likelihood_vals_point(self, point, var, log_like_fun): + """Compute log likelihood for each observed point.""" + # TODO: This is a cheap hack; we should filter-out the correct + # variables some other way + point = {i.name: point[i.name] for i in log_like_fun.f.maker.inputs if i.name in point} + log_like_val = np.atleast_1d(log_like_fun(point)) + if var.tag.missing_values: + try: + obs_data = extract_obs_data(var) + except TypeError: + warnings.warn(f"Could not extract data from symbolic observation {var}") + + mask = obs_data.mask + if np.ndim(mask) > np.ndim(log_like_val): + mask = np.any(mask, axis=-1) + log_like_val = np.where(mask, np.nan, log_like_val) + return log_like_val + + def _extract_log_likelihood(self, trace): + """Compute log likelihood of each observation.""" + if self.trace is None: + return None + if self.model is None: + return None + + if self.log_likelihood is True: + cached = [(var, self.model.fn(logpt(var))) for var in self.model.observed_RVs] + else: + cached = [ + (var, self.model.fn(logpt(var))) + for var in self.model.observed_RVs + if var.name in self.log_likelihood + ] + log_likelihood_dict = _DefaultTrace(len(trace.chains)) + for var, log_like_fun in cached: + for k, chain in enumerate(trace.chains): + log_like_chain = [ + self.log_likelihood_vals_point(point, var, log_like_fun) + for point in trace.points([chain]) + ] + log_likelihood_dict.insert(var.name, np.stack(log_like_chain), k) + return log_likelihood_dict.trace_dict + + @requires("trace") + def posterior_to_xarray(self): + """Convert the posterior to an xarray dataset.""" + var_names = get_default_varnames(self.trace.varnames, 
include_transformed=False) + data = {} + data_warmup = {} + for var_name in var_names: + if self.warmup_trace: + data_warmup[var_name] = np.array( + self.warmup_trace.get_values(var_name, combine=False, squeeze=False) + ) + if self.posterior_trace: + data[var_name] = np.array( + self.posterior_trace.get_values(var_name, combine=False, squeeze=False) + ) + return ( + dict_to_dataset( + data, + library=pymc3, + coords=self.coords, + dims=self.dims, + attrs=self.attrs, + index_origin=self.index_origin, + ), + dict_to_dataset( + data_warmup, + library=pymc3, + coords=self.coords, + dims=self.dims, + attrs=self.attrs, + index_origin=self.index_origin, + ), + ) + + @requires("trace") + def sample_stats_to_xarray(self): + """Extract sample_stats from PyMC3 trace.""" + data = {} + rename_key = { + "model_logp": "lp", + "mean_tree_accept": "acceptance_rate", + "depth": "tree_depth", + "tree_size": "n_steps", + } + data = {} + data_warmup = {} + for stat in self.trace.stat_names: + name = rename_key.get(stat, stat) + if name == "tune": + continue + if self.warmup_trace: + data_warmup[name] = np.array( + self.warmup_trace.get_sampler_stats(stat, combine=False) + ) + if self.posterior_trace: + data[name] = np.array(self.posterior_trace.get_sampler_stats(stat, combine=False)) + + return ( + dict_to_dataset( + data, + library=pymc3, + dims=None, + coords=self.coords, + attrs=self.attrs, + index_origin=self.index_origin, + ), + dict_to_dataset( + data_warmup, + library=pymc3, + dims=None, + coords=self.coords, + attrs=self.attrs, + index_origin=self.index_origin, + ), + ) + + @requires("trace") + @requires("model") + def log_likelihood_to_xarray(self): + """Extract log likelihood and log_p data from PyMC3 trace.""" + if self.predictions or not self.log_likelihood: + return None + data_warmup = {} + data = {} + warn_msg = ( + "Could not compute log_likelihood, it will be omitted. " + "Check your model object or set log_likelihood=False" + ) + if self.posterior_trace: + try: + data = self._extract_log_likelihood(self.posterior_trace) + except TypeError: + warnings.warn(warn_msg) + if self.warmup_trace: + try: + data_warmup = self._extract_log_likelihood(self.warmup_trace) + except TypeError: + warnings.warn(warn_msg) + return ( + dict_to_dataset( + data, + library=pymc3, + dims=self.dims, + coords=self.coords, + skip_event_dims=True, + index_origin=self.index_origin, + ), + dict_to_dataset( + data_warmup, + library=pymc3, + dims=self.dims, + coords=self.coords, + skip_event_dims=True, + index_origin=self.index_origin, + ), + ) + + def translate_posterior_predictive_dict_to_xarray(self, dct) -> xr.Dataset: + """Take Dict of variables to numpy ndarrays (samples) and translate into dataset.""" + data = {} + for k, ary in dct.items(): + shape = ary.shape + if shape[0] == self.nchains and shape[1] == self.ndraws: + data[k] = ary + elif shape[0] == self.nchains * self.ndraws: + data[k] = ary.reshape((self.nchains, self.ndraws, *shape[1:])) + else: + data[k] = np.expand_dims(ary, 0) + # pylint: disable=line-too-long + _log.warning( + "posterior predictive variable %s's shape not compatible with number of chains and draws. 
" + "This can mean that some draws or even whole chains are not represented.", + k, + ) + return dict_to_dataset( + data, library=pymc3, coords=self.coords, dims=self.dims, index_origin=self.index_origin + ) + + @requires(["posterior_predictive"]) + def posterior_predictive_to_xarray(self): + """Convert posterior_predictive samples to xarray.""" + return self.translate_posterior_predictive_dict_to_xarray(self.posterior_predictive) + + @requires(["predictions"]) + def predictions_to_xarray(self): + """Convert predictions (out of sample predictions) to xarray.""" + return self.translate_posterior_predictive_dict_to_xarray(self.predictions) + + def priors_to_xarray(self): + """Convert prior samples (and if possible prior predictive too) to xarray.""" + if self.prior is None: + return {"prior": None, "prior_predictive": None} + if self.observations is not None: + prior_predictive_vars = list(self.observations.keys()) + prior_vars = [key for key in self.prior.keys() if key not in prior_predictive_vars] + else: + prior_vars = list(self.prior.keys()) + prior_predictive_vars = None + + priors_dict = {} + for group, var_names in zip( + ("prior", "prior_predictive"), (prior_vars, prior_predictive_vars) + ): + priors_dict[group] = ( + None + if var_names is None + else dict_to_dataset( + {k: np.expand_dims(self.prior[k], 0) for k in var_names}, + library=pymc3, + coords=self.coords, + dims=self.dims, + index_origin=self.index_origin, + ) + ) + return priors_dict + + @requires(["observations", "multi_observations"]) + @requires("model") + def observed_data_to_xarray(self): + """Convert observed data to xarray.""" + if self.predictions: + return None + return dict_to_dataset( + {**self.observations, **self.multi_observations}, + library=pymc3, + coords=self.coords, + dims=self.dims, + default_dims=[], + index_origin=self.index_origin, + ) + + @requires(["trace", "predictions"]) + @requires("model") + def constant_data_to_xarray(self): + """Convert constant data to xarray.""" + # For constant data, we are concerned only with deterministics and + # data. The constant data vars must be either pm.Data + # (TensorSharedVariable) or pm.Deterministic + constant_data_vars = {} # type: Dict[str, Var] + + def is_data(name, var) -> bool: + assert self.model is not None + return ( + var not in self.model.deterministics + and var not in self.model.observed_RVs + and var not in self.model.free_RVs + and var not in self.model.potentials + and (self.observations is None or name not in self.observations) + and isinstance(var, (Constant, SharedVariable)) + ) + + # I don't know how to find pm.Data, except that they are named + # variables that aren't observed or free RVs, nor are they + # deterministics, and then we eliminate observations. + for name, var in self.model.named_vars.items(): + if is_data(name, var): + constant_data_vars[name] = var + + if not constant_data_vars: + return None + + constant_data = {} + for name, vals in constant_data_vars.items(): + if hasattr(vals, "get_value"): + vals = vals.get_value() + elif hasattr(vals, "data"): + vals = vals.data + constant_data[name] = vals + + return dict_to_dataset( + constant_data, + library=pymc3, + coords=self.coords, + dims=self.dims, + default_dims=[], + index_origin=self.index_origin, + ) + + def to_inference_data(self): + """Convert all available data to an InferenceData object. 
+ + Note that if groups can not be created (e.g., there is no `trace`, so + the `posterior` and `sample_stats` can not be extracted), then the InferenceData + will not have those groups. + """ + id_dict = { + "posterior": self.posterior_to_xarray(), + "sample_stats": self.sample_stats_to_xarray(), + "log_likelihood": self.log_likelihood_to_xarray(), + "posterior_predictive": self.posterior_predictive_to_xarray(), + "predictions": self.predictions_to_xarray(), + **self.priors_to_xarray(), + "observed_data": self.observed_data_to_xarray(), + } + if self.predictions: + id_dict["predictions_constant_data"] = self.constant_data_to_xarray() + else: + id_dict["constant_data"] = self.constant_data_to_xarray() + return InferenceData(save_warmup=self.save_warmup, **id_dict) + + +def to_inference_data( + trace: Optional["MultiTrace"] = None, + *, + prior: Optional[Dict[str, Any]] = None, + posterior_predictive: Optional[Dict[str, Any]] = None, + log_likelihood: Union[bool, Iterable[str]] = True, + coords: Optional[CoordSpec] = None, + dims: Optional[DimSpec] = None, + model: Optional["Model"] = None, + save_warmup: Optional[bool] = None, + density_dist_obs: bool = True, +) -> InferenceData: + """Convert pymc3 data into an InferenceData object. + + All three of them are optional arguments, but at least one of ``trace``, + ``prior`` and ``posterior_predictive`` must be present. + For a usage example read the + :ref:`Creating InferenceData section on from_pymc3 ` + + Parameters + ---------- + trace : MultiTrace, optional + Trace generated from MCMC sampling. Output of + :func:`~pymc3.sampling.sample`. + prior : dict, optional + Dictionary with the variable names as keys, and values numpy arrays + containing prior and prior predictive samples. + posterior_predictive : dict, optional + Dictionary with the variable names as keys, and values numpy arrays + containing posterior predictive samples. + log_likelihood : bool or array_like of str, optional + List of variables to calculate `log_likelihood`. Defaults to True which calculates + `log_likelihood` for all observed variables. If set to False, log_likelihood is skipped. + coords : dict of {str: array-like}, optional + Map of coordinate names to coordinate values + dims : dict of {str: list of str}, optional + Map of variable names to the coordinate names to use to index its dimensions. + model : Model, optional + Model used to generate ``trace``. It is not necessary to pass ``model`` if in + ``with`` context. + save_warmup : bool, optional + Save warmup iterations InferenceData object. If not defined, use default + defined by the rcParams. + density_dist_obs : bool, default True + Store variables passed with ``observed`` arg to + :class:`~pymc.distributions.DensityDist` in the generated InferenceData. + + Returns + ------- + arviz.InferenceData + """ + if isinstance(trace, InferenceData): + return trace + + return InferenceDataConverter( + trace=trace, + prior=prior, + posterior_predictive=posterior_predictive, + log_likelihood=log_likelihood, + coords=coords, + dims=dims, + model=model, + save_warmup=save_warmup, + density_dist_obs=density_dist_obs, + ).to_inference_data() + + +### Later I could have this return ``None`` if the ``idata_orig`` argument is supplied. But +### perhaps we should have an inplace argument? 
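# --- Illustrative usage sketch (not part of the patch): the module-level
# ``to_inference_data`` entry point defined above, re-exported as
# ``pm.to_inference_data`` by this patch series, can be driven roughly like
# this. The toy model below and its variable names are assumptions made for
# the example only.
import numpy as np
import pymc3 as pm

with pm.Model() as model:
    mu = pm.Normal("mu", 0.0, 1.0)
    pm.Normal("obs", mu=mu, sigma=1.0, observed=np.random.randn(10))
    trace = pm.sample(500, chains=2, return_inferencedata=False)
    prior = pm.sample_prior_predictive()
    posterior_predictive = pm.sample_posterior_predictive(trace)

# Bundle the MultiTrace plus prior/posterior-predictive draws into a single
# ArviZ InferenceData object; ``model`` may be omitted when called inside the
# model context.
idata = pm.to_inference_data(
    trace=trace,
    prior=prior,
    posterior_predictive=posterior_predictive,
    model=model,
)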
+def predictions_to_inference_data( + predictions, + posterior_trace: Optional["MultiTrace"] = None, + model: Optional["Model"] = None, + coords: Optional[CoordSpec] = None, + dims: Optional[DimSpec] = None, + idata_orig: Optional[InferenceData] = None, + inplace: bool = False, +) -> InferenceData: + """Translate out-of-sample predictions into ``InferenceData``. + + Parameters + ---------- + predictions: Dict[str, np.ndarray] + The predictions are the return value of :func:`~pymc3.sample_posterior_predictive`, + a dictionary of strings (variable names) to numpy ndarrays (draws). + posterior_trace: MultiTrace + This should be a trace that has been thinned appropriately for + ``pymc3.sample_posterior_predictive``. Specifically, any variable whose shape is + a deterministic function of the shape of any predictor (explanatory, independent, etc.) + variables must be *removed* from this trace. + model: Model + The pymc3 model. It can be ommited if within a model context. + coords: Dict[str, array-like[Any]] + Coordinates for the variables. Map from coordinate names to coordinate values. + dims: Dict[str, array-like[str]] + Map from variable name to ordered set of coordinate names. + idata_orig: InferenceData, optional + If supplied, then modify this inference data in place, adding ``predictions`` and + (if available) ``predictions_constant_data`` groups. If this is not supplied, make a + fresh InferenceData + inplace: boolean, optional + If idata_orig is supplied and inplace is True, merge the predictions into idata_orig, + rather than returning a fresh InferenceData object. + + Returns + ------- + InferenceData: + May be modified ``idata_orig``. + """ + if inplace and not idata_orig: + raise ValueError( + "Do not pass True for inplace unless passing" "an existing InferenceData as idata_orig" + ) + new_idata = InferenceDataConverter( + trace=posterior_trace, + predictions=predictions, + model=model, + coords=coords, + dims=dims, + log_likelihood=False, + ).to_inference_data() + if idata_orig is None: + return new_idata + elif inplace: + concat([idata_orig, new_idata], dim=None, inplace=True) + return idata_orig + else: + # if we are not returning in place, then merge the old groups into the new inference + # data and return that. + concat([new_idata, idata_orig], dim=None, copy=True, inplace=True) + return new_idata diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 0b016a674e..06574f0508 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -27,7 +27,6 @@ import aesara import aesara.gradient as tg -import arviz import numpy as np import packaging import xarray @@ -39,6 +38,7 @@ import pymc3 as pm from pymc3.aesaraf import inputvars +from pymc3.backends.arviz import _DefaultTrace from pymc3.backends.base import BaseTrace, MultiTrace from pymc3.backends.ndarray import NDArray from pymc3.blocking import DictToArrayBijection @@ -345,7 +345,7 @@ def sample( Whether to return the trace as an :class:`arviz:arviz.InferenceData` (True) object or a `MultiTrace` (False) Defaults to `False`, but we'll switch to `True` in an upcoming release. idata_kwargs : dict, optional - Keyword arguments for :func:`arviz:arviz.from_pymc3` + Keyword arguments for :func:`pymc3.to_inference_data` mp_ctx : multiprocessing.context.BaseContent A multiprocessing context for parallel sampling. See multiprocessing documentation for details. 
@@ -636,12 +636,10 @@ def sample( idata = None if compute_convergence_checks or return_inferencedata: - # XXX: Arviz `log_likelihood` calculations need to be disabled until - # it's updated to work with v4. - ikwargs = dict(model=model, save_warmup=not discard_tuned_samples, log_likelihood=False) + ikwargs = dict(model=model, save_warmup=not discard_tuned_samples) if idata_kwargs: ikwargs.update(idata_kwargs) - idata = arviz.from_pymc3(trace, **ikwargs) + idata = pm.to_inference_data(trace, **ikwargs) if compute_convergence_checks: if draws - tune < 100: @@ -1550,61 +1548,6 @@ def stop_tuning(step): return step -class _DefaultTrace: - """ - Utility for collecting samples into a dictionary. - - Name comes from its similarity to ``defaultdict``: - entries are lazily created. - - Parameters - ---------- - samples : int - The number of samples that will be collected, per variable, - into the trace. - - Attributes - ---------- - trace_dict : Dict[str, np.ndarray] - A dictionary constituting a trace. Should be extracted - after a procedure has filled the `_DefaultTrace` using the - `insert()` method - """ - - trace_dict: Dict[str, np.ndarray] = {} - _len: Optional[int] = None - - def __init__(self, samples: int): - self._len = samples - self.trace_dict = {} - - def insert(self, k: str, v, idx: int): - """ - Insert `v` as the value of the `idx`th sample for the variable `k`. - - Parameters - ---------- - k: str - Name of the variable. - v: anything that can go into a numpy array (including a numpy array) - The value of the `idx`th sample from variable `k` - ids: int - The index of the sample we are inserting into the trace. - """ - value_shape = np.shape(v) - - # initialize if necessary - if k not in self.trace_dict: - array_shape = (self._len,) + value_shape - self.trace_dict[k] = np.empty(array_shape, dtype=np.array(v).dtype) - - # do the actual insertion - if value_shape == (): - self.trace_dict[k][idx] = v - else: - self.trace_dict[k][idx, :] = v - - def sample_posterior_predictive( trace, samples: Optional[int] = None, diff --git a/pymc3/tests/test_idata_conversion.py b/pymc3/tests/test_idata_conversion.py new file mode 100644 index 0000000000..e098fa5db9 --- /dev/null +++ b/pymc3/tests/test_idata_conversion.py @@ -0,0 +1,653 @@ +# pylint: disable=no-member, invalid-name, redefined-outer-name, protected-access, too-many-public-methods +from typing import Dict, Tuple + +import numpy as np +import pandas as pd +import pytest + +from arviz import InferenceData +from arviz.tests.helpers import check_multiple_attrs +from numpy import ma + +import pymc3 as pm + +from pymc3.backends.arviz import predictions_to_inference_data, to_inference_data + + +@pytest.fixture(scope="module") +def eight_schools_params(): + """Share setup for eight schools.""" + return { + "J": 8, + "y": np.array([28.0, 8.0, -3.0, 7.0, -1.0, 1.0, 18.0, 12.0]), + "sigma": np.array([15.0, 10.0, 16.0, 11.0, 9.0, 11.0, 10.0, 18.0]), + } + + +@pytest.fixture(scope="module") +def draws(): + """Share default draw count.""" + return 500 + + +@pytest.fixture(scope="module") +def chains(): + """Share default chain count.""" + return 2 + + +class TestDataPyMC3: + class Data: + def __init__(self, model, trace): + self.model = model + self.obj = trace + + @pytest.fixture(scope="class") + def data(self, eight_schools_params, draws, chains): + with pm.Model() as model: + mu = pm.Normal("mu", mu=0, sd=5) + tau = pm.HalfCauchy("tau", beta=5) + eta = pm.Normal("eta", mu=0, sd=1, size=eight_schools_params["J"]) + theta = pm.Deterministic("theta", 
mu + tau * eta) + pm.Normal( + "obs", + mu=theta, + sd=eight_schools_params["sigma"], + observed=eight_schools_params["y"], + ) + trace = pm.sample(draws, chains=chains) + + return self.Data(model, trace) + + def get_inference_data(self, data, eight_schools_params): + with data.model: + prior = pm.sample_prior_predictive() + posterior_predictive = pm.sample_posterior_predictive(data.obj) + + return ( + to_inference_data( + trace=data.obj, + prior=prior, + posterior_predictive=posterior_predictive, + coords={"school": np.arange(eight_schools_params["J"])}, + dims={"theta": ["school"], "eta": ["school"]}, + model=data.model, + ), + posterior_predictive, + ) + + def get_predictions_inference_data( + self, data, eight_schools_params, inplace + ) -> Tuple[InferenceData, Dict[str, np.ndarray]]: + with data.model: + prior = pm.sample_prior_predictive() + posterior_predictive = pm.sample_posterior_predictive(data.obj) + + idata = to_inference_data( + trace=data.obj, + prior=prior, + coords={"school": np.arange(eight_schools_params["J"])}, + dims={"theta": ["school"], "eta": ["school"]}, + ) + assert isinstance(idata, InferenceData) + extended = predictions_to_inference_data( + posterior_predictive, idata_orig=idata, inplace=inplace + ) + assert isinstance(extended, InferenceData) + assert (id(idata) == id(extended)) == inplace + return (extended, posterior_predictive) + + def make_predictions_inference_data( + self, data, eight_schools_params + ) -> Tuple[InferenceData, Dict[str, np.ndarray]]: + with data.model: + posterior_predictive = pm.sample_posterior_predictive(data.obj) + idata = predictions_to_inference_data( + posterior_predictive, + posterior_trace=data.obj, + coords={"school": np.arange(eight_schools_params["J"])}, + dims={"theta": ["school"], "eta": ["school"]}, + ) + assert isinstance(idata, InferenceData) + return idata, posterior_predictive + + def test_to_idata(self, data, eight_schools_params, chains, draws): + inference_data, posterior_predictive = self.get_inference_data(data, eight_schools_params) + test_dict = { + "posterior": ["mu", "tau", "eta", "theta"], + "sample_stats": ["diverging", "lp", "~log_likelihood"], + "log_likelihood": ["obs"], + "posterior_predictive": ["obs"], + "prior": ["mu", "tau", "eta", "theta"], + "prior_predictive": ["obs"], + "observed_data": ["obs"], + } + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + for key, values in posterior_predictive.items(): + ivalues = inference_data.posterior_predictive[key] + for chain in range(chains): + assert np.all( + np.isclose(ivalues[chain], values[chain * draws : (chain + 1) * draws]) + ) + + def test_predictions_to_idata(self, data, eight_schools_params): + "Test that we can add predictions to a previously-existing InferenceData." 
+ test_dict = { + "posterior": ["mu", "tau", "eta", "theta"], + "sample_stats": ["diverging", "lp"], + "log_likelihood": ["obs"], + "predictions": ["obs"], + "prior": ["mu", "tau", "eta", "theta"], + "observed_data": ["obs"], + } + + # check adding non-destructively + inference_data, posterior_predictive = self.get_predictions_inference_data( + data, eight_schools_params, False + ) + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + for key, values in posterior_predictive.items(): + ivalues = inference_data.predictions[key] + assert ivalues.shape[0] == 1 # one chain in predictions + assert np.all(np.isclose(ivalues[0], values)) + + # check adding in place + inference_data, posterior_predictive = self.get_predictions_inference_data( + data, eight_schools_params, True + ) + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + for key, values in posterior_predictive.items(): + ivalues = inference_data.predictions[key] + assert ivalues.shape[0] == 1 # one chain in predictions + assert np.all(np.isclose(ivalues[0], values)) + + def test_predictions_to_idata_new(self, data, eight_schools_params): + # check creating new + inference_data, posterior_predictive = self.make_predictions_inference_data( + data, eight_schools_params + ) + test_dict = { + "posterior": ["mu", "tau", "eta", "theta"], + "predictions": ["obs"], + "~observed_data": "", + } + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + for key, values in posterior_predictive.items(): + ivalues = inference_data.predictions[key] + # could the following better be done by simply flattening both the ivalues + # and the values? + if len(ivalues.shape) == 3: + ivalues_arr = np.reshape( + ivalues.values, (ivalues.shape[0] * ivalues.shape[1], ivalues.shape[2]) + ) + elif len(ivalues.shape) == 2: + ivalues_arr = np.reshape(ivalues.values, (ivalues.shape[0] * ivalues.shape[1])) + else: + raise ValueError(f"Unexpected values shape for variable {key}") + assert (ivalues.shape[0] == 2) and (ivalues.shape[1] == 500) + assert values.shape[0] == 1000 + assert np.all(np.isclose(ivalues_arr, values)) + + def test_posterior_predictive_keep_size(self, data, chains, draws, eight_schools_params): + with data.model: + posterior_predictive = pm.sample_posterior_predictive(data.obj, keep_size=True) + inference_data = to_inference_data( + trace=data.obj, + posterior_predictive=posterior_predictive, + coords={"school": np.arange(eight_schools_params["J"])}, + dims={"theta": ["school"], "eta": ["school"]}, + ) + + shape = inference_data.posterior_predictive.obs.shape + assert np.all( + [obs_s == s for obs_s, s in zip(shape, (chains, draws, eight_schools_params["J"]))] + ) + + def test_posterior_predictive_warning(self, data, eight_schools_params, caplog): + with data.model: + posterior_predictive = pm.sample_posterior_predictive(data.obj, 370) + inference_data = to_inference_data( + trace=data.obj, + posterior_predictive=posterior_predictive, + coords={"school": np.arange(eight_schools_params["J"])}, + dims={"theta": ["school"], "eta": ["school"]}, + ) + + records = caplog.records + shape = inference_data.posterior_predictive.obs.shape + assert np.all([obs_s == s for obs_s, s in zip(shape, (1, 370, eight_schools_params["J"]))]) + assert len(records) == 1 + assert records[0].levelname == "WARNING" + + @pytest.mark.xfail(reason="Dims option is not supported yet") + @pytest.mark.parametrize("use_context", [True, False]) + def test_autodetect_coords_from_model(self, use_context): + df_data = 
pd.DataFrame(columns=["date"]).set_index("date") + dates = pd.date_range(start="2020-05-01", end="2020-05-20") + for city, mu in {"Berlin": 15, "San Marino": 18, "Paris": 16}.items(): + df_data[city] = np.random.normal(loc=mu, size=len(dates)) + df_data.index = dates + df_data.index.name = "date" + + coords = {"date": df_data.index, "city": df_data.columns} + with pm.Model(coords=coords) as model: + europe_mean = pm.Normal("europe_mean_temp", mu=15.0, sd=3.0) + city_offset = pm.Normal("city_offset", mu=0.0, sd=3.0, dims="city") + city_temperature = pm.Deterministic( + "city_temperature", europe_mean + city_offset, dims="city" + ) + + data_dims = ("date", "city") + data = pm.Data("data", df_data, dims=data_dims) + _ = pm.Normal("likelihood", mu=city_temperature, sd=0.5, observed=data, dims=data_dims) + + trace = pm.sample( + return_inferencedata=False, + compute_convergence_checks=False, + cores=1, + chains=1, + tune=20, + draws=30, + step=pm.Metropolis(), + ) + if use_context: + idata = to_inference_data(trace=trace) + if not use_context: + idata = to_inference_data(trace=trace, model=model) + + assert "city" in list(idata.posterior.dims) + assert "city" in list(idata.observed_data.dims) + assert "date" in list(idata.observed_data.dims) + + np.testing.assert_array_equal(idata.posterior.coords["city"], coords["city"]) + np.testing.assert_array_equal(idata.observed_data.coords["date"], coords["date"]) + np.testing.assert_array_equal(idata.observed_data.coords["city"], coords["city"]) + + @pytest.mark.xfail(reason="Dims option is not supported yet") + def test_ovewrite_model_coords_dims(self): + """Check coords and dims from model object can be partially overwrited.""" + dim1 = ["a", "b"] + new_dim1 = ["c", "d"] + coords = {"dim1": dim1, "dim2": ["c1", "c2"]} + x_data = np.arange(4).reshape((2, 2)) + y = x_data + np.random.normal(size=(2, 2)) + with pm.Model(coords=coords): + x = pm.Data("x", x_data, dims=("dim1", "dim2")) + beta = pm.Normal("beta", 0, 1, dims="dim1") + _ = pm.Normal("obs", x * beta, 1, observed=y, dims=("dim1", "dim2")) + trace = pm.sample(100, tune=100) + idata1 = to_inference_data(trace) + idata2 = to_inference_data(trace, coords={"dim1": new_dim1}, dims={"beta": ["dim2"]}) + + test_dict = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]} + fails1 = check_multiple_attrs(test_dict, idata1) + assert not fails1 + fails2 = check_multiple_attrs(test_dict, idata2) + assert not fails2 + assert "dim1" in list(idata1.posterior.beta.dims) + assert "dim2" in list(idata2.posterior.beta.dims) + assert np.all(idata1.constant_data.x.dim1.values == np.array(dim1)) + assert np.all(idata1.constant_data.x.dim2.values == np.array(["c1", "c2"])) + assert np.all(idata2.constant_data.x.dim1.values == np.array(new_dim1)) + assert np.all(idata2.constant_data.x.dim2.values == np.array(["c1", "c2"])) + + @pytest.mark.xfail(reason="Missing data not refactored for v4") + def test_missing_data_model(self): + # source pymc3/pymc3/tests/test_missing.py + data = ma.masked_values([1, 2, -1, 4, -1], value=-1) + model = pm.Model() + with model: + x = pm.Normal("x", 1, 1) + pm.Normal("y", x, 1, observed=data) + inference_data = pm.sample(100, chains=2, return_inferencedata=True) + + # make sure that data is really missing + (y_missing,) = model.missing_values + # TODO: Test values aren't enabled anymore + assert y_missing.tag.test_value.shape == (2,) + + test_dict = {"posterior": ["x"], "observed_data": ["y"], "log_likelihood": ["y"]} + fails = check_multiple_attrs(test_dict, 
inference_data) + assert not fails + + @pytest.mark.xfail(reason="LKJCholeskyCov not refactored for v4") + @pytest.mark.xfail(reason="Missing data not refactored for v4") + def test_mv_missing_data_model(self): + data = ma.masked_values([[1, 2], [2, 2], [-1, 4], [2, -1], [-1, -1]], value=-1) + + model = pm.Model() + with model: + mu = pm.Normal("mu", 0, 1, size=2) + sd_dist = pm.HalfNormal.dist(1.0) + chol, *_ = pm.LKJCholeskyCov("chol_cov", n=2, eta=1, sd_dist=sd_dist, compute_corr=True) + pm.MvNormal("y", mu=mu, chol=chol, observed=data) + inference_data = pm.sample(100, chains=2, return_inferencedata=True) + + # make sure that data is really missing + (y_missing,) = model.missing_values + # TODO: Test values aren't enabled anymore + assert y_missing.tag.test_value.shape == (4,) + + test_dict = { + "posterior": ["mu", "chol_cov"], + "observed_data": ["y"], + "log_likelihood": ["y"], + } + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + + @pytest.mark.parametrize("log_likelihood", [True, False, ["y1"]]) + def test_multiple_observed_rv(self, log_likelihood): + y1_data = np.random.randn(10) + y2_data = np.random.randn(100) + with pm.Model(): + x = pm.Normal("x", 1, 1) + pm.Normal("y1", x, 1, observed=y1_data) + pm.Normal("y2", x, 1, observed=y2_data) + inference_data = pm.sample( + 100, + chains=2, + return_inferencedata=True, + idata_kwargs={"log_likelihood": log_likelihood}, + ) + test_dict = { + "posterior": ["x"], + "observed_data": ["y1", "y2"], + "log_likelihood": ["y1", "y2"], + "sample_stats": ["diverging", "lp", "~log_likelihood"], + } + if not log_likelihood: + test_dict.pop("log_likelihood") + test_dict["~log_likelihood"] = [] + if isinstance(log_likelihood, list): + test_dict["log_likelihood"] = ["y1", "~y2"] + + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + + @pytest.mark.xfail(reason="DensityDist not refactored for v4") + def test_multiple_observed_rv_without_observations(self): + with pm.Model(): + mu = pm.Normal("mu") + x = pm.DensityDist( # pylint: disable=unused-variable + "x", logpt(pm.Normal.dist(mu, 1.0)), observed={"value": 0.1} + ) + inference_data = pm.sample(100, chains=2, return_inferencedata=True) + test_dict = { + "posterior": ["mu"], + "sample_stats": ["lp"], + "log_likelihood": ["x"], + "observed_data": ["value", "~x"], + } + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + assert inference_data.observed_data.value.dtype.kind == "f" + + @pytest.mark.xfail(reason="DensityDist not refactored for v4") + @pytest.mark.parametrize("multiobs", (True, False)) + def test_multiobservedrv_to_observed_data(self, multiobs): + # fake regression data, with weights (W) + np.random.seed(2019) + N = 100 + X = np.random.uniform(size=N) + W = 1 + np.random.poisson(size=N) + a, b = 5, 17 + Y = a + np.random.normal(b * X) + + with pm.Model(): + a = pm.Normal("a", 0, 10) + b = pm.Normal("b", 0, 10) + mu = a + b * X + sigma = pm.HalfNormal("sigma", 1) + + def weighted_normal(y, w): + return w * logpt(pm.Normal.dist(mu=mu, sd=sigma), y) + + y_logp = pm.DensityDist( # pylint: disable=unused-variable + "y_logp", weighted_normal, observed={"y": Y, "w": W} + ) + idata = pm.sample( + 20, tune=20, return_inferencedata=True, idata_kwargs={"density_dist_obs": multiobs} + ) + multiobs_str = "" if multiobs else "~" + test_dict = { + "posterior": ["a", "b", "sigma"], + "sample_stats": ["lp"], + "log_likelihood": ["y_logp"], + f"{multiobs_str}observed_data": ["y", "w"], + } + fails = 
check_multiple_attrs(test_dict, idata) + assert not fails + if multiobs: + assert idata.observed_data.y.dtype.kind == "f" + + def test_single_observation(self): + with pm.Model(): + p = pm.Uniform("p", 0, 1) + pm.Binomial("w", p=p, n=2, observed=1) + inference_data = pm.sample(500, chains=2, return_inferencedata=True) + + assert inference_data + + @pytest.mark.xfail(reason="Potential not refactored for v4") + def test_potential(self): + with pm.Model(): + x = pm.Normal("x", 0.0, 1.0) + pm.Potential("z", logpt(pm.Normal.dist(x, 1.0), np.random.randn(10))) + inference_data = pm.sample(100, chains=2, return_inferencedata=True) + + assert inference_data + + @pytest.mark.parametrize("use_context", [True, False]) + def test_constant_data(self, use_context): + """Test constant_data group behaviour.""" + with pm.Model() as model: + x = pm.Data("x", [1.0, 2.0, 3.0]) + y = pm.Data("y", [1.0, 2.0, 3.0]) + beta = pm.Normal("beta", 0, 1) + obs = pm.Normal("obs", x * beta, 1, observed=y) # pylint: disable=unused-variable + trace = pm.sample(100, tune=100) + if use_context: + inference_data = to_inference_data(trace=trace) + + if not use_context: + inference_data = to_inference_data(trace=trace, model=model) + test_dict = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]} + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + + def test_predictions_constant_data(self): + with pm.Model(): + x = pm.Data("x", [1.0, 2.0, 3.0]) + y = pm.Data("y", [1.0, 2.0, 3.0]) + beta = pm.Normal("beta", 0, 1) + obs = pm.Normal("obs", x * beta, 1, observed=y) # pylint: disable=unused-variable + trace = pm.sample(100, tune=100) + inference_data = to_inference_data(trace) + + test_dict = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]} + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + + with pm.Model(): + x = pm.Data("x", [1.0, 2.0]) + y = pm.Data("y", [1.0, 2.0]) + beta = pm.Normal("beta", 0, 1) + obs = pm.Normal("obs", x * beta, 1, observed=y) # pylint: disable=unused-variable + predictive_trace = pm.sample_posterior_predictive(inference_data) + assert set(predictive_trace.keys()) == {"obs"} + # this should be four chains of 100 samples + # assert predictive_trace["obs"].shape == (400, 2) + # but the shape seems to vary between pymc3 versions + inference_data = predictions_to_inference_data(predictive_trace, posterior_trace=trace) + test_dict = {"posterior": ["beta"], "~observed_data": ""} + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails, "Posterior data not copied over as expected." + test_dict = {"predictions": ["obs"]} + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails, "Predictions not instantiated as expected." + test_dict = {"predictions_constant_data": ["x"]} + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails, "Predictions constant data not instantiated as expected." 
+ + def test_no_trace(self): + with pm.Model() as model: + x = pm.Data("x", [1.0, 2.0, 3.0]) + y = pm.Data("y", [1.0, 2.0, 3.0]) + beta = pm.Normal("beta", 0, 1) + obs = pm.Normal("obs", x * beta, 1, observed=y) # pylint: disable=unused-variable + trace = pm.sample(100, tune=100) + prior = pm.sample_prior_predictive() + posterior_predictive = pm.sample_posterior_predictive(trace) + + # Only prior + inference_data = to_inference_data(prior=prior, model=model) + test_dict = {"prior": ["beta"], "prior_predictive": ["obs"]} + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + # Only posterior_predictive + inference_data = to_inference_data(posterior_predictive=posterior_predictive, model=model) + test_dict = {"posterior_predictive": ["obs"]} + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + # Prior and posterior_predictive but no trace + inference_data = to_inference_data( + prior=prior, posterior_predictive=posterior_predictive, model=model + ) + test_dict = { + "prior": ["beta"], + "prior_predictive": ["obs"], + "posterior_predictive": ["obs"], + } + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + + @pytest.mark.parametrize("use_context", [True, False]) + def test_priors_separation(self, use_context): + """Test model is enough to get prior, prior predictive and observed_data.""" + with pm.Model() as model: + x = pm.Data("x", [1.0, 2.0, 3.0]) + y = pm.Data("y", [1.0, 2.0, 3.0]) + beta = pm.Normal("beta", 0, 1) + obs = pm.Normal("obs", x * beta, 1, observed=y) # pylint: disable=unused-variable + prior = pm.sample_prior_predictive() + + test_dict = { + "prior": ["beta", "~obs"], + "observed_data": ["obs"], + "prior_predictive": ["obs"], + } + if use_context: + with model: + inference_data = to_inference_data(prior=prior) + else: + inference_data = to_inference_data(prior=prior, model=model) + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + + @pytest.mark.xfail(reason="Dims option is not supported yet") + def test_multivariate_observations(self): + coords = {"direction": ["x", "y", "z"], "experiment": np.arange(20)} + data = np.random.multinomial(20, [0.2, 0.3, 0.5], size=20) + with pm.Model(coords=coords): + p = pm.Beta("p", 1, 1, size=(3,)) + pm.Multinomial("y", 20, p, dims=("experiment", "direction"), observed=data) + idata = pm.sample(draws=50, tune=100, return_inferencedata=True) + test_dict = { + "posterior": ["p"], + "sample_stats": ["lp"], + "log_likelihood": ["y"], + "observed_data": ["y"], + } + fails = check_multiple_attrs(test_dict, idata) + assert not fails + assert "direction" not in idata.log_likelihood.dims + assert "direction" in idata.observed_data.dims + + +class TestPyMC3WarmupHandling: + @pytest.mark.skipif( + not hasattr(pm.backends.base.SamplerReport, "n_draws"), + reason="requires pymc3 3.9 or higher", + ) + @pytest.mark.parametrize("save_warmup", [False, True]) + @pytest.mark.parametrize("chains", [1, 2]) + @pytest.mark.parametrize("tune,draws", [(0, 50), (10, 40), (30, 0)]) + def test_save_warmup(self, save_warmup, chains, tune, draws): + with pm.Model(): + pm.Uniform("u1") + pm.Normal("n1") + idata = pm.sample( + tune=tune, + draws=draws, + chains=chains, + cores=1, + step=pm.Metropolis(), + discard_tuned_samples=False, + return_inferencedata=True, + idata_kwargs={"save_warmup": save_warmup}, + ) + warmup_prefix = "" if save_warmup and (tune > 0) else "~" + post_prefix = "" if draws > 0 else "~" + test_dict = { + f"{post_prefix}posterior": ["u1", "n1"], + 
f"{post_prefix}sample_stats": ["~tune", "accept"], + f"{warmup_prefix}warmup_posterior": ["u1", "n1"], + f"{warmup_prefix}warmup_sample_stats": ["~tune"], + "~warmup_log_likelihood": [], + "~log_likelihood": [], + } + fails = check_multiple_attrs(test_dict, idata) + assert not fails + if hasattr(idata, "posterior"): + assert idata.posterior.dims["chain"] == chains + assert idata.posterior.dims["draw"] == draws + if hasattr(idata, "warmup_posterior"): + assert idata.warmup_posterior.dims["chain"] == chains + assert idata.warmup_posterior.dims["draw"] == tune + + def test_save_warmup_issue_1208_after_3_9(self): + with pm.Model(): + pm.Uniform("u1") + pm.Normal("n1") + trace = pm.sample( + tune=100, + draws=200, + chains=2, + cores=1, + step=pm.Metropolis(), + discard_tuned_samples=False, + ) + assert isinstance(trace, pm.backends.base.MultiTrace) + assert len(trace) == 300 + + # from original trace, warmup draws should be separated out + idata = to_inference_data(trace, save_warmup=True) + test_dict = { + "posterior": ["u1", "n1"], + "sample_stats": ["~tune", "accept"], + "warmup_posterior": ["u1", "n1"], + "warmup_sample_stats": ["~tune", "accept"], + } + fails = check_multiple_attrs(test_dict, idata) + assert not fails + assert idata.posterior.dims["chain"] == 2 + assert idata.posterior.dims["draw"] == 200 + + # manually sliced trace triggers the same warning as <=3.8 + with pytest.warns(UserWarning, match="Warmup samples"): + idata = to_inference_data(trace[-30:], save_warmup=True) + test_dict = { + "posterior": ["u1", "n1"], + "sample_stats": ["~tune", "accept"], + "~warmup_posterior": [], + "~warmup_sample_stats": [], + } + fails = check_multiple_attrs(test_dict, idata) + assert not fails + assert idata.posterior.dims["chain"] == 2 + assert idata.posterior.dims["draw"] == 30 diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py index 150675b43c..25363d2e99 100644 --- a/pymc3/tests/test_sampling.py +++ b/pymc3/tests/test_sampling.py @@ -471,7 +471,7 @@ def test_normal_scalar_idata(self): with model: # test keep_size parameter and idata input - idata = az.from_pymc3(trace) + idata = pm.to_inference_data(trace) ppc = pm.sample_posterior_predictive(idata, keep_size=True) assert ppc["a"].shape == (nchains, ndraws) @@ -514,7 +514,7 @@ def test_normal_vector_idata(self, caplog): with model: # test keep_size parameter with inference data as input... - idata = az.from_pymc3(trace) + idata = pm.to_inference_data(trace) ppc = pm.sample_posterior_predictive(idata, keep_size=True) assert ppc["a"].shape == (trace.nchains, len(trace), 2) @@ -720,14 +720,14 @@ def test_sample_posterior_predictive_w(self): y = pm.Normal("y", mu=mu, sigma=1, observed=data0) with pytest.warns(UserWarning, match=warning_msg): trace_0 = pm.sample(10, tune=0, chains=2, return_inferencedata=False) - idata_0 = az.from_pymc3(trace_0, log_likelihood=False) + idata_0 = pm.to_inference_data(trace_0, log_likelihood=False) with pm.Model() as model_1: mu = pm.Normal("mu", mu=0, sigma=1, size=len(data0)) y = pm.Normal("y", mu=mu, sigma=1, observed=data0) with pytest.warns(UserWarning, match=warning_msg): trace_1 = pm.sample(10, tune=0, chains=2, return_inferencedata=False) - idata_1 = az.from_pymc3(trace_1, log_likelihood=False) + idata_1 = pm.to_inference_data(trace_1, log_likelihood=False) with pm.Model() as model_2: # Model with no observed RVs. 
@@ -1037,13 +1037,13 @@ def test_sample_from_xarray_prior(self, point_list_arg_bug_fixture): with pmodel: prior = pm.sample_prior_predictive(samples=20) - idat = az.from_pymc3(trace, prior=prior) + idat = pm.to_inference_data(trace, prior=prior) with pmodel: pp = pm.sample_posterior_predictive(idat.prior, var_names=["d"]) @pytest.mark.xfail(reason="Arviz not refactored for v4") def test_sample_from_xarray_posterior(self, point_list_arg_bug_fixture): pmodel, trace = point_list_arg_bug_fixture - idat = az.from_pymc3(trace) + idat = pm.to_inference_data(trace) with pmodel: pp = pm.sample_posterior_predictive(idat.posterior, var_names=["d"]) diff --git a/requirements.txt b/requirements.txt index 9ec84e7538..c9ecc6e1a6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ aesara>=2.0.1 -arviz>=0.11.1 +arviz>=0.11.2 dill fastprogress>=0.2.0 numpy>=1.15.0 From e01a473f41c3d293f4644d0017df9e3df593a9af Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Thu, 25 Mar 2021 18:51:00 -0500 Subject: [PATCH 4/7] Disable dims, default_dims, and index_origin options until arviz > v0.11.2 --- pymc3/backends/arviz.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/pymc3/backends/arviz.py b/pymc3/backends/arviz.py index 1073a0b930..3be8a9986b 100644 --- a/pymc3/backends/arviz.py +++ b/pymc3/backends/arviz.py @@ -302,7 +302,7 @@ def posterior_to_xarray(self): coords=self.coords, dims=self.dims, attrs=self.attrs, - index_origin=self.index_origin, + # index_origin=self.index_origin, ), dict_to_dataset( data_warmup, @@ -310,7 +310,7 @@ def posterior_to_xarray(self): coords=self.coords, dims=self.dims, attrs=self.attrs, - index_origin=self.index_origin, + # index_origin=self.index_origin, ), ) @@ -344,7 +344,7 @@ def sample_stats_to_xarray(self): dims=None, coords=self.coords, attrs=self.attrs, - index_origin=self.index_origin, + # index_origin=self.index_origin, ), dict_to_dataset( data_warmup, @@ -352,7 +352,7 @@ def sample_stats_to_xarray(self): dims=None, coords=self.coords, attrs=self.attrs, - index_origin=self.index_origin, + # index_origin=self.index_origin, ), ) @@ -385,7 +385,7 @@ def log_likelihood_to_xarray(self): dims=self.dims, coords=self.coords, skip_event_dims=True, - index_origin=self.index_origin, + # index_origin=self.index_origin, ), dict_to_dataset( data_warmup, @@ -393,7 +393,7 @@ def log_likelihood_to_xarray(self): dims=self.dims, coords=self.coords, skip_event_dims=True, - index_origin=self.index_origin, + # index_origin=self.index_origin, ), ) @@ -415,7 +415,11 @@ def translate_posterior_predictive_dict_to_xarray(self, dct) -> xr.Dataset: k, ) return dict_to_dataset( - data, library=pymc3, coords=self.coords, dims=self.dims, index_origin=self.index_origin + data, + library=pymc3, + coords=self.coords, + # dims=self.dims, + # index_origin=self.index_origin ) @requires(["posterior_predictive"]) @@ -450,8 +454,8 @@ def priors_to_xarray(self): {k: np.expand_dims(self.prior[k], 0) for k in var_names}, library=pymc3, coords=self.coords, - dims=self.dims, - index_origin=self.index_origin, + # dims=self.dims, + # index_origin=self.index_origin, ) ) return priors_dict @@ -466,9 +470,9 @@ def observed_data_to_xarray(self): {**self.observations, **self.multi_observations}, library=pymc3, coords=self.coords, - dims=self.dims, - default_dims=[], - index_origin=self.index_origin, + # dims=self.dims, + # default_dims=[], + # index_origin=self.index_origin, ) @requires(["trace", "predictions"]) @@ -513,9 +517,9 @@ def is_data(name, 
var) -> bool: constant_data, library=pymc3, coords=self.coords, - dims=self.dims, - default_dims=[], - index_origin=self.index_origin, + # dims=self.dims, + # default_dims=[], + # index_origin=self.index_origin, ) def to_inference_data(self): From 1a604c354b3879bed20226ab76cf67a17949aa75 Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Thu, 25 Mar 2021 20:09:16 -0500 Subject: [PATCH 5/7] Re-enable Arviz tests in pymc3.tests.test_sampling --- pymc3/tests/test_sampling.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py index 25363d2e99..41cb200450 100644 --- a/pymc3/tests/test_sampling.py +++ b/pymc3/tests/test_sampling.py @@ -20,12 +20,13 @@ import aesara import aesara.tensor as aet -import arviz as az import numpy as np import numpy.testing as npt import pytest from aesara import shared +from arviz import InferenceData +from arviz import from_dict as az_from_dict from scipy import stats import pymc3 as pm @@ -200,7 +201,7 @@ def test_return_inferencedata(self, monkeypatch): # inferencedata with tuning result = pm.sample(**kwargs, return_inferencedata=True, discard_tuned_samples=False) - assert isinstance(result, az.InferenceData) + assert isinstance(result, InferenceData) assert result.posterior.sizes["draw"] == 100 assert result.posterior.sizes["chain"] == 2 assert len(result._groups_warmup) > 0 @@ -215,7 +216,7 @@ def test_return_inferencedata(self, monkeypatch): random_seed=-1 ) assert "prior" in result - assert isinstance(result, az.InferenceData) + assert isinstance(result, InferenceData) assert result.posterior.sizes["draw"] == 100 assert result.posterior.sizes["chain"] == 2 assert len(result._groups_warmup) == 0 @@ -458,7 +459,6 @@ def test_normal_scalar(self): ppc = pm.sample_posterior_predictive(trace, size=5, var_names=["a"]) assert ppc["a"].shape == (nchains * ndraws, 5) - @pytest.mark.xfail(reason="Arviz not refactored for v4") def test_normal_scalar_idata(self): nchains = 2 ndraws = 500 @@ -466,12 +466,19 @@ def test_normal_scalar_idata(self): mu = pm.Normal("mu", 0.0, 1.0) a = pm.Normal("a", mu=mu, sigma=1, observed=0.0) trace = pm.sample( - draws=ndraws, chains=nchains, return_inferencedata=True, discard_tuned_samples=False + draws=ndraws, + chains=nchains, + return_inferencedata=False, + discard_tuned_samples=False, ) + assert not isinstance(trace, InferenceData) + with model: # test keep_size parameter and idata input idata = pm.to_inference_data(trace) + assert isinstance(idata, InferenceData) + ppc = pm.sample_posterior_predictive(idata, keep_size=True) assert ppc["a"].shape == (nchains, ndraws) @@ -505,16 +512,19 @@ def test_normal_vector(self, caplog): assert "a" in ppc assert ppc["a"].shape == (10, 4, 2) - @pytest.mark.xfail(reason="Arviz not refactored for v4") def test_normal_vector_idata(self, caplog): with pm.Model() as model: mu = pm.Normal("mu", 0.0, 1.0) a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.5, 0.2])) trace = pm.sample(return_inferencedata=False) + assert not isinstance(trace, InferenceData) + with model: # test keep_size parameter with inference data as input... 
idata = pm.to_inference_data(trace) + assert isinstance(idata, InferenceData) + ppc = pm.sample_posterior_predictive(idata, keep_size=True) assert ppc["a"].shape == (trace.nchains, len(trace), 2) @@ -703,7 +713,7 @@ def test_potentials_warning(self): p = pm.Potential("p", a + 1) obs = pm.Normal("obs", a, 1, observed=5) - trace = az.from_dict({"a": np.random.rand(10)}) + trace = az_from_dict({"a": np.random.rand(10)}) with m: with pytest.warns(UserWarning, match=warning_msg): pm.sample_posterior_predictive(trace, samples=5) @@ -768,7 +778,7 @@ def test_potentials_warning(self): p = pm.Potential("p", a + 1) obs = pm.Normal("obs", a, 1, observed=5) - trace = az.from_dict({"a": np.random.rand(10)}) + trace = az_from_dict({"a": np.random.rand(10)}) with pytest.warns(UserWarning, match=warning_msg): pm.sample_posterior_predictive_w(samples=5, traces=[trace, trace], models=[m, m]) @@ -1031,17 +1041,17 @@ def test_point_list_arg_bug_spp(self, point_list_arg_bug_fixture): with pmodel: pp = pm.sample_posterior_predictive([trace[15]], var_names=["d"]) - @pytest.mark.xfail(reason="Arviz not refactored for v4") def test_sample_from_xarray_prior(self, point_list_arg_bug_fixture): pmodel, trace = point_list_arg_bug_fixture with pmodel: prior = pm.sample_prior_predictive(samples=20) + idat = pm.to_inference_data(trace, prior=prior) + with pmodel: pp = pm.sample_posterior_predictive(idat.prior, var_names=["d"]) - @pytest.mark.xfail(reason="Arviz not refactored for v4") def test_sample_from_xarray_posterior(self, point_list_arg_bug_fixture): pmodel, trace = point_list_arg_bug_fixture idat = pm.to_inference_data(trace) From d5726e793871d14648c78d66a432d0f258cfa924 Mon Sep 17 00:00:00 2001 From: "Oriol (ZBook)" Date: Fri, 26 Mar 2021 06:13:11 +0200 Subject: [PATCH 6/7] add workaround for data groups until next arviz release --- pymc3/backends/arviz.py | 82 +++++++++++++++++++--------- pymc3/tests/test_idata_conversion.py | 4 -- 2 files changed, 55 insertions(+), 31 deletions(-) diff --git a/pymc3/backends/arviz.py b/pymc3/backends/arviz.py index 3be8a9986b..cc2b1937c5 100644 --- a/pymc3/backends/arviz.py +++ b/pymc3/backends/arviz.py @@ -20,7 +20,9 @@ from aesara.graph.basic import Constant from aesara.tensor.sharedvar import SharedVariable from arviz import InferenceData, concat, rcParams -from arviz.data.base import CoordSpec, DimSpec, dict_to_dataset, requires +from arviz.data.base import CoordSpec, DimSpec +from arviz.data.base import dict_to_dataset as _dict_to_dataset +from arviz.data.base import generate_dims_coords, make_attrs, requires import pymc3 @@ -98,6 +100,37 @@ def insert(self, k: str, v, idx: int): self.trace_dict[k][idx, :] = v +def dict_to_dataset( + data, + library=None, + coords=None, + dims=None, + attrs=None, + default_dims=None, + skip_event_dims=None, + index_origin=None, +): + """Temporal workaround for dict_to_dataset. + + Once ArviZ>0.11.2 release is available, only two changes are needed for everything to work. 
+ 1) this should be deleted, 2) dict_to_dataset should be imported as is from arviz, no underscore, + also remove unnecessary imports + """ + if default_dims is None: + return _dict_to_dataset( + data, library=library, coords=coords, dims=dims, skip_event_dims=skip_event_dims + ) + else: + out_data = {} + for name, vals in data.items(): + vals = np.atleast_1d(vals) + val_dims = dims.get(name) + val_dims, coords = generate_dims_coords(vals.shape, name, dims=val_dims, coords=coords) + coords = {key: xr.IndexVariable((key,), data=coords[key]) for key in val_dims} + out_data[name] = xr.DataArray(vals, dims=val_dims, coords=coords) + return xr.Dataset(data_vars=out_data, attrs=make_attrs(library=library)) + + class InferenceDataConverter: # pylint: disable=too-many-instance-attributes """Encapsulate InferenceData specific logic.""" @@ -196,14 +229,13 @@ def arbitrary_element(dct: Dict[Any, np.ndarray]) -> np.ndarray: self.dims = {**model_dims, **self.dims} self.density_dist_obs = density_dist_obs - self.observations, self.multi_observations = self.find_observations() + self.observations = self.find_observations() - def find_observations(self) -> Tuple[Optional[Dict[str, Var]], Optional[Dict[str, Var]]]: + def find_observations(self) -> Optional[Dict[str, Var]]: """If there are observations available, return them as a dictionary.""" if self.model is None: - return (None, None) + return None observations = {} - multi_observations = {} for obs in self.model.observed_RVs: aux_obs = getattr(obs.tag, "observations", None) if aux_obs is not None: @@ -215,7 +247,7 @@ def find_observations(self) -> Tuple[Optional[Dict[str, Var]], Optional[Dict[str else: warnings.warn(f"No data for observation {obs}") - return observations, multi_observations + return observations def split_trace(self) -> Tuple[Union[None, "MultiTrace"], Union[None, "MultiTrace"]]: """Split MultiTrace object into posterior and warmup. 
@@ -302,7 +334,7 @@ def posterior_to_xarray(self): coords=self.coords, dims=self.dims, attrs=self.attrs, - # index_origin=self.index_origin, + index_origin=self.index_origin, ), dict_to_dataset( data_warmup, @@ -310,7 +342,7 @@ def posterior_to_xarray(self): coords=self.coords, dims=self.dims, attrs=self.attrs, - # index_origin=self.index_origin, + index_origin=self.index_origin, ), ) @@ -344,7 +376,7 @@ def sample_stats_to_xarray(self): dims=None, coords=self.coords, attrs=self.attrs, - # index_origin=self.index_origin, + index_origin=self.index_origin, ), dict_to_dataset( data_warmup, @@ -352,7 +384,7 @@ def sample_stats_to_xarray(self): dims=None, coords=self.coords, attrs=self.attrs, - # index_origin=self.index_origin, + index_origin=self.index_origin, ), ) @@ -385,7 +417,7 @@ def log_likelihood_to_xarray(self): dims=self.dims, coords=self.coords, skip_event_dims=True, - # index_origin=self.index_origin, + index_origin=self.index_origin, ), dict_to_dataset( data_warmup, @@ -393,7 +425,7 @@ def log_likelihood_to_xarray(self): dims=self.dims, coords=self.coords, skip_event_dims=True, - # index_origin=self.index_origin, + index_origin=self.index_origin, ), ) @@ -415,11 +447,7 @@ def translate_posterior_predictive_dict_to_xarray(self, dct) -> xr.Dataset: k, ) return dict_to_dataset( - data, - library=pymc3, - coords=self.coords, - # dims=self.dims, - # index_origin=self.index_origin + data, library=pymc3, coords=self.coords, dims=self.dims, index_origin=self.index_origin ) @requires(["posterior_predictive"]) @@ -454,25 +482,25 @@ def priors_to_xarray(self): {k: np.expand_dims(self.prior[k], 0) for k in var_names}, library=pymc3, coords=self.coords, - # dims=self.dims, - # index_origin=self.index_origin, + dims=self.dims, + index_origin=self.index_origin, ) ) return priors_dict - @requires(["observations", "multi_observations"]) + @requires("observations") @requires("model") def observed_data_to_xarray(self): """Convert observed data to xarray.""" if self.predictions: return None return dict_to_dataset( - {**self.observations, **self.multi_observations}, + self.observations, library=pymc3, coords=self.coords, - # dims=self.dims, - # default_dims=[], - # index_origin=self.index_origin, + dims=self.dims, + default_dims=[], + index_origin=self.index_origin, ) @requires(["trace", "predictions"]) @@ -517,9 +545,9 @@ def is_data(name, var) -> bool: constant_data, library=pymc3, coords=self.coords, - # dims=self.dims, - # default_dims=[], - # index_origin=self.index_origin, + dims=self.dims, + default_dims=[], + index_origin=self.index_origin, ) def to_inference_data(self): diff --git a/pymc3/tests/test_idata_conversion.py b/pymc3/tests/test_idata_conversion.py index e098fa5db9..0d720d7656 100644 --- a/pymc3/tests/test_idata_conversion.py +++ b/pymc3/tests/test_idata_conversion.py @@ -570,10 +570,6 @@ def test_multivariate_observations(self): class TestPyMC3WarmupHandling: - @pytest.mark.skipif( - not hasattr(pm.backends.base.SamplerReport, "n_draws"), - reason="requires pymc3 3.9 or higher", - ) @pytest.mark.parametrize("save_warmup", [False, True]) @pytest.mark.parametrize("chains", [1, 2]) @pytest.mark.parametrize("tune,draws", [(0, 50), (10, 40), (30, 0)]) From fa7607d66073b6d7c414cc8db19df5057f9b2b7d Mon Sep 17 00:00:00 2001 From: "Oriol (ZBook)" Date: Fri, 26 Mar 2021 06:13:52 +0200 Subject: [PATCH 7/7] activate arviz compat tests --- .github/workflows/arviz_compat.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/arviz_compat.yml b/.github/workflows/arviz_compat.yml 
index 6c5832b881..55405d0624 100644
--- a/.github/workflows/arviz_compat.yml
+++ b/.github/workflows/arviz_compat.yml
@@ -7,7 +7,6 @@ on:
 
 jobs:
   pytest:
-    if: false
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest]
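
A note for reviewers following the test changes above: with the az.from_pymc3 calls gone, the re-enabled tests build InferenceData through pm.to_inference_data and feed the result (or one of its groups) directly to pm.sample_posterior_predictive. Below is a minimal sketch of that call pattern, assuming the v4 branch with these patches applied; the model and variable names are illustrative only and do not appear in the patch.

    import numpy as np
    import pymc3 as pm
    from arviz import InferenceData

    with pm.Model() as model:
        mu = pm.Normal("mu", 0.0, 1.0)
        pm.Normal("obs", mu, 1.0, observed=np.random.normal(size=10))

        # return_inferencedata=False keeps the raw MultiTrace, as in the tests above
        trace = pm.sample(draws=100, chains=2, return_inferencedata=False)

        # pm.to_inference_data takes over the role az.from_pymc3 played in the old tests
        idata = pm.to_inference_data(trace)
        assert isinstance(idata, InferenceData)

        # the InferenceData object can be passed straight to posterior predictive sampling
        ppc = pm.sample_posterior_predictive(idata, keep_size=True)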
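
The temporary dict_to_dataset wrapper added above has two branches: with default_dims left as None it forwards to ArviZ's own dict_to_dataset while dropping the default_dims and index_origin keywords (which, per the commit messages, require an ArviZ release newer than 0.11.2), and with default_dims given (the converter passes [] for observed and constant data) it assembles the xarray Dataset by hand so that no chain/draw dimensions are prepended. A rough illustration of the two paths follows, assuming the patched module is importable; the sample data is made up, and calling the wrapper directly like this is for demonstration only.

    import numpy as np
    import pymc3
    from pymc3.backends.arviz import dict_to_dataset

    # (chain, draw)-shaped samples: forwarded to arviz.data.base.dict_to_dataset
    posterior = {"mu": np.random.randn(2, 100)}
    ds_post = dict_to_dataset(posterior, library=pymc3, coords={}, dims={})
    assert "chain" in ds_post["mu"].dims

    # observed-data-style values: default_dims=[] takes the manual xarray branch,
    # so only the variable's own dimensions are created
    observed = {"obs": np.random.randn(10)}
    ds_obs = dict_to_dataset(observed, library=pymc3, coords={}, dims={}, default_dims=[])
    assert "chain" not in ds_obs["obs"].dims  # only the variable's own dims, e.g. ("obs_dim_0",)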