From 0ef932afbc033abf97e4e3e288c144571f3154c3 Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Thu, 25 Mar 2021 19:49:18 -0500 Subject: [PATCH 1/7] Create extract_obs_data function --- pymc3/aesaraf.py | 26 +++++++++++++++++++- pymc3/tests/test_aesaraf.py | 49 ++++++++++++++++++++++++++++++++++++- 2 files changed, 73 insertions(+), 2 deletions(-) diff --git a/pymc3/aesaraf.py b/pymc3/aesaraf.py index 61ec4a9da6..a41e173a14 100644 --- a/pymc3/aesaraf.py +++ b/pymc3/aesaraf.py @@ -19,10 +19,12 @@ from aesara import scalar from aesara import tensor as aet from aesara.gradient import grad -from aesara.graph.basic import Apply, graph_inputs +from aesara.graph.basic import Apply, Constant, graph_inputs from aesara.graph.op import Op from aesara.sandbox.rng_mrg import MRG_RandomStream as RandomStream from aesara.tensor.elemwise import Elemwise +from aesara.tensor.sharedvar import SharedVariable +from aesara.tensor.subtensor import AdvancedIncSubtensor, AdvancedIncSubtensor1 from aesara.tensor.var import TensorVariable from pymc3.data import GeneratorAdapter @@ -48,6 +50,28 @@ ] +def extract_obs_data(x: TensorVariable) -> np.ndarray: + """Extract data observed symbolic variables. + + Raises + ------ + TypeError + + """ + if isinstance(x, Constant): + return x.data + if isinstance(x, SharedVariable): + return x.get_value() + if x.owner and isinstance(x.owner.op, (AdvancedIncSubtensor, AdvancedIncSubtensor1)): + array_data = extract_obs_data(x.owner.inputs[0]) + mask_idx = tuple(extract_obs_data(i) for i in x.owner.inputs[2:]) + mask = np.zeros_like(array_data) + mask[mask_idx] = 1 + return np.ma.MaskedArray(array_data, mask) + + raise TypeError(f"Data cannot be extracted from {x}") + + def inputvars(a): """ Get the inputs into a aesara variables diff --git a/pymc3/tests/test_aesaraf.py b/pymc3/tests/test_aesaraf.py index 1b591e0a85..da1fe29689 100644 --- a/pymc3/tests/test_aesaraf.py +++ b/pymc3/tests/test_aesaraf.py @@ -19,9 +19,10 @@ import numpy as np import pytest +from aesara.tensor.subtensor import AdvancedIncSubtensor, AdvancedIncSubtensor1 from aesara.tensor.type import TensorType -from pymc3.aesaraf import _conversion_map, take_along_axis +from pymc3.aesaraf import _conversion_map, extract_obs_data, take_along_axis from pymc3.vartypes import int_types FLOATX = str(aesara.config.floatX) @@ -225,3 +226,49 @@ def test_dtype_failure(self): indices.tag.test_value = np.zeros((1,) * indices.ndim, dtype=FLOATX) with pytest.raises(IndexError): take_along_axis(arr, indices) + + +def test_extract_obs_data(): + + with pytest.raises(TypeError): + extract_obs_data(aet.matrix()) + + data = np.random.normal(size=(2, 3)) + data_at = aet.as_tensor(data) + mask = np.random.binomial(1, 0.5, size=(2, 3)).astype(bool) + + for val_at in (data_at, aesara.shared(data)): + res = extract_obs_data(val_at) + + assert isinstance(res, np.ndarray) + assert np.array_equal(res, data) + + # AdvancedIncSubtensor check + data_m = np.ma.MaskedArray(data, mask) + missing_values = data_at.type()[mask] + constant = aet.as_tensor(data_m.filled()) + z_at = aet.set_subtensor(constant[mask.nonzero()], missing_values) + + assert isinstance(z_at.owner.op, AdvancedIncSubtensor) + + res = extract_obs_data(z_at) + + assert isinstance(res, np.ndarray) + assert np.ma.allequal(res, data_m) + + # AdvancedIncSubtensor1 check + data = np.random.normal(size=(3,)) + data_at = aet.as_tensor(data) + mask = np.random.binomial(1, 0.5, size=(3,)).astype(bool) + + data_m = np.ma.MaskedArray(data, mask) + missing_values = data_at.type()[mask] + 
constant = aet.as_tensor(data_m.filled()) + z_at = aet.set_subtensor(constant[mask.nonzero()], missing_values) + + assert isinstance(z_at.owner.op, AdvancedIncSubtensor1) + + res = extract_obs_data(z_at) + + assert isinstance(res, np.ndarray) + assert np.ma.allequal(res, data_m) From 371393685cde4091c436f3b448ab56d4046cb7a4 Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Thu, 25 Mar 2021 21:50:49 -0500 Subject: [PATCH 2/7] Do not use shared variables as inputs during prior/posterior sampling --- pymc3/sampling.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index b6cb03bd44..0b016a674e 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -32,6 +32,7 @@ import packaging import xarray +from aesara.tensor.sharedvar import SharedVariable from arviz import InferenceData from fastprogress.fastprogress import progress_bar @@ -1730,7 +1731,9 @@ def sample_posterior_predictive( inputs_and_names = [ (rv, rv.name) for rv in rv_ancestors(vars_to_sample, walk_past_rvs=True) - if rv not in vars_to_sample and rv in model.named_vars.values() + if rv not in vars_to_sample + and rv in model.named_vars.values() + and not isinstance(rv, SharedVariable) ] if inputs_and_names: inputs, input_names = zip(*inputs_and_names) @@ -1738,7 +1741,11 @@ def sample_posterior_predictive( inputs, input_names = [], [] else: output_names = [v.name for v in vars_to_sample if v.name is not None] - input_names = [n for n in _trace.varnames if n not in output_names] + input_names = [ + n + for n in _trace.varnames + if n not in output_names and not isinstance(model[n], SharedVariable) + ] inputs = [model[n] for n in input_names] if size is not None: @@ -1987,7 +1994,7 @@ def sample_prior_predictive( names = get_default_varnames(vars_, include_transformed=False) vars_to_sample = [model[name] for name in names] - inputs = [i for i in inputvars(vars_to_sample)] + inputs = [i for i in inputvars(vars_to_sample) if not isinstance(i, SharedVariable)] sampler_fn = aesara.function( inputs, vars_to_sample, From 78ff887ec19bfb42acb9d40c7ac5bc839a663771 Mon Sep 17 00:00:00 2001 From: "Oriol (ZBook)" Date: Sat, 27 Feb 2021 00:57:26 +0200 Subject: [PATCH 3/7] Port InferenceData conversion code --- pymc3/__init__.py | 7 +- pymc3/backends/__init__.py | 1 + pymc3/backends/arviz.py | 674 +++++++++++++++++++++++++++ pymc3/sampling.py | 65 +-- pymc3/tests/test_idata_conversion.py | 653 ++++++++++++++++++++++++++ pymc3/tests/test_sampling.py | 12 +- requirements.txt | 2 +- 7 files changed, 1345 insertions(+), 69 deletions(-) create mode 100644 pymc3/backends/arviz.py create mode 100644 pymc3/tests/test_idata_conversion.py diff --git a/pymc3/__init__.py b/pymc3/__init__.py index 8f33feef09..d47e0fc692 100644 --- a/pymc3/__init__.py +++ b/pymc3/__init__.py @@ -40,7 +40,12 @@ def __set_compiler_flags(): from pymc3 import gp, ode, sampling from pymc3.aesaraf import * -from pymc3.backends import load_trace, save_trace +from pymc3.backends import ( + load_trace, + predictions_to_inference_data, + save_trace, + to_inference_data, +) from pymc3.backends.tracetab import * from pymc3.blocking import * from pymc3.data import * diff --git a/pymc3/backends/__init__.py b/pymc3/backends/__init__.py index 535e800ec0..f42dc5975e 100644 --- a/pymc3/backends/__init__.py +++ b/pymc3/backends/__init__.py @@ -60,6 +60,7 @@ Saved backends can be loaded using `arviz.from_netcdf` """ +from pymc3.backends.arviz import predictions_to_inference_data, to_inference_data from 
pymc3.backends.ndarray import ( NDArray, load_trace, diff --git a/pymc3/backends/arviz.py b/pymc3/backends/arviz.py new file mode 100644 index 0000000000..1073a0b930 --- /dev/null +++ b/pymc3/backends/arviz.py @@ -0,0 +1,674 @@ +"""PyMC3-ArviZ conversion code.""" +import logging +import warnings + +from typing import ( # pylint: disable=unused-import + TYPE_CHECKING, + Any, + Dict, + Iterable, + List, + Mapping, + Optional, + Tuple, + Union, +) + +import numpy as np +import xarray as xr + +from aesara.graph.basic import Constant +from aesara.tensor.sharedvar import SharedVariable +from arviz import InferenceData, concat, rcParams +from arviz.data.base import CoordSpec, DimSpec, dict_to_dataset, requires + +import pymc3 + +from pymc3.aesaraf import extract_obs_data +from pymc3.distributions import logpt +from pymc3.model import modelcontext +from pymc3.util import get_default_varnames + +if TYPE_CHECKING: + from typing import Set # pylint: disable=ungrouped-imports + + from pymc3.backends.base import MultiTrace # pylint: disable=invalid-name + from pymc3.model import Model + +___all__ = [""] + +_log = logging.getLogger("pymc3") + +# random variable object ... +Var = Any # pylint: disable=invalid-name + + +class _DefaultTrace: + """ + Utility for collecting samples into a dictionary. + + Name comes from its similarity to ``defaultdict``: + entries are lazily created. + + Parameters + ---------- + samples : int + The number of samples that will be collected, per variable, + into the trace. + + Attributes + ---------- + trace_dict : Dict[str, np.ndarray] + A dictionary constituting a trace. Should be extracted + after a procedure has filled the `_DefaultTrace` using the + `insert()` method + """ + + trace_dict: Dict[str, np.ndarray] = {} + _len: Optional[int] = None + + def __init__(self, samples: int): + self._len = samples + self.trace_dict = {} + + def insert(self, k: str, v, idx: int): + """ + Insert `v` as the value of the `idx`th sample for the variable `k`. + + Parameters + ---------- + k: str + Name of the variable. + v: anything that can go into a numpy array (including a numpy array) + The value of the `idx`th sample from variable `k` + ids: int + The index of the sample we are inserting into the trace. 
+ """ + value_shape = np.shape(v) + + # initialize if necessary + if k not in self.trace_dict: + array_shape = (self._len,) + value_shape + self.trace_dict[k] = np.empty(array_shape, dtype=np.array(v).dtype) + + # do the actual insertion + if value_shape == (): + self.trace_dict[k][idx] = v + else: + self.trace_dict[k][idx, :] = v + + +class InferenceDataConverter: # pylint: disable=too-many-instance-attributes + """Encapsulate InferenceData specific logic.""" + + model = None # type: Optional[Model] + nchains = None # type: int + ndraws = None # type: int + posterior_predictive = None # Type: Optional[Mapping[str, np.ndarray]] + predictions = None # Type: Optional[Mapping[str, np.ndarray]] + prior = None # Type: Optional[Mapping[str, np.ndarray]] + + def __init__( + self, + *, + trace=None, + prior=None, + posterior_predictive=None, + log_likelihood=True, + predictions=None, + coords: Optional[CoordSpec] = None, + dims: Optional[DimSpec] = None, + model=None, + save_warmup: Optional[bool] = None, + density_dist_obs: bool = True, + index_origin: Optional[int] = None, + ): + + self.save_warmup = rcParams["data.save_warmup"] if save_warmup is None else save_warmup + self.trace = trace + + # this permits us to get the model from command-line argument or from with model: + try: + self.model = modelcontext(model) + except TypeError: + self.model = None + + self.attrs = None + if trace is not None: + self.nchains = trace.nchains if hasattr(trace, "nchains") else 1 + if hasattr(trace.report, "n_draws") and trace.report.n_draws is not None: + self.ndraws = trace.report.n_draws + self.attrs = { + "sampling_time": trace.report.t_sampling, + "tuning_steps": trace.report.n_tune, + } + else: + self.ndraws = len(trace) + if self.save_warmup: + warnings.warn( + "Warmup samples will be stored in posterior group and will not be" + " excluded from stats and diagnostics." + " Do not slice the trace manually before conversion", + UserWarning, + ) + self.ntune = len(self.trace) - self.ndraws + self.posterior_trace, self.warmup_trace = self.split_trace() + else: + self.nchains = self.ndraws = 0 + + self.prior = prior + self.posterior_predictive = posterior_predictive + self.log_likelihood = log_likelihood + self.predictions = predictions + self.index_origin = rcParams["data.index_origin"] if index_origin is None else index_origin + + def arbitrary_element(dct: Dict[Any, np.ndarray]) -> np.ndarray: + return next(iter(dct.values())) + + if trace is None: + # if you have a posterior_predictive built with keep_dims, + # you'll lose here, but there's nothing I can do about that. + self.nchains = 1 + get_from = None + if predictions is not None: + get_from = predictions + elif posterior_predictive is not None: + get_from = posterior_predictive + elif prior is not None: + get_from = prior + if get_from is None: + # pylint: disable=line-too-long + raise ValueError( + "When constructing InferenceData must have at least" + " one of trace, prior, posterior_predictive or predictions." 
+ ) + + aelem = arbitrary_element(get_from) + self.ndraws = aelem.shape[0] + + self.coords = {} if coords is None else coords + if hasattr(self.model, "coords"): + self.coords = {**self.model.coords, **self.coords} + + self.dims = {} if dims is None else dims + if hasattr(self.model, "RV_dims"): + model_dims = {k: list(v) for k, v in self.model.RV_dims.items()} + self.dims = {**model_dims, **self.dims} + + self.density_dist_obs = density_dist_obs + self.observations, self.multi_observations = self.find_observations() + + def find_observations(self) -> Tuple[Optional[Dict[str, Var]], Optional[Dict[str, Var]]]: + """If there are observations available, return them as a dictionary.""" + if self.model is None: + return (None, None) + observations = {} + multi_observations = {} + for obs in self.model.observed_RVs: + aux_obs = getattr(obs.tag, "observations", None) + if aux_obs is not None: + try: + obs_data = extract_obs_data(aux_obs) + observations[obs.name] = obs_data + except TypeError: + warnings.warn(f"Could not extract data from symbolic observation {obs}") + else: + warnings.warn(f"No data for observation {obs}") + + return observations, multi_observations + + def split_trace(self) -> Tuple[Union[None, "MultiTrace"], Union[None, "MultiTrace"]]: + """Split MultiTrace object into posterior and warmup. + + Returns + ------- + trace_posterior: MultiTrace or None + The slice of the trace corresponding to the posterior. If the posterior + trace is empty, None is returned + trace_warmup: MultiTrace or None + The slice of the trace corresponding to the warmup. If the warmup trace is + empty or ``save_warmup=False``, None is returned + """ + trace_posterior = None + trace_warmup = None + if self.save_warmup and self.ntune > 0: + trace_warmup = self.trace[: self.ntune] + if self.ndraws > 0: + trace_posterior = self.trace[self.ntune :] + return trace_posterior, trace_warmup + + def log_likelihood_vals_point(self, point, var, log_like_fun): + """Compute log likelihood for each observed point.""" + # TODO: This is a cheap hack; we should filter-out the correct + # variables some other way + point = {i.name: point[i.name] for i in log_like_fun.f.maker.inputs if i.name in point} + log_like_val = np.atleast_1d(log_like_fun(point)) + if var.tag.missing_values: + try: + obs_data = extract_obs_data(var) + except TypeError: + warnings.warn(f"Could not extract data from symbolic observation {var}") + + mask = obs_data.mask + if np.ndim(mask) > np.ndim(log_like_val): + mask = np.any(mask, axis=-1) + log_like_val = np.where(mask, np.nan, log_like_val) + return log_like_val + + def _extract_log_likelihood(self, trace): + """Compute log likelihood of each observation.""" + if self.trace is None: + return None + if self.model is None: + return None + + if self.log_likelihood is True: + cached = [(var, self.model.fn(logpt(var))) for var in self.model.observed_RVs] + else: + cached = [ + (var, self.model.fn(logpt(var))) + for var in self.model.observed_RVs + if var.name in self.log_likelihood + ] + log_likelihood_dict = _DefaultTrace(len(trace.chains)) + for var, log_like_fun in cached: + for k, chain in enumerate(trace.chains): + log_like_chain = [ + self.log_likelihood_vals_point(point, var, log_like_fun) + for point in trace.points([chain]) + ] + log_likelihood_dict.insert(var.name, np.stack(log_like_chain), k) + return log_likelihood_dict.trace_dict + + @requires("trace") + def posterior_to_xarray(self): + """Convert the posterior to an xarray dataset.""" + var_names = get_default_varnames(self.trace.varnames, 
include_transformed=False) + data = {} + data_warmup = {} + for var_name in var_names: + if self.warmup_trace: + data_warmup[var_name] = np.array( + self.warmup_trace.get_values(var_name, combine=False, squeeze=False) + ) + if self.posterior_trace: + data[var_name] = np.array( + self.posterior_trace.get_values(var_name, combine=False, squeeze=False) + ) + return ( + dict_to_dataset( + data, + library=pymc3, + coords=self.coords, + dims=self.dims, + attrs=self.attrs, + index_origin=self.index_origin, + ), + dict_to_dataset( + data_warmup, + library=pymc3, + coords=self.coords, + dims=self.dims, + attrs=self.attrs, + index_origin=self.index_origin, + ), + ) + + @requires("trace") + def sample_stats_to_xarray(self): + """Extract sample_stats from PyMC3 trace.""" + data = {} + rename_key = { + "model_logp": "lp", + "mean_tree_accept": "acceptance_rate", + "depth": "tree_depth", + "tree_size": "n_steps", + } + data = {} + data_warmup = {} + for stat in self.trace.stat_names: + name = rename_key.get(stat, stat) + if name == "tune": + continue + if self.warmup_trace: + data_warmup[name] = np.array( + self.warmup_trace.get_sampler_stats(stat, combine=False) + ) + if self.posterior_trace: + data[name] = np.array(self.posterior_trace.get_sampler_stats(stat, combine=False)) + + return ( + dict_to_dataset( + data, + library=pymc3, + dims=None, + coords=self.coords, + attrs=self.attrs, + index_origin=self.index_origin, + ), + dict_to_dataset( + data_warmup, + library=pymc3, + dims=None, + coords=self.coords, + attrs=self.attrs, + index_origin=self.index_origin, + ), + ) + + @requires("trace") + @requires("model") + def log_likelihood_to_xarray(self): + """Extract log likelihood and log_p data from PyMC3 trace.""" + if self.predictions or not self.log_likelihood: + return None + data_warmup = {} + data = {} + warn_msg = ( + "Could not compute log_likelihood, it will be omitted. " + "Check your model object or set log_likelihood=False" + ) + if self.posterior_trace: + try: + data = self._extract_log_likelihood(self.posterior_trace) + except TypeError: + warnings.warn(warn_msg) + if self.warmup_trace: + try: + data_warmup = self._extract_log_likelihood(self.warmup_trace) + except TypeError: + warnings.warn(warn_msg) + return ( + dict_to_dataset( + data, + library=pymc3, + dims=self.dims, + coords=self.coords, + skip_event_dims=True, + index_origin=self.index_origin, + ), + dict_to_dataset( + data_warmup, + library=pymc3, + dims=self.dims, + coords=self.coords, + skip_event_dims=True, + index_origin=self.index_origin, + ), + ) + + def translate_posterior_predictive_dict_to_xarray(self, dct) -> xr.Dataset: + """Take Dict of variables to numpy ndarrays (samples) and translate into dataset.""" + data = {} + for k, ary in dct.items(): + shape = ary.shape + if shape[0] == self.nchains and shape[1] == self.ndraws: + data[k] = ary + elif shape[0] == self.nchains * self.ndraws: + data[k] = ary.reshape((self.nchains, self.ndraws, *shape[1:])) + else: + data[k] = np.expand_dims(ary, 0) + # pylint: disable=line-too-long + _log.warning( + "posterior predictive variable %s's shape not compatible with number of chains and draws. 
" + "This can mean that some draws or even whole chains are not represented.", + k, + ) + return dict_to_dataset( + data, library=pymc3, coords=self.coords, dims=self.dims, index_origin=self.index_origin + ) + + @requires(["posterior_predictive"]) + def posterior_predictive_to_xarray(self): + """Convert posterior_predictive samples to xarray.""" + return self.translate_posterior_predictive_dict_to_xarray(self.posterior_predictive) + + @requires(["predictions"]) + def predictions_to_xarray(self): + """Convert predictions (out of sample predictions) to xarray.""" + return self.translate_posterior_predictive_dict_to_xarray(self.predictions) + + def priors_to_xarray(self): + """Convert prior samples (and if possible prior predictive too) to xarray.""" + if self.prior is None: + return {"prior": None, "prior_predictive": None} + if self.observations is not None: + prior_predictive_vars = list(self.observations.keys()) + prior_vars = [key for key in self.prior.keys() if key not in prior_predictive_vars] + else: + prior_vars = list(self.prior.keys()) + prior_predictive_vars = None + + priors_dict = {} + for group, var_names in zip( + ("prior", "prior_predictive"), (prior_vars, prior_predictive_vars) + ): + priors_dict[group] = ( + None + if var_names is None + else dict_to_dataset( + {k: np.expand_dims(self.prior[k], 0) for k in var_names}, + library=pymc3, + coords=self.coords, + dims=self.dims, + index_origin=self.index_origin, + ) + ) + return priors_dict + + @requires(["observations", "multi_observations"]) + @requires("model") + def observed_data_to_xarray(self): + """Convert observed data to xarray.""" + if self.predictions: + return None + return dict_to_dataset( + {**self.observations, **self.multi_observations}, + library=pymc3, + coords=self.coords, + dims=self.dims, + default_dims=[], + index_origin=self.index_origin, + ) + + @requires(["trace", "predictions"]) + @requires("model") + def constant_data_to_xarray(self): + """Convert constant data to xarray.""" + # For constant data, we are concerned only with deterministics and + # data. The constant data vars must be either pm.Data + # (TensorSharedVariable) or pm.Deterministic + constant_data_vars = {} # type: Dict[str, Var] + + def is_data(name, var) -> bool: + assert self.model is not None + return ( + var not in self.model.deterministics + and var not in self.model.observed_RVs + and var not in self.model.free_RVs + and var not in self.model.potentials + and (self.observations is None or name not in self.observations) + and isinstance(var, (Constant, SharedVariable)) + ) + + # I don't know how to find pm.Data, except that they are named + # variables that aren't observed or free RVs, nor are they + # deterministics, and then we eliminate observations. + for name, var in self.model.named_vars.items(): + if is_data(name, var): + constant_data_vars[name] = var + + if not constant_data_vars: + return None + + constant_data = {} + for name, vals in constant_data_vars.items(): + if hasattr(vals, "get_value"): + vals = vals.get_value() + elif hasattr(vals, "data"): + vals = vals.data + constant_data[name] = vals + + return dict_to_dataset( + constant_data, + library=pymc3, + coords=self.coords, + dims=self.dims, + default_dims=[], + index_origin=self.index_origin, + ) + + def to_inference_data(self): + """Convert all available data to an InferenceData object. 
+ + Note that if groups can not be created (e.g., there is no `trace`, so + the `posterior` and `sample_stats` can not be extracted), then the InferenceData + will not have those groups. + """ + id_dict = { + "posterior": self.posterior_to_xarray(), + "sample_stats": self.sample_stats_to_xarray(), + "log_likelihood": self.log_likelihood_to_xarray(), + "posterior_predictive": self.posterior_predictive_to_xarray(), + "predictions": self.predictions_to_xarray(), + **self.priors_to_xarray(), + "observed_data": self.observed_data_to_xarray(), + } + if self.predictions: + id_dict["predictions_constant_data"] = self.constant_data_to_xarray() + else: + id_dict["constant_data"] = self.constant_data_to_xarray() + return InferenceData(save_warmup=self.save_warmup, **id_dict) + + +def to_inference_data( + trace: Optional["MultiTrace"] = None, + *, + prior: Optional[Dict[str, Any]] = None, + posterior_predictive: Optional[Dict[str, Any]] = None, + log_likelihood: Union[bool, Iterable[str]] = True, + coords: Optional[CoordSpec] = None, + dims: Optional[DimSpec] = None, + model: Optional["Model"] = None, + save_warmup: Optional[bool] = None, + density_dist_obs: bool = True, +) -> InferenceData: + """Convert pymc3 data into an InferenceData object. + + All three of them are optional arguments, but at least one of ``trace``, + ``prior`` and ``posterior_predictive`` must be present. + For a usage example read the + :ref:`Creating InferenceData section on from_pymc3 ` + + Parameters + ---------- + trace : MultiTrace, optional + Trace generated from MCMC sampling. Output of + :func:`~pymc3.sampling.sample`. + prior : dict, optional + Dictionary with the variable names as keys, and values numpy arrays + containing prior and prior predictive samples. + posterior_predictive : dict, optional + Dictionary with the variable names as keys, and values numpy arrays + containing posterior predictive samples. + log_likelihood : bool or array_like of str, optional + List of variables to calculate `log_likelihood`. Defaults to True which calculates + `log_likelihood` for all observed variables. If set to False, log_likelihood is skipped. + coords : dict of {str: array-like}, optional + Map of coordinate names to coordinate values + dims : dict of {str: list of str}, optional + Map of variable names to the coordinate names to use to index its dimensions. + model : Model, optional + Model used to generate ``trace``. It is not necessary to pass ``model`` if in + ``with`` context. + save_warmup : bool, optional + Save warmup iterations InferenceData object. If not defined, use default + defined by the rcParams. + density_dist_obs : bool, default True + Store variables passed with ``observed`` arg to + :class:`~pymc.distributions.DensityDist` in the generated InferenceData. + + Returns + ------- + arviz.InferenceData + """ + if isinstance(trace, InferenceData): + return trace + + return InferenceDataConverter( + trace=trace, + prior=prior, + posterior_predictive=posterior_predictive, + log_likelihood=log_likelihood, + coords=coords, + dims=dims, + model=model, + save_warmup=save_warmup, + density_dist_obs=density_dist_obs, + ).to_inference_data() + + +### Later I could have this return ``None`` if the ``idata_orig`` argument is supplied. But +### perhaps we should have an inplace argument? 
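# --- Illustrative usage sketch (not part of the patch): the module-level
# ``to_inference_data`` entry point defined above, re-exported as
# ``pm.to_inference_data`` by this patch series, can be driven roughly like
# this. The toy model below and its variable names are assumptions made for
# the example only.
import numpy as np
import pymc3 as pm

with pm.Model() as model:
    mu = pm.Normal("mu", 0.0, 1.0)
    pm.Normal("obs", mu=mu, sigma=1.0, observed=np.random.randn(10))
    trace = pm.sample(500, chains=2, return_inferencedata=False)
    prior = pm.sample_prior_predictive()
    posterior_predictive = pm.sample_posterior_predictive(trace)

# Bundle the MultiTrace plus prior/posterior-predictive draws into a single
# ArviZ InferenceData object; ``model`` may be omitted when called inside the
# model context.
idata = pm.to_inference_data(
    trace=trace,
    prior=prior,
    posterior_predictive=posterior_predictive,
    model=model,
)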
+def predictions_to_inference_data( + predictions, + posterior_trace: Optional["MultiTrace"] = None, + model: Optional["Model"] = None, + coords: Optional[CoordSpec] = None, + dims: Optional[DimSpec] = None, + idata_orig: Optional[InferenceData] = None, + inplace: bool = False, +) -> InferenceData: + """Translate out-of-sample predictions into ``InferenceData``. + + Parameters + ---------- + predictions: Dict[str, np.ndarray] + The predictions are the return value of :func:`~pymc3.sample_posterior_predictive`, + a dictionary of strings (variable names) to numpy ndarrays (draws). + posterior_trace: MultiTrace + This should be a trace that has been thinned appropriately for + ``pymc3.sample_posterior_predictive``. Specifically, any variable whose shape is + a deterministic function of the shape of any predictor (explanatory, independent, etc.) + variables must be *removed* from this trace. + model: Model + The pymc3 model. It can be ommited if within a model context. + coords: Dict[str, array-like[Any]] + Coordinates for the variables. Map from coordinate names to coordinate values. + dims: Dict[str, array-like[str]] + Map from variable name to ordered set of coordinate names. + idata_orig: InferenceData, optional + If supplied, then modify this inference data in place, adding ``predictions`` and + (if available) ``predictions_constant_data`` groups. If this is not supplied, make a + fresh InferenceData + inplace: boolean, optional + If idata_orig is supplied and inplace is True, merge the predictions into idata_orig, + rather than returning a fresh InferenceData object. + + Returns + ------- + InferenceData: + May be modified ``idata_orig``. + """ + if inplace and not idata_orig: + raise ValueError( + "Do not pass True for inplace unless passing" "an existing InferenceData as idata_orig" + ) + new_idata = InferenceDataConverter( + trace=posterior_trace, + predictions=predictions, + model=model, + coords=coords, + dims=dims, + log_likelihood=False, + ).to_inference_data() + if idata_orig is None: + return new_idata + elif inplace: + concat([idata_orig, new_idata], dim=None, inplace=True) + return idata_orig + else: + # if we are not returning in place, then merge the old groups into the new inference + # data and return that. + concat([new_idata, idata_orig], dim=None, copy=True, inplace=True) + return new_idata diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 0b016a674e..06574f0508 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -27,7 +27,6 @@ import aesara import aesara.gradient as tg -import arviz import numpy as np import packaging import xarray @@ -39,6 +38,7 @@ import pymc3 as pm from pymc3.aesaraf import inputvars +from pymc3.backends.arviz import _DefaultTrace from pymc3.backends.base import BaseTrace, MultiTrace from pymc3.backends.ndarray import NDArray from pymc3.blocking import DictToArrayBijection @@ -345,7 +345,7 @@ def sample( Whether to return the trace as an :class:`arviz:arviz.InferenceData` (True) object or a `MultiTrace` (False) Defaults to `False`, but we'll switch to `True` in an upcoming release. idata_kwargs : dict, optional - Keyword arguments for :func:`arviz:arviz.from_pymc3` + Keyword arguments for :func:`pymc3.to_inference_data` mp_ctx : multiprocessing.context.BaseContent A multiprocessing context for parallel sampling. See multiprocessing documentation for details. 
@@ -636,12 +636,10 @@ def sample( idata = None if compute_convergence_checks or return_inferencedata: - # XXX: Arviz `log_likelihood` calculations need to be disabled until - # it's updated to work with v4. - ikwargs = dict(model=model, save_warmup=not discard_tuned_samples, log_likelihood=False) + ikwargs = dict(model=model, save_warmup=not discard_tuned_samples) if idata_kwargs: ikwargs.update(idata_kwargs) - idata = arviz.from_pymc3(trace, **ikwargs) + idata = pm.to_inference_data(trace, **ikwargs) if compute_convergence_checks: if draws - tune < 100: @@ -1550,61 +1548,6 @@ def stop_tuning(step): return step -class _DefaultTrace: - """ - Utility for collecting samples into a dictionary. - - Name comes from its similarity to ``defaultdict``: - entries are lazily created. - - Parameters - ---------- - samples : int - The number of samples that will be collected, per variable, - into the trace. - - Attributes - ---------- - trace_dict : Dict[str, np.ndarray] - A dictionary constituting a trace. Should be extracted - after a procedure has filled the `_DefaultTrace` using the - `insert()` method - """ - - trace_dict: Dict[str, np.ndarray] = {} - _len: Optional[int] = None - - def __init__(self, samples: int): - self._len = samples - self.trace_dict = {} - - def insert(self, k: str, v, idx: int): - """ - Insert `v` as the value of the `idx`th sample for the variable `k`. - - Parameters - ---------- - k: str - Name of the variable. - v: anything that can go into a numpy array (including a numpy array) - The value of the `idx`th sample from variable `k` - ids: int - The index of the sample we are inserting into the trace. - """ - value_shape = np.shape(v) - - # initialize if necessary - if k not in self.trace_dict: - array_shape = (self._len,) + value_shape - self.trace_dict[k] = np.empty(array_shape, dtype=np.array(v).dtype) - - # do the actual insertion - if value_shape == (): - self.trace_dict[k][idx] = v - else: - self.trace_dict[k][idx, :] = v - - def sample_posterior_predictive( trace, samples: Optional[int] = None, diff --git a/pymc3/tests/test_idata_conversion.py b/pymc3/tests/test_idata_conversion.py new file mode 100644 index 0000000000..e098fa5db9 --- /dev/null +++ b/pymc3/tests/test_idata_conversion.py @@ -0,0 +1,653 @@ +# pylint: disable=no-member, invalid-name, redefined-outer-name, protected-access, too-many-public-methods +from typing import Dict, Tuple + +import numpy as np +import pandas as pd +import pytest + +from arviz import InferenceData +from arviz.tests.helpers import check_multiple_attrs +from numpy import ma + +import pymc3 as pm + +from pymc3.backends.arviz import predictions_to_inference_data, to_inference_data + + +@pytest.fixture(scope="module") +def eight_schools_params(): + """Share setup for eight schools.""" + return { + "J": 8, + "y": np.array([28.0, 8.0, -3.0, 7.0, -1.0, 1.0, 18.0, 12.0]), + "sigma": np.array([15.0, 10.0, 16.0, 11.0, 9.0, 11.0, 10.0, 18.0]), + } + + +@pytest.fixture(scope="module") +def draws(): + """Share default draw count.""" + return 500 + + +@pytest.fixture(scope="module") +def chains(): + """Share default chain count.""" + return 2 + + +class TestDataPyMC3: + class Data: + def __init__(self, model, trace): + self.model = model + self.obj = trace + + @pytest.fixture(scope="class") + def data(self, eight_schools_params, draws, chains): + with pm.Model() as model: + mu = pm.Normal("mu", mu=0, sd=5) + tau = pm.HalfCauchy("tau", beta=5) + eta = pm.Normal("eta", mu=0, sd=1, size=eight_schools_params["J"]) + theta = pm.Deterministic("theta", 
mu + tau * eta) + pm.Normal( + "obs", + mu=theta, + sd=eight_schools_params["sigma"], + observed=eight_schools_params["y"], + ) + trace = pm.sample(draws, chains=chains) + + return self.Data(model, trace) + + def get_inference_data(self, data, eight_schools_params): + with data.model: + prior = pm.sample_prior_predictive() + posterior_predictive = pm.sample_posterior_predictive(data.obj) + + return ( + to_inference_data( + trace=data.obj, + prior=prior, + posterior_predictive=posterior_predictive, + coords={"school": np.arange(eight_schools_params["J"])}, + dims={"theta": ["school"], "eta": ["school"]}, + model=data.model, + ), + posterior_predictive, + ) + + def get_predictions_inference_data( + self, data, eight_schools_params, inplace + ) -> Tuple[InferenceData, Dict[str, np.ndarray]]: + with data.model: + prior = pm.sample_prior_predictive() + posterior_predictive = pm.sample_posterior_predictive(data.obj) + + idata = to_inference_data( + trace=data.obj, + prior=prior, + coords={"school": np.arange(eight_schools_params["J"])}, + dims={"theta": ["school"], "eta": ["school"]}, + ) + assert isinstance(idata, InferenceData) + extended = predictions_to_inference_data( + posterior_predictive, idata_orig=idata, inplace=inplace + ) + assert isinstance(extended, InferenceData) + assert (id(idata) == id(extended)) == inplace + return (extended, posterior_predictive) + + def make_predictions_inference_data( + self, data, eight_schools_params + ) -> Tuple[InferenceData, Dict[str, np.ndarray]]: + with data.model: + posterior_predictive = pm.sample_posterior_predictive(data.obj) + idata = predictions_to_inference_data( + posterior_predictive, + posterior_trace=data.obj, + coords={"school": np.arange(eight_schools_params["J"])}, + dims={"theta": ["school"], "eta": ["school"]}, + ) + assert isinstance(idata, InferenceData) + return idata, posterior_predictive + + def test_to_idata(self, data, eight_schools_params, chains, draws): + inference_data, posterior_predictive = self.get_inference_data(data, eight_schools_params) + test_dict = { + "posterior": ["mu", "tau", "eta", "theta"], + "sample_stats": ["diverging", "lp", "~log_likelihood"], + "log_likelihood": ["obs"], + "posterior_predictive": ["obs"], + "prior": ["mu", "tau", "eta", "theta"], + "prior_predictive": ["obs"], + "observed_data": ["obs"], + } + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + for key, values in posterior_predictive.items(): + ivalues = inference_data.posterior_predictive[key] + for chain in range(chains): + assert np.all( + np.isclose(ivalues[chain], values[chain * draws : (chain + 1) * draws]) + ) + + def test_predictions_to_idata(self, data, eight_schools_params): + "Test that we can add predictions to a previously-existing InferenceData." 
+ test_dict = { + "posterior": ["mu", "tau", "eta", "theta"], + "sample_stats": ["diverging", "lp"], + "log_likelihood": ["obs"], + "predictions": ["obs"], + "prior": ["mu", "tau", "eta", "theta"], + "observed_data": ["obs"], + } + + # check adding non-destructively + inference_data, posterior_predictive = self.get_predictions_inference_data( + data, eight_schools_params, False + ) + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + for key, values in posterior_predictive.items(): + ivalues = inference_data.predictions[key] + assert ivalues.shape[0] == 1 # one chain in predictions + assert np.all(np.isclose(ivalues[0], values)) + + # check adding in place + inference_data, posterior_predictive = self.get_predictions_inference_data( + data, eight_schools_params, True + ) + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + for key, values in posterior_predictive.items(): + ivalues = inference_data.predictions[key] + assert ivalues.shape[0] == 1 # one chain in predictions + assert np.all(np.isclose(ivalues[0], values)) + + def test_predictions_to_idata_new(self, data, eight_schools_params): + # check creating new + inference_data, posterior_predictive = self.make_predictions_inference_data( + data, eight_schools_params + ) + test_dict = { + "posterior": ["mu", "tau", "eta", "theta"], + "predictions": ["obs"], + "~observed_data": "", + } + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + for key, values in posterior_predictive.items(): + ivalues = inference_data.predictions[key] + # could the following better be done by simply flattening both the ivalues + # and the values? + if len(ivalues.shape) == 3: + ivalues_arr = np.reshape( + ivalues.values, (ivalues.shape[0] * ivalues.shape[1], ivalues.shape[2]) + ) + elif len(ivalues.shape) == 2: + ivalues_arr = np.reshape(ivalues.values, (ivalues.shape[0] * ivalues.shape[1])) + else: + raise ValueError(f"Unexpected values shape for variable {key}") + assert (ivalues.shape[0] == 2) and (ivalues.shape[1] == 500) + assert values.shape[0] == 1000 + assert np.all(np.isclose(ivalues_arr, values)) + + def test_posterior_predictive_keep_size(self, data, chains, draws, eight_schools_params): + with data.model: + posterior_predictive = pm.sample_posterior_predictive(data.obj, keep_size=True) + inference_data = to_inference_data( + trace=data.obj, + posterior_predictive=posterior_predictive, + coords={"school": np.arange(eight_schools_params["J"])}, + dims={"theta": ["school"], "eta": ["school"]}, + ) + + shape = inference_data.posterior_predictive.obs.shape + assert np.all( + [obs_s == s for obs_s, s in zip(shape, (chains, draws, eight_schools_params["J"]))] + ) + + def test_posterior_predictive_warning(self, data, eight_schools_params, caplog): + with data.model: + posterior_predictive = pm.sample_posterior_predictive(data.obj, 370) + inference_data = to_inference_data( + trace=data.obj, + posterior_predictive=posterior_predictive, + coords={"school": np.arange(eight_schools_params["J"])}, + dims={"theta": ["school"], "eta": ["school"]}, + ) + + records = caplog.records + shape = inference_data.posterior_predictive.obs.shape + assert np.all([obs_s == s for obs_s, s in zip(shape, (1, 370, eight_schools_params["J"]))]) + assert len(records) == 1 + assert records[0].levelname == "WARNING" + + @pytest.mark.xfail(reason="Dims option is not supported yet") + @pytest.mark.parametrize("use_context", [True, False]) + def test_autodetect_coords_from_model(self, use_context): + df_data = 
pd.DataFrame(columns=["date"]).set_index("date") + dates = pd.date_range(start="2020-05-01", end="2020-05-20") + for city, mu in {"Berlin": 15, "San Marino": 18, "Paris": 16}.items(): + df_data[city] = np.random.normal(loc=mu, size=len(dates)) + df_data.index = dates + df_data.index.name = "date" + + coords = {"date": df_data.index, "city": df_data.columns} + with pm.Model(coords=coords) as model: + europe_mean = pm.Normal("europe_mean_temp", mu=15.0, sd=3.0) + city_offset = pm.Normal("city_offset", mu=0.0, sd=3.0, dims="city") + city_temperature = pm.Deterministic( + "city_temperature", europe_mean + city_offset, dims="city" + ) + + data_dims = ("date", "city") + data = pm.Data("data", df_data, dims=data_dims) + _ = pm.Normal("likelihood", mu=city_temperature, sd=0.5, observed=data, dims=data_dims) + + trace = pm.sample( + return_inferencedata=False, + compute_convergence_checks=False, + cores=1, + chains=1, + tune=20, + draws=30, + step=pm.Metropolis(), + ) + if use_context: + idata = to_inference_data(trace=trace) + if not use_context: + idata = to_inference_data(trace=trace, model=model) + + assert "city" in list(idata.posterior.dims) + assert "city" in list(idata.observed_data.dims) + assert "date" in list(idata.observed_data.dims) + + np.testing.assert_array_equal(idata.posterior.coords["city"], coords["city"]) + np.testing.assert_array_equal(idata.observed_data.coords["date"], coords["date"]) + np.testing.assert_array_equal(idata.observed_data.coords["city"], coords["city"]) + + @pytest.mark.xfail(reason="Dims option is not supported yet") + def test_ovewrite_model_coords_dims(self): + """Check coords and dims from model object can be partially overwrited.""" + dim1 = ["a", "b"] + new_dim1 = ["c", "d"] + coords = {"dim1": dim1, "dim2": ["c1", "c2"]} + x_data = np.arange(4).reshape((2, 2)) + y = x_data + np.random.normal(size=(2, 2)) + with pm.Model(coords=coords): + x = pm.Data("x", x_data, dims=("dim1", "dim2")) + beta = pm.Normal("beta", 0, 1, dims="dim1") + _ = pm.Normal("obs", x * beta, 1, observed=y, dims=("dim1", "dim2")) + trace = pm.sample(100, tune=100) + idata1 = to_inference_data(trace) + idata2 = to_inference_data(trace, coords={"dim1": new_dim1}, dims={"beta": ["dim2"]}) + + test_dict = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]} + fails1 = check_multiple_attrs(test_dict, idata1) + assert not fails1 + fails2 = check_multiple_attrs(test_dict, idata2) + assert not fails2 + assert "dim1" in list(idata1.posterior.beta.dims) + assert "dim2" in list(idata2.posterior.beta.dims) + assert np.all(idata1.constant_data.x.dim1.values == np.array(dim1)) + assert np.all(idata1.constant_data.x.dim2.values == np.array(["c1", "c2"])) + assert np.all(idata2.constant_data.x.dim1.values == np.array(new_dim1)) + assert np.all(idata2.constant_data.x.dim2.values == np.array(["c1", "c2"])) + + @pytest.mark.xfail(reason="Missing data not refactored for v4") + def test_missing_data_model(self): + # source pymc3/pymc3/tests/test_missing.py + data = ma.masked_values([1, 2, -1, 4, -1], value=-1) + model = pm.Model() + with model: + x = pm.Normal("x", 1, 1) + pm.Normal("y", x, 1, observed=data) + inference_data = pm.sample(100, chains=2, return_inferencedata=True) + + # make sure that data is really missing + (y_missing,) = model.missing_values + # TODO: Test values aren't enabled anymore + assert y_missing.tag.test_value.shape == (2,) + + test_dict = {"posterior": ["x"], "observed_data": ["y"], "log_likelihood": ["y"]} + fails = check_multiple_attrs(test_dict, 
inference_data) + assert not fails + + @pytest.mark.xfail(reason="LKJCholeskyCov not refactored for v4") + @pytest.mark.xfail(reason="Missing data not refactored for v4") + def test_mv_missing_data_model(self): + data = ma.masked_values([[1, 2], [2, 2], [-1, 4], [2, -1], [-1, -1]], value=-1) + + model = pm.Model() + with model: + mu = pm.Normal("mu", 0, 1, size=2) + sd_dist = pm.HalfNormal.dist(1.0) + chol, *_ = pm.LKJCholeskyCov("chol_cov", n=2, eta=1, sd_dist=sd_dist, compute_corr=True) + pm.MvNormal("y", mu=mu, chol=chol, observed=data) + inference_data = pm.sample(100, chains=2, return_inferencedata=True) + + # make sure that data is really missing + (y_missing,) = model.missing_values + # TODO: Test values aren't enabled anymore + assert y_missing.tag.test_value.shape == (4,) + + test_dict = { + "posterior": ["mu", "chol_cov"], + "observed_data": ["y"], + "log_likelihood": ["y"], + } + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + + @pytest.mark.parametrize("log_likelihood", [True, False, ["y1"]]) + def test_multiple_observed_rv(self, log_likelihood): + y1_data = np.random.randn(10) + y2_data = np.random.randn(100) + with pm.Model(): + x = pm.Normal("x", 1, 1) + pm.Normal("y1", x, 1, observed=y1_data) + pm.Normal("y2", x, 1, observed=y2_data) + inference_data = pm.sample( + 100, + chains=2, + return_inferencedata=True, + idata_kwargs={"log_likelihood": log_likelihood}, + ) + test_dict = { + "posterior": ["x"], + "observed_data": ["y1", "y2"], + "log_likelihood": ["y1", "y2"], + "sample_stats": ["diverging", "lp", "~log_likelihood"], + } + if not log_likelihood: + test_dict.pop("log_likelihood") + test_dict["~log_likelihood"] = [] + if isinstance(log_likelihood, list): + test_dict["log_likelihood"] = ["y1", "~y2"] + + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + + @pytest.mark.xfail(reason="DensityDist not refactored for v4") + def test_multiple_observed_rv_without_observations(self): + with pm.Model(): + mu = pm.Normal("mu") + x = pm.DensityDist( # pylint: disable=unused-variable + "x", logpt(pm.Normal.dist(mu, 1.0)), observed={"value": 0.1} + ) + inference_data = pm.sample(100, chains=2, return_inferencedata=True) + test_dict = { + "posterior": ["mu"], + "sample_stats": ["lp"], + "log_likelihood": ["x"], + "observed_data": ["value", "~x"], + } + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + assert inference_data.observed_data.value.dtype.kind == "f" + + @pytest.mark.xfail(reason="DensityDist not refactored for v4") + @pytest.mark.parametrize("multiobs", (True, False)) + def test_multiobservedrv_to_observed_data(self, multiobs): + # fake regression data, with weights (W) + np.random.seed(2019) + N = 100 + X = np.random.uniform(size=N) + W = 1 + np.random.poisson(size=N) + a, b = 5, 17 + Y = a + np.random.normal(b * X) + + with pm.Model(): + a = pm.Normal("a", 0, 10) + b = pm.Normal("b", 0, 10) + mu = a + b * X + sigma = pm.HalfNormal("sigma", 1) + + def weighted_normal(y, w): + return w * logpt(pm.Normal.dist(mu=mu, sd=sigma), y) + + y_logp = pm.DensityDist( # pylint: disable=unused-variable + "y_logp", weighted_normal, observed={"y": Y, "w": W} + ) + idata = pm.sample( + 20, tune=20, return_inferencedata=True, idata_kwargs={"density_dist_obs": multiobs} + ) + multiobs_str = "" if multiobs else "~" + test_dict = { + "posterior": ["a", "b", "sigma"], + "sample_stats": ["lp"], + "log_likelihood": ["y_logp"], + f"{multiobs_str}observed_data": ["y", "w"], + } + fails = 
check_multiple_attrs(test_dict, idata) + assert not fails + if multiobs: + assert idata.observed_data.y.dtype.kind == "f" + + def test_single_observation(self): + with pm.Model(): + p = pm.Uniform("p", 0, 1) + pm.Binomial("w", p=p, n=2, observed=1) + inference_data = pm.sample(500, chains=2, return_inferencedata=True) + + assert inference_data + + @pytest.mark.xfail(reason="Potential not refactored for v4") + def test_potential(self): + with pm.Model(): + x = pm.Normal("x", 0.0, 1.0) + pm.Potential("z", logpt(pm.Normal.dist(x, 1.0), np.random.randn(10))) + inference_data = pm.sample(100, chains=2, return_inferencedata=True) + + assert inference_data + + @pytest.mark.parametrize("use_context", [True, False]) + def test_constant_data(self, use_context): + """Test constant_data group behaviour.""" + with pm.Model() as model: + x = pm.Data("x", [1.0, 2.0, 3.0]) + y = pm.Data("y", [1.0, 2.0, 3.0]) + beta = pm.Normal("beta", 0, 1) + obs = pm.Normal("obs", x * beta, 1, observed=y) # pylint: disable=unused-variable + trace = pm.sample(100, tune=100) + if use_context: + inference_data = to_inference_data(trace=trace) + + if not use_context: + inference_data = to_inference_data(trace=trace, model=model) + test_dict = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]} + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + + def test_predictions_constant_data(self): + with pm.Model(): + x = pm.Data("x", [1.0, 2.0, 3.0]) + y = pm.Data("y", [1.0, 2.0, 3.0]) + beta = pm.Normal("beta", 0, 1) + obs = pm.Normal("obs", x * beta, 1, observed=y) # pylint: disable=unused-variable + trace = pm.sample(100, tune=100) + inference_data = to_inference_data(trace) + + test_dict = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]} + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + + with pm.Model(): + x = pm.Data("x", [1.0, 2.0]) + y = pm.Data("y", [1.0, 2.0]) + beta = pm.Normal("beta", 0, 1) + obs = pm.Normal("obs", x * beta, 1, observed=y) # pylint: disable=unused-variable + predictive_trace = pm.sample_posterior_predictive(inference_data) + assert set(predictive_trace.keys()) == {"obs"} + # this should be four chains of 100 samples + # assert predictive_trace["obs"].shape == (400, 2) + # but the shape seems to vary between pymc3 versions + inference_data = predictions_to_inference_data(predictive_trace, posterior_trace=trace) + test_dict = {"posterior": ["beta"], "~observed_data": ""} + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails, "Posterior data not copied over as expected." + test_dict = {"predictions": ["obs"]} + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails, "Predictions not instantiated as expected." + test_dict = {"predictions_constant_data": ["x"]} + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails, "Predictions constant data not instantiated as expected." 
+ + def test_no_trace(self): + with pm.Model() as model: + x = pm.Data("x", [1.0, 2.0, 3.0]) + y = pm.Data("y", [1.0, 2.0, 3.0]) + beta = pm.Normal("beta", 0, 1) + obs = pm.Normal("obs", x * beta, 1, observed=y) # pylint: disable=unused-variable + trace = pm.sample(100, tune=100) + prior = pm.sample_prior_predictive() + posterior_predictive = pm.sample_posterior_predictive(trace) + + # Only prior + inference_data = to_inference_data(prior=prior, model=model) + test_dict = {"prior": ["beta"], "prior_predictive": ["obs"]} + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + # Only posterior_predictive + inference_data = to_inference_data(posterior_predictive=posterior_predictive, model=model) + test_dict = {"posterior_predictive": ["obs"]} + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + # Prior and posterior_predictive but no trace + inference_data = to_inference_data( + prior=prior, posterior_predictive=posterior_predictive, model=model + ) + test_dict = { + "prior": ["beta"], + "prior_predictive": ["obs"], + "posterior_predictive": ["obs"], + } + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + + @pytest.mark.parametrize("use_context", [True, False]) + def test_priors_separation(self, use_context): + """Test model is enough to get prior, prior predictive and observed_data.""" + with pm.Model() as model: + x = pm.Data("x", [1.0, 2.0, 3.0]) + y = pm.Data("y", [1.0, 2.0, 3.0]) + beta = pm.Normal("beta", 0, 1) + obs = pm.Normal("obs", x * beta, 1, observed=y) # pylint: disable=unused-variable + prior = pm.sample_prior_predictive() + + test_dict = { + "prior": ["beta", "~obs"], + "observed_data": ["obs"], + "prior_predictive": ["obs"], + } + if use_context: + with model: + inference_data = to_inference_data(prior=prior) + else: + inference_data = to_inference_data(prior=prior, model=model) + fails = check_multiple_attrs(test_dict, inference_data) + assert not fails + + @pytest.mark.xfail(reason="Dims option is not supported yet") + def test_multivariate_observations(self): + coords = {"direction": ["x", "y", "z"], "experiment": np.arange(20)} + data = np.random.multinomial(20, [0.2, 0.3, 0.5], size=20) + with pm.Model(coords=coords): + p = pm.Beta("p", 1, 1, size=(3,)) + pm.Multinomial("y", 20, p, dims=("experiment", "direction"), observed=data) + idata = pm.sample(draws=50, tune=100, return_inferencedata=True) + test_dict = { + "posterior": ["p"], + "sample_stats": ["lp"], + "log_likelihood": ["y"], + "observed_data": ["y"], + } + fails = check_multiple_attrs(test_dict, idata) + assert not fails + assert "direction" not in idata.log_likelihood.dims + assert "direction" in idata.observed_data.dims + + +class TestPyMC3WarmupHandling: + @pytest.mark.skipif( + not hasattr(pm.backends.base.SamplerReport, "n_draws"), + reason="requires pymc3 3.9 or higher", + ) + @pytest.mark.parametrize("save_warmup", [False, True]) + @pytest.mark.parametrize("chains", [1, 2]) + @pytest.mark.parametrize("tune,draws", [(0, 50), (10, 40), (30, 0)]) + def test_save_warmup(self, save_warmup, chains, tune, draws): + with pm.Model(): + pm.Uniform("u1") + pm.Normal("n1") + idata = pm.sample( + tune=tune, + draws=draws, + chains=chains, + cores=1, + step=pm.Metropolis(), + discard_tuned_samples=False, + return_inferencedata=True, + idata_kwargs={"save_warmup": save_warmup}, + ) + warmup_prefix = "" if save_warmup and (tune > 0) else "~" + post_prefix = "" if draws > 0 else "~" + test_dict = { + f"{post_prefix}posterior": ["u1", "n1"], + 
f"{post_prefix}sample_stats": ["~tune", "accept"], + f"{warmup_prefix}warmup_posterior": ["u1", "n1"], + f"{warmup_prefix}warmup_sample_stats": ["~tune"], + "~warmup_log_likelihood": [], + "~log_likelihood": [], + } + fails = check_multiple_attrs(test_dict, idata) + assert not fails + if hasattr(idata, "posterior"): + assert idata.posterior.dims["chain"] == chains + assert idata.posterior.dims["draw"] == draws + if hasattr(idata, "warmup_posterior"): + assert idata.warmup_posterior.dims["chain"] == chains + assert idata.warmup_posterior.dims["draw"] == tune + + def test_save_warmup_issue_1208_after_3_9(self): + with pm.Model(): + pm.Uniform("u1") + pm.Normal("n1") + trace = pm.sample( + tune=100, + draws=200, + chains=2, + cores=1, + step=pm.Metropolis(), + discard_tuned_samples=False, + ) + assert isinstance(trace, pm.backends.base.MultiTrace) + assert len(trace) == 300 + + # from original trace, warmup draws should be separated out + idata = to_inference_data(trace, save_warmup=True) + test_dict = { + "posterior": ["u1", "n1"], + "sample_stats": ["~tune", "accept"], + "warmup_posterior": ["u1", "n1"], + "warmup_sample_stats": ["~tune", "accept"], + } + fails = check_multiple_attrs(test_dict, idata) + assert not fails + assert idata.posterior.dims["chain"] == 2 + assert idata.posterior.dims["draw"] == 200 + + # manually sliced trace triggers the same warning as <=3.8 + with pytest.warns(UserWarning, match="Warmup samples"): + idata = to_inference_data(trace[-30:], save_warmup=True) + test_dict = { + "posterior": ["u1", "n1"], + "sample_stats": ["~tune", "accept"], + "~warmup_posterior": [], + "~warmup_sample_stats": [], + } + fails = check_multiple_attrs(test_dict, idata) + assert not fails + assert idata.posterior.dims["chain"] == 2 + assert idata.posterior.dims["draw"] == 30 diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py index 150675b43c..25363d2e99 100644 --- a/pymc3/tests/test_sampling.py +++ b/pymc3/tests/test_sampling.py @@ -471,7 +471,7 @@ def test_normal_scalar_idata(self): with model: # test keep_size parameter and idata input - idata = az.from_pymc3(trace) + idata = pm.to_inference_data(trace) ppc = pm.sample_posterior_predictive(idata, keep_size=True) assert ppc["a"].shape == (nchains, ndraws) @@ -514,7 +514,7 @@ def test_normal_vector_idata(self, caplog): with model: # test keep_size parameter with inference data as input... - idata = az.from_pymc3(trace) + idata = pm.to_inference_data(trace) ppc = pm.sample_posterior_predictive(idata, keep_size=True) assert ppc["a"].shape == (trace.nchains, len(trace), 2) @@ -720,14 +720,14 @@ def test_sample_posterior_predictive_w(self): y = pm.Normal("y", mu=mu, sigma=1, observed=data0) with pytest.warns(UserWarning, match=warning_msg): trace_0 = pm.sample(10, tune=0, chains=2, return_inferencedata=False) - idata_0 = az.from_pymc3(trace_0, log_likelihood=False) + idata_0 = pm.to_inference_data(trace_0, log_likelihood=False) with pm.Model() as model_1: mu = pm.Normal("mu", mu=0, sigma=1, size=len(data0)) y = pm.Normal("y", mu=mu, sigma=1, observed=data0) with pytest.warns(UserWarning, match=warning_msg): trace_1 = pm.sample(10, tune=0, chains=2, return_inferencedata=False) - idata_1 = az.from_pymc3(trace_1, log_likelihood=False) + idata_1 = pm.to_inference_data(trace_1, log_likelihood=False) with pm.Model() as model_2: # Model with no observed RVs. 
@@ -1037,13 +1037,13 @@ def test_sample_from_xarray_prior(self, point_list_arg_bug_fixture): with pmodel: prior = pm.sample_prior_predictive(samples=20) - idat = az.from_pymc3(trace, prior=prior) + idat = pm.to_inference_data(trace, prior=prior) with pmodel: pp = pm.sample_posterior_predictive(idat.prior, var_names=["d"]) @pytest.mark.xfail(reason="Arviz not refactored for v4") def test_sample_from_xarray_posterior(self, point_list_arg_bug_fixture): pmodel, trace = point_list_arg_bug_fixture - idat = az.from_pymc3(trace) + idat = pm.to_inference_data(trace) with pmodel: pp = pm.sample_posterior_predictive(idat.posterior, var_names=["d"]) diff --git a/requirements.txt b/requirements.txt index 9ec84e7538..c9ecc6e1a6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ aesara>=2.0.1 -arviz>=0.11.1 +arviz>=0.11.2 dill fastprogress>=0.2.0 numpy>=1.15.0 From e01a473f41c3d293f4644d0017df9e3df593a9af Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Thu, 25 Mar 2021 18:51:00 -0500 Subject: [PATCH 4/7] Disable dims, default_dims, and index_origin options until arviz > v0.11.2 --- pymc3/backends/arviz.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/pymc3/backends/arviz.py b/pymc3/backends/arviz.py index 1073a0b930..3be8a9986b 100644 --- a/pymc3/backends/arviz.py +++ b/pymc3/backends/arviz.py @@ -302,7 +302,7 @@ def posterior_to_xarray(self): coords=self.coords, dims=self.dims, attrs=self.attrs, - index_origin=self.index_origin, + # index_origin=self.index_origin, ), dict_to_dataset( data_warmup, @@ -310,7 +310,7 @@ def posterior_to_xarray(self): coords=self.coords, dims=self.dims, attrs=self.attrs, - index_origin=self.index_origin, + # index_origin=self.index_origin, ), ) @@ -344,7 +344,7 @@ def sample_stats_to_xarray(self): dims=None, coords=self.coords, attrs=self.attrs, - index_origin=self.index_origin, + # index_origin=self.index_origin, ), dict_to_dataset( data_warmup, @@ -352,7 +352,7 @@ def sample_stats_to_xarray(self): dims=None, coords=self.coords, attrs=self.attrs, - index_origin=self.index_origin, + # index_origin=self.index_origin, ), ) @@ -385,7 +385,7 @@ def log_likelihood_to_xarray(self): dims=self.dims, coords=self.coords, skip_event_dims=True, - index_origin=self.index_origin, + # index_origin=self.index_origin, ), dict_to_dataset( data_warmup, @@ -393,7 +393,7 @@ def log_likelihood_to_xarray(self): dims=self.dims, coords=self.coords, skip_event_dims=True, - index_origin=self.index_origin, + # index_origin=self.index_origin, ), ) @@ -415,7 +415,11 @@ def translate_posterior_predictive_dict_to_xarray(self, dct) -> xr.Dataset: k, ) return dict_to_dataset( - data, library=pymc3, coords=self.coords, dims=self.dims, index_origin=self.index_origin + data, + library=pymc3, + coords=self.coords, + # dims=self.dims, + # index_origin=self.index_origin ) @requires(["posterior_predictive"]) @@ -450,8 +454,8 @@ def priors_to_xarray(self): {k: np.expand_dims(self.prior[k], 0) for k in var_names}, library=pymc3, coords=self.coords, - dims=self.dims, - index_origin=self.index_origin, + # dims=self.dims, + # index_origin=self.index_origin, ) ) return priors_dict @@ -466,9 +470,9 @@ def observed_data_to_xarray(self): {**self.observations, **self.multi_observations}, library=pymc3, coords=self.coords, - dims=self.dims, - default_dims=[], - index_origin=self.index_origin, + # dims=self.dims, + # default_dims=[], + # index_origin=self.index_origin, ) @requires(["trace", "predictions"]) @@ -513,9 +517,9 @@ def is_data(name, 
var) -> bool: constant_data, library=pymc3, coords=self.coords, - dims=self.dims, - default_dims=[], - index_origin=self.index_origin, + # dims=self.dims, + # default_dims=[], + # index_origin=self.index_origin, ) def to_inference_data(self): From 1a604c354b3879bed20226ab76cf67a17949aa75 Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Thu, 25 Mar 2021 20:09:16 -0500 Subject: [PATCH 5/7] Re-enable Arviz tests in pymc3.tests.test_sampling --- pymc3/tests/test_sampling.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py index 25363d2e99..41cb200450 100644 --- a/pymc3/tests/test_sampling.py +++ b/pymc3/tests/test_sampling.py @@ -20,12 +20,13 @@ import aesara import aesara.tensor as aet -import arviz as az import numpy as np import numpy.testing as npt import pytest from aesara import shared +from arviz import InferenceData +from arviz import from_dict as az_from_dict from scipy import stats import pymc3 as pm @@ -200,7 +201,7 @@ def test_return_inferencedata(self, monkeypatch): # inferencedata with tuning result = pm.sample(**kwargs, return_inferencedata=True, discard_tuned_samples=False) - assert isinstance(result, az.InferenceData) + assert isinstance(result, InferenceData) assert result.posterior.sizes["draw"] == 100 assert result.posterior.sizes["chain"] == 2 assert len(result._groups_warmup) > 0 @@ -215,7 +216,7 @@ def test_return_inferencedata(self, monkeypatch): random_seed=-1 ) assert "prior" in result - assert isinstance(result, az.InferenceData) + assert isinstance(result, InferenceData) assert result.posterior.sizes["draw"] == 100 assert result.posterior.sizes["chain"] == 2 assert len(result._groups_warmup) == 0 @@ -458,7 +459,6 @@ def test_normal_scalar(self): ppc = pm.sample_posterior_predictive(trace, size=5, var_names=["a"]) assert ppc["a"].shape == (nchains * ndraws, 5) - @pytest.mark.xfail(reason="Arviz not refactored for v4") def test_normal_scalar_idata(self): nchains = 2 ndraws = 500 @@ -466,12 +466,19 @@ def test_normal_scalar_idata(self): mu = pm.Normal("mu", 0.0, 1.0) a = pm.Normal("a", mu=mu, sigma=1, observed=0.0) trace = pm.sample( - draws=ndraws, chains=nchains, return_inferencedata=True, discard_tuned_samples=False + draws=ndraws, + chains=nchains, + return_inferencedata=False, + discard_tuned_samples=False, ) + assert not isinstance(trace, InferenceData) + with model: # test keep_size parameter and idata input idata = pm.to_inference_data(trace) + assert isinstance(idata, InferenceData) + ppc = pm.sample_posterior_predictive(idata, keep_size=True) assert ppc["a"].shape == (nchains, ndraws) @@ -505,16 +512,19 @@ def test_normal_vector(self, caplog): assert "a" in ppc assert ppc["a"].shape == (10, 4, 2) - @pytest.mark.xfail(reason="Arviz not refactored for v4") def test_normal_vector_idata(self, caplog): with pm.Model() as model: mu = pm.Normal("mu", 0.0, 1.0) a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.5, 0.2])) trace = pm.sample(return_inferencedata=False) + assert not isinstance(trace, InferenceData) + with model: # test keep_size parameter with inference data as input... 
idata = pm.to_inference_data(trace) + assert isinstance(idata, InferenceData) + ppc = pm.sample_posterior_predictive(idata, keep_size=True) assert ppc["a"].shape == (trace.nchains, len(trace), 2) @@ -703,7 +713,7 @@ def test_potentials_warning(self): p = pm.Potential("p", a + 1) obs = pm.Normal("obs", a, 1, observed=5) - trace = az.from_dict({"a": np.random.rand(10)}) + trace = az_from_dict({"a": np.random.rand(10)}) with m: with pytest.warns(UserWarning, match=warning_msg): pm.sample_posterior_predictive(trace, samples=5) @@ -768,7 +778,7 @@ def test_potentials_warning(self): p = pm.Potential("p", a + 1) obs = pm.Normal("obs", a, 1, observed=5) - trace = az.from_dict({"a": np.random.rand(10)}) + trace = az_from_dict({"a": np.random.rand(10)}) with pytest.warns(UserWarning, match=warning_msg): pm.sample_posterior_predictive_w(samples=5, traces=[trace, trace], models=[m, m]) @@ -1031,17 +1041,17 @@ def test_point_list_arg_bug_spp(self, point_list_arg_bug_fixture): with pmodel: pp = pm.sample_posterior_predictive([trace[15]], var_names=["d"]) - @pytest.mark.xfail(reason="Arviz not refactored for v4") def test_sample_from_xarray_prior(self, point_list_arg_bug_fixture): pmodel, trace = point_list_arg_bug_fixture with pmodel: prior = pm.sample_prior_predictive(samples=20) + idat = pm.to_inference_data(trace, prior=prior) + with pmodel: pp = pm.sample_posterior_predictive(idat.prior, var_names=["d"]) - @pytest.mark.xfail(reason="Arviz not refactored for v4") def test_sample_from_xarray_posterior(self, point_list_arg_bug_fixture): pmodel, trace = point_list_arg_bug_fixture idat = pm.to_inference_data(trace) From d5726e793871d14648c78d66a432d0f258cfa924 Mon Sep 17 00:00:00 2001 From: "Oriol (ZBook)" Date: Fri, 26 Mar 2021 06:13:11 +0200 Subject: [PATCH 6/7] add workaround for data groups until next arviz release --- pymc3/backends/arviz.py | 82 +++++++++++++++++++--------- pymc3/tests/test_idata_conversion.py | 4 -- 2 files changed, 55 insertions(+), 31 deletions(-) diff --git a/pymc3/backends/arviz.py b/pymc3/backends/arviz.py index 3be8a9986b..cc2b1937c5 100644 --- a/pymc3/backends/arviz.py +++ b/pymc3/backends/arviz.py @@ -20,7 +20,9 @@ from aesara.graph.basic import Constant from aesara.tensor.sharedvar import SharedVariable from arviz import InferenceData, concat, rcParams -from arviz.data.base import CoordSpec, DimSpec, dict_to_dataset, requires +from arviz.data.base import CoordSpec, DimSpec +from arviz.data.base import dict_to_dataset as _dict_to_dataset +from arviz.data.base import generate_dims_coords, make_attrs, requires import pymc3 @@ -98,6 +100,37 @@ def insert(self, k: str, v, idx: int): self.trace_dict[k][idx, :] = v +def dict_to_dataset( + data, + library=None, + coords=None, + dims=None, + attrs=None, + default_dims=None, + skip_event_dims=None, + index_origin=None, +): + """Temporal workaround for dict_to_dataset. + + Once ArviZ>0.11.2 release is available, only two changes are needed for everything to work. 
+ 1) this should be deleted, 2) dict_to_dataset should be imported as is from arviz, no underscore, + also remove unnecessary imports + """ + if default_dims is None: + return _dict_to_dataset( + data, library=library, coords=coords, dims=dims, skip_event_dims=skip_event_dims + ) + else: + out_data = {} + for name, vals in data.items(): + vals = np.atleast_1d(vals) + val_dims = dims.get(name) + val_dims, coords = generate_dims_coords(vals.shape, name, dims=val_dims, coords=coords) + coords = {key: xr.IndexVariable((key,), data=coords[key]) for key in val_dims} + out_data[name] = xr.DataArray(vals, dims=val_dims, coords=coords) + return xr.Dataset(data_vars=out_data, attrs=make_attrs(library=library)) + + class InferenceDataConverter: # pylint: disable=too-many-instance-attributes """Encapsulate InferenceData specific logic.""" @@ -196,14 +229,13 @@ def arbitrary_element(dct: Dict[Any, np.ndarray]) -> np.ndarray: self.dims = {**model_dims, **self.dims} self.density_dist_obs = density_dist_obs - self.observations, self.multi_observations = self.find_observations() + self.observations = self.find_observations() - def find_observations(self) -> Tuple[Optional[Dict[str, Var]], Optional[Dict[str, Var]]]: + def find_observations(self) -> Optional[Dict[str, Var]]: """If there are observations available, return them as a dictionary.""" if self.model is None: - return (None, None) + return None observations = {} - multi_observations = {} for obs in self.model.observed_RVs: aux_obs = getattr(obs.tag, "observations", None) if aux_obs is not None: @@ -215,7 +247,7 @@ def find_observations(self) -> Tuple[Optional[Dict[str, Var]], Optional[Dict[str else: warnings.warn(f"No data for observation {obs}") - return observations, multi_observations + return observations def split_trace(self) -> Tuple[Union[None, "MultiTrace"], Union[None, "MultiTrace"]]: """Split MultiTrace object into posterior and warmup. 
@@ -302,7 +334,7 @@ def posterior_to_xarray(self): coords=self.coords, dims=self.dims, attrs=self.attrs, - # index_origin=self.index_origin, + index_origin=self.index_origin, ), dict_to_dataset( data_warmup, @@ -310,7 +342,7 @@ def posterior_to_xarray(self): coords=self.coords, dims=self.dims, attrs=self.attrs, - # index_origin=self.index_origin, + index_origin=self.index_origin, ), ) @@ -344,7 +376,7 @@ def sample_stats_to_xarray(self): dims=None, coords=self.coords, attrs=self.attrs, - # index_origin=self.index_origin, + index_origin=self.index_origin, ), dict_to_dataset( data_warmup, @@ -352,7 +384,7 @@ def sample_stats_to_xarray(self): dims=None, coords=self.coords, attrs=self.attrs, - # index_origin=self.index_origin, + index_origin=self.index_origin, ), ) @@ -385,7 +417,7 @@ def log_likelihood_to_xarray(self): dims=self.dims, coords=self.coords, skip_event_dims=True, - # index_origin=self.index_origin, + index_origin=self.index_origin, ), dict_to_dataset( data_warmup, @@ -393,7 +425,7 @@ def log_likelihood_to_xarray(self): dims=self.dims, coords=self.coords, skip_event_dims=True, - # index_origin=self.index_origin, + index_origin=self.index_origin, ), ) @@ -415,11 +447,7 @@ def translate_posterior_predictive_dict_to_xarray(self, dct) -> xr.Dataset: k, ) return dict_to_dataset( - data, - library=pymc3, - coords=self.coords, - # dims=self.dims, - # index_origin=self.index_origin + data, library=pymc3, coords=self.coords, dims=self.dims, index_origin=self.index_origin ) @requires(["posterior_predictive"]) @@ -454,25 +482,25 @@ def priors_to_xarray(self): {k: np.expand_dims(self.prior[k], 0) for k in var_names}, library=pymc3, coords=self.coords, - # dims=self.dims, - # index_origin=self.index_origin, + dims=self.dims, + index_origin=self.index_origin, ) ) return priors_dict - @requires(["observations", "multi_observations"]) + @requires("observations") @requires("model") def observed_data_to_xarray(self): """Convert observed data to xarray.""" if self.predictions: return None return dict_to_dataset( - {**self.observations, **self.multi_observations}, + self.observations, library=pymc3, coords=self.coords, - # dims=self.dims, - # default_dims=[], - # index_origin=self.index_origin, + dims=self.dims, + default_dims=[], + index_origin=self.index_origin, ) @requires(["trace", "predictions"]) @@ -517,9 +545,9 @@ def is_data(name, var) -> bool: constant_data, library=pymc3, coords=self.coords, - # dims=self.dims, - # default_dims=[], - # index_origin=self.index_origin, + dims=self.dims, + default_dims=[], + index_origin=self.index_origin, ) def to_inference_data(self): diff --git a/pymc3/tests/test_idata_conversion.py b/pymc3/tests/test_idata_conversion.py index e098fa5db9..0d720d7656 100644 --- a/pymc3/tests/test_idata_conversion.py +++ b/pymc3/tests/test_idata_conversion.py @@ -570,10 +570,6 @@ def test_multivariate_observations(self): class TestPyMC3WarmupHandling: - @pytest.mark.skipif( - not hasattr(pm.backends.base.SamplerReport, "n_draws"), - reason="requires pymc3 3.9 or higher", - ) @pytest.mark.parametrize("save_warmup", [False, True]) @pytest.mark.parametrize("chains", [1, 2]) @pytest.mark.parametrize("tune,draws", [(0, 50), (10, 40), (30, 0)]) From fa7607d66073b6d7c414cc8db19df5057f9b2b7d Mon Sep 17 00:00:00 2001 From: "Oriol (ZBook)" Date: Fri, 26 Mar 2021 06:13:52 +0200 Subject: [PATCH 7/7] activate arviz compat tests --- .github/workflows/arviz_compat.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/arviz_compat.yml b/.github/workflows/arviz_compat.yml 
index 6c5832b881..55405d0624 100644
--- a/.github/workflows/arviz_compat.yml
+++ b/.github/workflows/arviz_compat.yml
@@ -7,7 +7,6 @@ on:
 
 jobs:
   pytest:
-    if: false
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest]
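
A note for reviewers following the test changes above: with the az.from_pymc3 calls gone, the re-enabled tests build InferenceData through pm.to_inference_data and feed the result (or one of its groups) directly to pm.sample_posterior_predictive. Below is a minimal sketch of that call pattern, assuming the v4 branch with these patches applied; the model and variable names are illustrative only and do not appear in the patch.

    import numpy as np
    import pymc3 as pm
    from arviz import InferenceData

    with pm.Model() as model:
        mu = pm.Normal("mu", 0.0, 1.0)
        pm.Normal("obs", mu, 1.0, observed=np.random.normal(size=10))

        # return_inferencedata=False keeps the raw MultiTrace, as in the tests above
        trace = pm.sample(draws=100, chains=2, return_inferencedata=False)

        # pm.to_inference_data takes over the role az.from_pymc3 played in the old tests
        idata = pm.to_inference_data(trace)
        assert isinstance(idata, InferenceData)

        # the InferenceData object can be passed straight to posterior predictive sampling
        ppc = pm.sample_posterior_predictive(idata, keep_size=True)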
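
The temporary dict_to_dataset wrapper added above has two branches: with default_dims left as None it forwards to ArviZ's own dict_to_dataset while dropping the default_dims and index_origin keywords (which, per the commit messages, require an ArviZ release newer than 0.11.2), and with default_dims given (the converter passes [] for observed and constant data) it assembles the xarray Dataset by hand so that no chain/draw dimensions are prepended. A rough illustration of the two paths follows, assuming the patched module is importable; the sample data is made up, and calling the wrapper directly like this is for demonstration only.

    import numpy as np
    import pymc3
    from pymc3.backends.arviz import dict_to_dataset

    # (chain, draw)-shaped samples: forwarded to arviz.data.base.dict_to_dataset
    posterior = {"mu": np.random.randn(2, 100)}
    ds_post = dict_to_dataset(posterior, library=pymc3, coords={}, dims={})
    assert "chain" in ds_post["mu"].dims

    # observed-data-style values: default_dims=[] takes the manual xarray branch,
    # so only the variable's own dimensions are created
    observed = {"obs": np.random.randn(10)}
    ds_obs = dict_to_dataset(observed, library=pymc3, coords={}, dims={}, default_dims=[])
    assert "chain" not in ds_obs["obs"].dims  # only the variable's own dims, e.g. ("obs_dim_0",)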