From b23e0d372174bf4e58286ac86f988f9d9c35ec75 Mon Sep 17 00:00:00 2001
From: Ricardo Vieira
Date: Thu, 29 Jun 2023 15:11:16 +0200
Subject: [PATCH 1/3] Raise informative error when passing masked data to `Data`

---
 pymc/data.py       | 5 +++++
 tests/test_data.py | 8 ++++++++
 2 files changed, 13 insertions(+)

diff --git a/pymc/data.py b/pymc/data.py
index 4f34dfacb3..98f8a39970 100644
--- a/pymc/data.py
+++ b/pymc/data.py
@@ -423,6 +423,11 @@ def Data(
     # `convert_observed_data` takes care of parameter `value` and
     # transforms it to something digestible for PyTensor.
     arr = convert_observed_data(value)
+    if isinstance(arr, np.ma.MaskedArray):
+        raise NotImplementedError(
+            "Masked arrays or arrays with `nan` entries are not supported. "
+            "Pass them directly to `observed` if you want to trigger auto-imputation."
+        )
 
     if mutable is None:
         warnings.warn(
diff --git a/tests/test_data.py b/tests/test_data.py
index 0d004cd0a1..87998e63c9 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -437,6 +437,14 @@ def test_data_mutable_default_warning(self):
         assert isinstance(data, pt.TensorConstant)
         pass
 
+    def test_masked_array_error(self):
+        with pm.Model():
+            with pytest.raises(
+                NotImplementedError,
+                match="Masked arrays or arrays with `nan` entries are not supported.",
+            ):
+                pm.ConstantData("x", [0, 1, np.nan, 2])
+
 
 def test_data_naming():
     """
From e1fd17546ba82187ab103e47f236d7136d2e4a49 Mon Sep 17 00:00:00 2001
From: Ricardo Vieira
Date: Thu, 29 Jun 2023 15:12:57 +0200
Subject: [PATCH 2/3] Extend support for automatic imputation

---
 pymc/distributions/distribution.py       | 146 ++++++++++++++-
 pymc/model.py                            |  74 ++------
 tests/backends/test_arviz.py             |  17 +-
 tests/distributions/test_distribution.py | 228 ++++++++++++++++++++++-
 tests/test_model.py                      |  73 +++++---
 5 files changed, 442 insertions(+), 96 deletions(-)

diff --git a/pymc/distributions/distribution.py b/pymc/distributions/distribution.py
index 6ea639d9f6..d19b5a16c8 100644
--- a/pymc/distributions/distribution.py
+++ b/pymc/distributions/distribution.py
@@ -25,13 +25,14 @@
 from pytensor import tensor as pt
 from pytensor.compile.builders import OpFromGraph
-from pytensor.graph import node_rewriter
+from pytensor.graph import FunctionGraph, node_rewriter
 from pytensor.graph.basic import Node, Variable
 from pytensor.graph.replace import clone_replace
 from pytensor.graph.rewriting.basic import in2out
 from pytensor.graph.utils import MetaType
 from pytensor.tensor.basic import as_tensor_variable
 from pytensor.tensor.random.op import RandomVariable
+from pytensor.tensor.random.rewriting import local_subtensor_rv_lift
 from pytensor.tensor.random.utils import normalize_size_param
 from pytensor.tensor.var import TensorVariable
 from typing_extensions import TypeAlias
@@ -49,6 +50,7 @@
 )
 from pymc.exceptions import BlockModelAccessError
 from pymc.logprob.abstract import MeasurableVariable, _icdf, _logcdf, _logprob
+from pymc.logprob.basic import logp
 from pymc.logprob.rewriting import logprob_rewrites_db
 from pymc.model import BlockModelAccess
 from pymc.printing import str_for_dist
@@ -1148,3 +1150,145 @@ def logcdf(value, c):
             -np.inf,
             0,
         )
+
+
+class PartialObservedRV(SymbolicRandomVariable):
+    """RandomVariable with partially observed subspace, as indicated by a boolean mask.
+
+    See `create_partial_observed_rv` for more details.
+    """
+
+
+def create_partial_observed_rv(
+    rv: TensorVariable,
+    mask: Union[np.ndarray, TensorVariable],
+) -> Tuple[
+    Tuple[TensorVariable, TensorVariable], Tuple[TensorVariable, TensorVariable], TensorVariable
+]:
+    """Separate observed and unobserved components of a RandomVariable.
+
+    This function may return two independent RandomVariables or, if not possible,
+    two variables from a common `PartialObservedRV` node.
+
+    Parameters
+    ----------
+    rv : TensorVariable
+    mask : tensor_like
+        Constant or variable boolean mask. True entries correspond to components of the variable that are not observed.
+
+    Returns
+    -------
+    observed_rv and mask : Tuple of TensorVariable
+        The observed component of the RV and respective indexing mask
+    unobserved_rv and mask : Tuple of TensorVariable
+        The unobserved component of the RV and respective indexing mask
+    joined_rv : TensorVariable
+        The symbolic join of the observed and unobserved components.
+    """
+    if not mask.dtype == "bool":
+        raise ValueError(
+            f"mask must be an array or tensor of boolean dtype, got dtype: {mask.dtype}"
+        )
+
+    if mask.ndim > rv.ndim:
+        raise ValueError(f"mask can't have more dims than rv, got ndim: {mask.ndim}")
+
+    antimask = ~mask
+
+    can_rewrite = False
+    # Only pure RVs can be rewritten
+    if isinstance(rv.owner.op, RandomVariable):
+        ndim_supp = rv.owner.op.ndim_supp
+
+        # All univariate RVs can be rewritten
+        if ndim_supp == 0:
+            can_rewrite = True
+
+        # Multivariate RVs can be rewritten if masking does not split within support dimensions
+        else:
+            batch_dims = rv.type.ndim - ndim_supp
+            constant_mask = getattr(as_tensor_variable(mask), "data", None)
+
+            # Indexing does not overlap with core dimensions
+            if mask.ndim <= batch_dims:
+                can_rewrite = True
+
+            # Try to handle special case where mask is constant across support dimensions,
+            # TODO: This could be done by the rewrite itself
+            elif constant_mask is not None:
+                # We check if a constant_mask that only keeps the first entry of each support dim
+                # is equivalent to the original one after re-expanding.
+                trimmed_mask = constant_mask[(...,) + (0,) * ndim_supp]
+                expanded_mask = np.broadcast_to(
+                    np.expand_dims(trimmed_mask, axis=tuple(range(-ndim_supp, 0))),
+                    shape=constant_mask.shape,
+                )
+                if np.array_equal(constant_mask, expanded_mask):
+                    mask = trimmed_mask
+                    antimask = ~trimmed_mask
+                    can_rewrite = True
+
+    if can_rewrite:
+        # Rewrite doesn't work with boolean masks. Should be fixed after https://github.com/pymc-devs/pytensor/pull/329
+        mask, antimask = mask.nonzero(), antimask.nonzero()
+
+        masked_rv = rv[mask]
+        fgraph = FunctionGraph(outputs=[masked_rv], clone=False)
+        [unobserved_rv] = local_subtensor_rv_lift.transform(fgraph, fgraph.outputs[0].owner)
+
+        antimasked_rv = rv[antimask]
+        fgraph = FunctionGraph(outputs=[antimasked_rv], clone=False)
+        [observed_rv] = local_subtensor_rv_lift.transform(fgraph, fgraph.outputs[0].owner)
+
+        # Make a clone of the observed RV, with a distinct rng so that observed and
+        # unobserved are never treated as equivalent (and mergeable) nodes by pytensor.
+        _, size, _, *inps = observed_rv.owner.inputs
+        observed_rv = observed_rv.owner.op(*inps, size=size)
+
+    # For all other cases use the more general PartialObservedRV
+    else:
+        # The symbolic graph simply splits the observed and unobserved components,
+        # so they can be given separate values.
+        dist_, mask_ = rv.type(), as_tensor_variable(mask).type()
+        observed_rv_, unobserved_rv_ = dist_[~mask_], dist_[mask_]
+
+        observed_rv, unobserved_rv = PartialObservedRV(
+            inputs=[dist_, mask_],
+            outputs=[observed_rv_, unobserved_rv_],
+            ndim_supp=rv.owner.op.ndim_supp,
+        )(rv, mask)
+
+    joined_rv = pt.empty(rv.shape, dtype=rv.type.dtype)
+    joined_rv = pt.set_subtensor(joined_rv[mask], unobserved_rv)
+    joined_rv = pt.set_subtensor(joined_rv[antimask], observed_rv)
+
+    return (observed_rv, antimask), (unobserved_rv, mask), joined_rv
+
+
+@_logprob.register(PartialObservedRV)
+def partial_observed_rv_logprob(op, values, dist, mask, **kwargs):
+    # For the logp, simply join the values
+    [obs_value, unobs_value] = values
+    antimask = ~mask
+    joined_value = pt.empty_like(dist)
+    joined_value = pt.set_subtensor(joined_value[mask], unobs_value)
+    joined_value = pt.set_subtensor(joined_value[antimask], obs_value)
+    joined_logp = logp(dist, joined_value)
+
+    # If we have a univariate RV we can split apart the logp terms
+    if op.ndim_supp == 0:
+        return joined_logp[antimask], joined_logp[mask]
+    # Otherwise, we can't (always or easily) split apart the logp terms.
+    # We return the full logp for the observed value, and a 0-nd array for the unobserved value
+    else:
+        return joined_logp.ravel(), pt.zeros((0,), dtype=joined_logp.type.dtype)
+
+
+@_moment.register(PartialObservedRV)
+def partial_observed_rv_moment(op, partial_obs_rv, rv, mask):
+    # Unobserved output
+    if partial_obs_rv.owner.outputs.index(partial_obs_rv) == 1:
+        return moment(rv)[mask]
+    # Observed output
+    else:
+        return moment(rv)[~mask]
diff --git a/pymc/model.py b/pymc/model.py
index ec94772192..fdd8752032 100644
--- a/pymc/model.py
+++ b/pymc/model.py
@@ -44,11 +44,9 @@
 from pytensor.compile import DeepCopyOp, get_mode
 from pytensor.compile.sharedvalue import SharedVariable
 from pytensor.graph.basic import Constant, Variable, graph_inputs
-from pytensor.graph.fg import FunctionGraph
 from pytensor.scalar import Cast
 from pytensor.tensor.elemwise import Elemwise
 from pytensor.tensor.random.op import RandomVariable
-from pytensor.tensor.random.rewriting import local_subtensor_rv_lift
 from pytensor.tensor.random.type import RandomType
 from pytensor.tensor.sharedvar import ScalarSharedVariable
 from pytensor.tensor.var import TensorConstant, TensorVariable
@@ -1409,67 +1407,27 @@ def make_obs_var(
             if total_size is not None:
                 raise ValueError("total_size is not compatible with imputed variables")
 
-            if not isinstance(rv_var.owner.op, RandomVariable):
-                raise NotImplementedError(
-                    "Automatic inputation is only supported for univariate RandomVariables."
-                    f" {rv_var} of type {type(rv_var.owner.op)} is not supported."
-                )
-
-            if rv_var.owner.op.ndim_supp > 0:
-                raise NotImplementedError(
-                    f"Automatic inputation is only supported for univariate "
-                    f"RandomVariables, but {rv_var} is multivariate"
-                )
+            from pymc.distributions.distribution import create_partial_observed_rv
 
-            # We can get a random variable comprised of only the unobserved
-            # entries by lifting the indices through the `RandomVariable` `Op`.
+            (
+                (observed_rv, observed_mask),
+                (unobserved_rv, _),
+                joined_rv,
+            ) = create_partial_observed_rv(rv_var, mask)
+            observed_data = pt.as_tensor(data.data[observed_mask])
 
-            masked_rv_var = rv_var[mask.nonzero()]
-
-            fgraph = FunctionGraph(
-                [i for i in graph_inputs((masked_rv_var,)) if not isinstance(i, Constant)],
-                [masked_rv_var],
-                clone=False,
-            )
+            # Register ObservedRV corresponding to observed component
+            observed_rv.name = f"{name}_observed"
+            self.create_value_var(observed_rv, transform=None, value_var=observed_data)
+            self.add_named_variable(observed_rv)
+            self.observed_RVs.append(observed_rv)
 
-            (missing_rv_var,) = local_subtensor_rv_lift.transform(fgraph, fgraph.outputs[0].owner)
+            # Register FreeRV corresponding to unobserved components
+            self.register_rv(unobserved_rv, f"{name}_missing", transform=transform)
 
-            self.register_rv(missing_rv_var, f"{name}_missing", transform=transform)
-
-            # Now, we lift the non-missing observed values and produce a new
-            # `rv_var` that contains only those.
-            #
-            # The end result is two disjoint distributions: one for the missing
-            # values, and another for the non-missing values.
-
-            antimask_idx = (~mask).nonzero()
-            nonmissing_data = pt.as_tensor_variable(data[antimask_idx].data)
-            unmasked_rv_var = rv_var[antimask_idx]
-            unmasked_rv_var = unmasked_rv_var.owner.clone().default_output()
-
-            fgraph = FunctionGraph(
-                [i for i in graph_inputs((unmasked_rv_var,)) if not isinstance(i, Constant)],
-                [unmasked_rv_var],
-                clone=False,
-            )
-            (observed_rv_var,) = local_subtensor_rv_lift.transform(fgraph, fgraph.outputs[0].owner)
-            # Make a clone of the RV, but let it create a new rng so that observed and
-            # missing are not treated as equivalent nodes by pytensor. This would happen
-            # if the size of the masked and unmasked array happened to coincide
-            _, size, _, *inps = observed_rv_var.owner.inputs
-            observed_rv_var = observed_rv_var.owner.op(*inps, size=size, name=f"{name}_observed")
-            observed_rv_var.tag.observations = nonmissing_data
-
-            self.create_value_var(observed_rv_var, transform=None, value_var=nonmissing_data)
-            self.add_named_variable(observed_rv_var)
-            self.observed_RVs.append(observed_rv_var)
-
-            # Create deterministic that combines observed and missing
+            # Register Deterministic that combines observed and missing
             # Note: This can widely increase memory consumption during sampling for large datasets
-            rv_var = pt.empty(data.shape, dtype=observed_rv_var.type.dtype)
-            rv_var = pt.set_subtensor(rv_var[mask.nonzero()], missing_rv_var)
-            rv_var = pt.set_subtensor(rv_var[antimask_idx], observed_rv_var)
-            rv_var = Deterministic(name, rv_var, self, dims)
+            rv_var = Deterministic(name, joined_rv, self, dims)
 
         else:
             if sps.issparse(data):
diff --git a/tests/backends/test_arviz.py b/tests/backends/test_arviz.py
index a16bf43db5..55364f6264 100644
--- a/tests/backends/test_arviz.py
+++ b/tests/backends/test_arviz.py
@@ -352,7 +352,6 @@ def test_missing_data_model(self):
         # See https://github.com/pymc-devs/pymc/issues/5255
         assert inference_data.log_likelihood["y_observed"].shape == (2, 100, 3)
 
-    @pytest.mark.xfail(reason="Multivariate partial observed RVs not implemented for V4")
     def test_mv_missing_data_model(self):
         data = ma.masked_values([[1, 2], [2, 2], [-1, 4], [2, -1], [-1, -1]], value=-1)
 
@@ -361,19 +360,25 @@ def test_mv_missing_data_model(self):
             mu = pm.Normal("mu", 0, 1, size=2)
             sd_dist = pm.HalfNormal.dist(1.0, size=2)
             # pylint: disable=unpacking-non-sequence
-            chol, *_ = pm.LKJCholeskyCov("chol_cov", n=2, eta=1, sd_dist=sd_dist, compute_corr=True)
+            chol, *_ = pm.LKJCholeskyCov("chol_cov", n=2, eta=1, sd_dist=sd_dist)
             # pylint: enable=unpacking-non-sequence
             with pytest.warns(ImputationWarning):
                 y = pm.MvNormal("y", mu=mu, chol=chol, observed=data)
-            inference_data = pm.sample(100, chains=2, return_inferencedata=True)
+            inference_data = pm.sample(
+                tune=10,
+                draws=10,
+                chains=2,
+                step=pm.Metropolis(),
+                idata_kwargs=dict(log_likelihood=True),
+            )
 
         # make sure that data is really missing
-        assert isinstance(y.owner.op, (AdvancedIncSubtensor, AdvancedIncSubtensor1))
+        assert isinstance(y.owner.inputs[0].owner.op, (AdvancedIncSubtensor, AdvancedIncSubtensor1))
 
         test_dict = {
             "posterior": ["mu", "chol_cov"],
-            "observed_data": ["y"],
-            "log_likelihood": ["y"],
+            "observed_data": ["y_observed"],
+            "log_likelihood": ["y_observed"],
         }
         fails = check_multiple_attrs(test_dict, inference_data)
         assert not fails
diff --git a/tests/distributions/test_distribution.py b/tests/distributions/test_distribution.py
index 5921b68fb1..8cc3c37912 100644
--- a/tests/distributions/test_distribution.py
+++ b/tests/distributions/test_distribution.py
@@ -23,7 +23,7 @@
 import pytest
 import scipy.stats as st
 
-from pytensor import scan
+from pytensor import scan, shared
 from pytensor.tensor import TensorVariable
 
 import pymc as pm
@@ -42,14 +42,16 @@
     CustomDist,
     CustomDistRV,
     CustomSymbolicDistRV,
+    PartialObservedRV,
     SymbolicRandomVariable,
     _moment,
+    create_partial_observed_rv,
     moment,
 )
 from pymc.distributions.shape_utils import change_dist_size, rv_size_is_none, to_tuple
 from pymc.distributions.transforms import log
 from pymc.exceptions import BlockModelAccessError
-from pymc.logprob.basic import logcdf, logp
+from pymc.logprob.basic import conditional_logp, logcdf, logp
 from pymc.model import Deterministic, Model
 from pymc.pytensorf import collect_default_updates
 from pymc.sampling import draw, sample
@@ -700,3 +702,225 @@ def test_dtype(self, floatX):
         assert pm.DiracDelta.dist(2**16).dtype == "int32"
         assert pm.DiracDelta.dist(2**32).dtype == "int64"
         assert pm.DiracDelta.dist(2.0).dtype == floatX
+
+
+class TestPartialObservedRV:
+    @pytest.mark.parametrize("symbolic_rv", (False, True))
+    def test_univariate(self, symbolic_rv):
+        data = np.array([0.25, 0.5, 0.25])
+        mask = np.array([False, False, True])
+
+        rv = pm.Normal.dist([1, 2, 3])
+        if symbolic_rv:
+            # We use a Censored Normal so that PartialObservedRV is needed,
+            # but don't use the bounds for testing the logp
+            rv = pm.Censored.dist(rv, lower=-100, upper=100)
+        (obs_rv, obs_mask), (unobs_rv, unobs_mask), joined_rv = create_partial_observed_rv(rv, mask)
+
+        # Test types
+        if symbolic_rv:
+            assert isinstance(obs_rv.owner.op, PartialObservedRV)
+            assert isinstance(unobs_rv.owner.op, PartialObservedRV)
+        else:
+            assert isinstance(obs_rv.owner.op, Normal)
+            assert isinstance(unobs_rv.owner.op, Normal)
+
+        # Test shapes
+        assert tuple(obs_rv.shape.eval()) == (2,)
+        assert tuple(unobs_rv.shape.eval()) == (1,)
+        assert tuple(joined_rv.shape.eval()) == (3,)
+
+        # Test logp
+        logp = conditional_logp(
+            {obs_rv: pt.as_tensor(data[~mask]), unobs_rv: pt.as_tensor(data[mask])}
+        )
+        obs_logp, unobs_logp = pytensor.function([], list(logp.values()))()
+        np.testing.assert_allclose(obs_logp, st.norm([1, 2]).logpdf([0.25, 0.5]))
+        np.testing.assert_allclose(unobs_logp, st.norm([3]).logpdf([0.25]))
+
+    @pytest.mark.parametrize("obs_component_selected", (True, False))
+    def test_multivariate_constant_mask_separable(self, obs_component_selected):
+        if obs_component_selected:
+            mask = np.zeros((1, 4), dtype=bool)
+        else:
+            mask = np.ones((1, 4), dtype=bool)
+        obs_data = np.array([[0.1, 0.4, 0.1, 0.4]])
+        unobs_data = np.array([[0.4, 0.1, 0.4, 0.1]])
+
+        rv = pm.Dirichlet.dist([1, 2, 3, 4], shape=(1, 4))
+        (obs_rv, obs_mask), (unobs_rv, unobs_mask), joined_rv = create_partial_observed_rv(rv, mask)
+
+        # Test types
+        assert isinstance(obs_rv.owner.op, pm.Dirichlet)
+        assert isinstance(unobs_rv.owner.op, pm.Dirichlet)
+
+        # Test shapes
+        if obs_component_selected:
+            expected_obs_shape = (1, 4)
+            expected_unobs_shape = (0, 4)
+        else:
+            expected_obs_shape = (0, 4)
+            expected_unobs_shape = (1, 4)
+        assert tuple(obs_rv.shape.eval()) == expected_obs_shape
+        assert tuple(unobs_rv.shape.eval()) == expected_unobs_shape
+        assert tuple(joined_rv.shape.eval()) == (1, 4)
+
+        # Test logp
+        logp = conditional_logp(
+            {
+                obs_rv: pt.as_tensor(obs_data)[obs_mask],
+                unobs_rv: pt.as_tensor(unobs_data)[unobs_mask],
+            }
+        )
+        obs_logp, unobs_logp = pytensor.function([], list(logp.values()))()
+        if obs_component_selected:
+            expected_obs_logp = pm.logp(rv, obs_data).eval()
+            expected_unobs_logp = []
+        else:
+            expected_obs_logp = []
+            expected_unobs_logp = pm.logp(rv, unobs_data).eval()
+        np.testing.assert_allclose(obs_logp, expected_obs_logp)
+        np.testing.assert_allclose(unobs_logp, expected_unobs_logp)
+
+    def test_multivariate_constant_mask_unseparable(self):
+        mask = pt.constant(np.array([[True, True, False, False]]))
+        obs_data = np.array([[0.1, 0.4, 0.1, 0.4]])
+        unobs_data = np.array([[0.4, 0.1, 0.4, 0.1]])
+
+        rv = pm.Dirichlet.dist([1, 2, 3, 4], shape=(1, 4))
+        (obs_rv, obs_mask), (unobs_rv, unobs_mask), joined_rv = create_partial_observed_rv(rv, mask)
+
+        # Test types
+        assert isinstance(obs_rv.owner.op, PartialObservedRV)
+        assert isinstance(unobs_rv.owner.op, PartialObservedRV)
+
+        # Test shapes
+        assert tuple(obs_rv.shape.eval()) == (2,)
+        assert tuple(unobs_rv.shape.eval()) == (2,)
+        assert tuple(joined_rv.shape.eval()) == (1, 4)
+
+        # Test logp
+        logp = conditional_logp(
+            {
+                obs_rv: pt.as_tensor(obs_data)[obs_mask],
+                unobs_rv: pt.as_tensor(unobs_data)[unobs_mask],
+            }
+        )
+        obs_logp, unobs_logp = pytensor.function([], list(logp.values()))()
+
+        # For non-separable cases the logp always shows up in the observed variable
+        expected_logp = pm.logp(rv, [[0.1, 0.4, 0.4, 0.1]]).eval()
+        np.testing.assert_almost_equal(obs_logp, expected_logp)
+        np.testing.assert_array_equal(unobs_logp, [])
+
+    def test_multivariate_shared_mask_separable(self):
+        mask = shared(np.array([True]))
+        obs_data = np.array([[0.1, 0.4, 0.1, 0.4]])
+        unobs_data = np.array([[0.4, 0.1, 0.4, 0.1]])
+
+        rv = pm.Dirichlet.dist([1, 2, 3, 4], shape=(1, 4))
+        (obs_rv, obs_mask), (unobs_rv, unobs_mask), joined_rv = create_partial_observed_rv(rv, mask)
+
+        # Test types
+        # A shared mask that does not act on the support dims can still be separated.
+        assert isinstance(obs_rv.owner.op, pm.Dirichlet)
+        assert isinstance(unobs_rv.owner.op, pm.Dirichlet)
+
+        # Test shapes
+        assert tuple(obs_rv.shape.eval()) == (0, 4)
+        assert tuple(unobs_rv.shape.eval()) == (1, 4)
+        assert tuple(joined_rv.shape.eval()) == (1, 4)
+
+        # Test logp
+        logp = conditional_logp(
+            {
+                obs_rv: pt.as_tensor(obs_data)[obs_mask],
+                unobs_rv: pt.as_tensor(unobs_data)[unobs_mask],
+            }
+        )
+        logp_fn = pytensor.function([], list(logp.values()))
+        obs_logp, unobs_logp = logp_fn()
+        expected_logp = pm.logp(rv, unobs_data).eval()
+        np.testing.assert_almost_equal(obs_logp, [])
+        np.testing.assert_array_equal(unobs_logp, expected_logp)
+
+        # Test that we can update a shared mask
+        mask.set_value(np.array([False]))
+
+        assert tuple(obs_rv.shape.eval()) == (1, 4)
+        assert tuple(unobs_rv.shape.eval()) == (0, 4)
+
+        new_expected_logp = pm.logp(rv, obs_data).eval()
+        assert not np.isclose(expected_logp, new_expected_logp)  # Otherwise test is weak
+        obs_logp, unobs_logp = logp_fn()
+        np.testing.assert_almost_equal(obs_logp, new_expected_logp)
+        np.testing.assert_array_equal(unobs_logp, [])
+
+    def test_multivariate_shared_mask_unseparable(self):
+        # Even if the mask is initially not mixing support dims,
+        # it could later be changed in a way that does!
+        mask = shared(np.array([[True, True, True, True]]))
+        obs_data = np.array([[0.1, 0.4, 0.1, 0.4]])
+        unobs_data = np.array([[0.4, 0.1, 0.4, 0.1]])
+
+        rv = pm.Dirichlet.dist([1, 2, 3, 4], shape=(1, 4))
+        (obs_rv, obs_mask), (unobs_rv, unobs_mask), joined_rv = create_partial_observed_rv(rv, mask)
+
+        # Test types
+        # Multivariate RVs with shared masks on the last component are always unseparable.
+        assert isinstance(obs_rv.owner.op, PartialObservedRV)
+        assert isinstance(unobs_rv.owner.op, PartialObservedRV)
+
+        # Test shapes
+        assert tuple(obs_rv.shape.eval()) == (0,)
+        assert tuple(unobs_rv.shape.eval()) == (4,)
+        assert tuple(joined_rv.shape.eval()) == (1, 4)
+
+        # Test logp
+        logp = conditional_logp(
+            {
+                obs_rv: pt.as_tensor(obs_data)[obs_mask],
+                unobs_rv: pt.as_tensor(unobs_data)[unobs_mask],
+            }
+        )
+        logp_fn = pytensor.function([], list(logp.values()))
+        obs_logp, unobs_logp = logp_fn()
+        # For non-separable cases the logp always shows up in the observed variable
+        # Even in this case where all entries come from an unobserved component
+        expected_logp = pm.logp(rv, unobs_data).eval()
+        np.testing.assert_almost_equal(obs_logp, expected_logp)
+        np.testing.assert_array_equal(unobs_logp, [])
+
+        # Test that we can update a shared mask
+        mask.set_value(np.array([[False, False, True, True]]))
+
+        assert tuple(obs_rv.shape.eval()) == (2,)
+        assert tuple(unobs_rv.shape.eval()) == (2,)
+
+        new_expected_logp = pm.logp(rv, [0.1, 0.4, 0.4, 0.1]).eval()
+        assert not np.isclose(expected_logp, new_expected_logp)  # Otherwise test is weak
+        obs_logp, unobs_logp = logp_fn()
+        np.testing.assert_almost_equal(obs_logp, new_expected_logp)
+        np.testing.assert_array_equal(unobs_logp, [])
+
+    def test_moment(self):
+        x = pm.GaussianRandomWalk.dist(init_dist=pm.Normal.dist(-5), mu=1, steps=9)
+        ref_moment = moment(x).eval()
+        assert not np.allclose(ref_moment[::2], ref_moment[1::2])  # Otherwise test is weak
+
+        (obs_x, _), (unobs_x, _), _ = create_partial_observed_rv(
+            x, mask=np.array([False, True] * 5)
+        )
+        np.testing.assert_allclose(moment(obs_x).eval(), ref_moment[::2])
+        np.testing.assert_allclose(moment(unobs_x).eval(), ref_moment[1::2])
+
+    def test_wrong_mask(self):
+        rv = pm.Normal.dist(shape=(5,))
+
+        invalid_mask = np.array([0, 2, 4])
+        with pytest.raises(ValueError, match="mask must be an array or tensor of boolean dtype"):
+            create_partial_observed_rv(rv, invalid_mask)
+
+        invalid_mask = np.zeros((1, 5), dtype=bool)
+        with pytest.raises(ValueError, match="mask can't have more dims than rv"):
+            create_partial_observed_rv(rv, invalid_mask)
diff --git a/tests/test_model.py b/tests/test_model.py
index 10a509ae09..9306b12902 100644
--- a/tests/test_model.py
+++ b/tests/test_model.py
@@ -41,9 +41,10 @@
 from pymc import Deterministic, Potential
 from pymc.blocking import DictToArrayBijection, RaveledVars
 from pymc.distributions import Normal, transforms
-from pymc.distributions.transforms import log
+from pymc.distributions.distribution import PartialObservedRV
+from pymc.distributions.transforms import log, simplex
 from pymc.exceptions import ImputationWarning, ShapeError, ShapeWarning
-from pymc.logprob.basic import transformed_conditional_logp
+from pymc.logprob.basic import conditional_logp, transformed_conditional_logp
 from pymc.logprob.transforms import IntervalTransform
 from pymc.model import Point, ValueGradFunction, modelcontext
 from pymc.testing import SeededTest
@@ -1398,32 +1399,43 @@ def test_missing_logp2(self):
 
         assert m_logp == m_missing_logp
 
-    def test_missing_multivariate(self):
-        """Test model with missing variables whose transform changes base shape still works"""
+    def test_missing_multivariate_separable(self):
+        with pm.Model() as m_miss:
+            with pytest.warns(ImputationWarning):
+                x = pm.Dirichlet(
+                    "x",
+                    a=[1, 2, 3],
+                    observed=np.array([[0.3, 0.3, 0.4], [np.nan, np.nan, np.nan]]),
+                )
+        assert isinstance(m_miss["x_missing"].owner.op, pm.Dirichlet)
+        assert isinstance(m_miss["x_observed"].owner.op, pm.Dirichlet)
+
+        with pm.Model() as m_unobs:
+            x = pm.Dirichlet("x", a=[1, 2, 3], shape=(1, 3))
+
+        inp_vals = simplex.forward(np.array([[0.3, 0.3, 0.4]])).eval()
+        np.testing.assert_allclose(
+            m_miss.compile_logp(jacobian=False)({"x_missing_simplex__": inp_vals}),
+            m_unobs.compile_logp(jacobian=False)({"x_simplex__": inp_vals}) * 2,
+        )
+
+    def test_missing_multivariate_unseparable(self):
         with pm.Model() as m_miss:
-            with pytest.raises(
-                NotImplementedError,
-                match="Automatic inputation is only supported for univariate RandomVariables",
-            ):
-                with pytest.warns(ImputationWarning):
-                    x = pm.Dirichlet(
-                        "x",
-                        a=[1, 2, 3],
-                        observed=np.array([[0.3, 0.3, 0.4], [np.nan, np.nan, np.nan]]),
-                    )
-
-        # TODO: Test can be used when local_subtensor_rv_lift supports multivariate distributions
-        # from pymc.distributions.transforms import simplex
-        #
-        # with pm.Model() as m_unobs:
-        #     x = pm.Dirichlet("x", a=[1, 2, 3])
-        #
-        # inp_vals = simplex.forward(np.array([0.3, 0.3, 0.4])).eval()
-        # assert np.isclose(
-        #     m_miss.compile_logp()({"x_missing_simplex__": inp_vals}),
-        #     m_unobs.compile_logp(jacobian=False)({"x_simplex__": inp_vals}) * 2,
-        # )
+            with pytest.warns(ImputationWarning):
+                x = pm.Dirichlet(
+                    "x",
+                    a=[1, 2, 3],
+                    observed=np.array([[0.3, 0.3, np.nan], [np.nan, np.nan, 0.4]]),
+                )
+
+        assert isinstance(m_miss["x_missing"].owner.op, PartialObservedRV)
+        assert isinstance(m_miss["x_observed"].owner.op, PartialObservedRV)
+
+        inp_values = np.array([0.3, 0.3, 0.4])
+        np.testing.assert_allclose(
+            m_miss.compile_logp()({"x_missing": [0.4, 0.3, 0.3]}),
+            st.dirichlet.logpdf(inp_values, [1, 2, 3]) * 2,
+        )
 
     def test_missing_vector_parameter(self):
         with pm.Model() as m:
@@ -1482,11 +1494,10 @@ def test_dims(self):
             x = pm.Normal("x", observed=data, dims=("observed",))
         assert model.named_vars_to_dims == {"x": ("observed",)}
 
-    def test_error_non_random_variable(self):
+    def test_symbolic_random_variable(self):
         data = np.array([np.nan] * 3 + [0] * 7)
         with pm.Model() as model:
-            msg = "x of type is not supported"
-            with pytest.raises(NotImplementedError, match=msg):
+            with pytest.warns(ImputationWarning):
                 x = pm.Censored(
                     "x",
                     pm.Normal.dist(),
@@ -1494,6 +1505,10 @@ def test_error_non_random_variable(self):
                     upper=10,
                     observed=data,
                 )
+        np.testing.assert_almost_equal(
+            model.compile_logp()({"x_missing": [0] * 3}),
+            st.norm.logcdf(0) * 10,
+        )
 
 
 class TestShared(SeededTest):
From 4f1f11ca7792786ffb0554fc5fcf9898235aa952 Mon Sep 17 00:00:00 2001
From: Ricardo Vieira
Date: Fri, 30 Jun 2023 11:54:11 +0200
Subject: [PATCH 3/3] Use "unobserved" as imputed variable suffix instead of "missing"

---
 pymc/model.py                 |  2 +-
 tests/backends/test_arviz.py  |  4 +--
 tests/test_model.py           | 60 ++++++++++++++++++++---------------
 tests/test_model_graph.py     |  6 ++--
 tests/tuning/test_starting.py |  6 ++--
 5 files changed, 43 insertions(+), 35 deletions(-)

diff --git a/pymc/model.py b/pymc/model.py
index fdd8752032..65dd8ec9c3 100644
--- a/pymc/model.py
+++ b/pymc/model.py
@@ -1423,7 +1423,7 @@ def make_obs_var(
             self.observed_RVs.append(observed_rv)
 
             # Register FreeRV corresponding to unobserved components
-            self.register_rv(unobserved_rv, f"{name}_missing", transform=transform)
+            self.register_rv(unobserved_rv, f"{name}_unobserved", transform=transform)
 
             # Register Deterministic that combines observed and missing
             # Note: This can widely increase memory consumption during sampling for large datasets
diff --git a/tests/backends/test_arviz.py b/tests/backends/test_arviz.py
index 55364f6264..4f6d312e29 100644
--- a/tests/backends/test_arviz.py
+++ b/tests/backends/test_arviz.py
@@ -338,10 +338,10 @@ def test_missing_data_model(self):
         )
 
         # make sure that data is really missing
-        assert "y_missing" in model.named_vars
+        assert "y_unobserved" in model.named_vars
 
         test_dict = {
-            "posterior": ["x", "y_missing"],
+            "posterior": ["x", "y_unobserved"],
             "observed_data": ["y_observed"],
             "log_likelihood": ["y_observed"],
         }
diff --git a/tests/test_model.py b/tests/test_model.py
index 9306b12902..f4d2bbe78d 100644
--- a/tests/test_model.py
+++ b/tests/test_model.py
@@ -357,13 +357,13 @@ def test_missing_data(self):
         gf = m.logp_dlogp_function()
         gf._extra_are_set = True
 
-        assert m["x2_missing"].type == gf._extra_vars_shared["x2_missing"].type
+        assert m["x2_unobserved"].type == gf._extra_vars_shared["x2_unobserved"].type
 
         # The dtype of the merged observed/missing deterministic should match the RV dtype
         assert m.deterministics[0].type.dtype == x2.type.dtype
 
         point = m.initial_point(random_seed=None).copy()
-        del point["x2_missing"]
+        del point["x2_unobserved"]
 
         res = [gf(DictToArrayBijection.map(Point(point, model=m))) for i in range(5)]
 
@@ -566,7 +566,7 @@ def test_make_obs_var():
     assert masked_output != fake_distribution
     assert not isinstance(masked_output, RandomVariable)
     # Ensure it has missing values
-    assert {"testing_inputs_missing"} == {v.name for v in fake_model.value_vars}
+    assert {"testing_inputs_unobserved"} == {v.name for v in fake_model.value_vars}
    assert {"testing_inputs", "testing_inputs_observed"} == {
         v.name for v in fake_model.observed_RVs
     }
@@ -1221,7 +1221,7 @@ def test_missing_basic(self, missing_data):
             with pytest.warns(ImputationWarning):
                 _ = pm.Normal("y", x, 1, observed=missing_data)
 
-        assert "y_missing" in model.named_vars
+        assert "y_unobserved" in model.named_vars
 
         test_point = model.initial_point()
         assert not np.isnan(model.compile_logp()(test_point))
@@ -1238,7 +1238,7 @@ def test_missing_with_predictors(self):
             with pytest.warns(ImputationWarning):
                 y = pm.Normal("y", x * predictors, 1, observed=data)
 
-        assert "y_missing" in model.named_vars
+        assert "y_unobserved" in model.named_vars
 
         test_point = model.initial_point()
         assert not np.isnan(model.compile_logp()(test_point))
@@ -1278,17 +1278,19 @@ def test_interval_missing_observations(self):
             with pytest.warns(ImputationWarning):
                 theta2 = pm.Normal("theta2", mu=theta1, observed=obs2)
 
-            assert isinstance(model.rvs_to_transforms[model["theta1_missing"]], IntervalTransform)
+            assert isinstance(
+                model.rvs_to_transforms[model["theta1_unobserved"]], IntervalTransform
+            )
             assert model.rvs_to_transforms[model["theta1_observed"]] is None
 
             prior_trace = pm.sample_prior_predictive(random_seed=rng, return_inferencedata=False)
             assert set(prior_trace.keys()) == {
                 "theta1",
                 "theta1_observed",
-                "theta1_missing",
+                "theta1_unobserved",
                 "theta2",
                 "theta2_observed",
-                "theta2_missing",
+                "theta2_unobserved",
             }
 
             # Make sure the observed + missing combined deterministics have the
@@ -1303,14 +1305,16 @@
             # Make sure the missing parts of the combined deterministic matches the
             # sampled missing and observed variable values
             assert (
-                np.mean(prior_trace["theta1"][:, obs1.mask] - prior_trace["theta1_missing"]) == 0.0
+                np.mean(prior_trace["theta1"][:, obs1.mask] - prior_trace["theta1_unobserved"])
+                == 0.0
             )
             assert (
                 np.mean(prior_trace["theta1"][:, ~obs1.mask] - prior_trace["theta1_observed"])
                 == 0.0
             )
             assert (
-                np.mean(prior_trace["theta2"][:, obs2.mask] - prior_trace["theta2_missing"]) == 0.0
+                np.mean(prior_trace["theta2"][:, obs2.mask] - prior_trace["theta2_unobserved"])
+                == 0.0
             )
             assert (
                 np.mean(prior_trace["theta2"][:, ~obs2.mask] - prior_trace["theta2_observed"])
@@ -1326,18 +1330,22 @@
             )
             assert set(trace.varnames) == {
                 "theta1",
-                "theta1_missing",
-                "theta1_missing_interval__",
+                "theta1_unobserved",
+                "theta1_unobserved_interval__",
                 "theta2",
-                "theta2_missing",
+                "theta2_unobserved",
             }
 
             # Make sure that the missing values are newly generated samples and that
             # the observed and deterministic match
-            assert np.all(0 < trace["theta1_missing"].mean(0))
-            assert np.all(0 < trace["theta2_missing"].mean(0))
-            assert np.isclose(np.mean(trace["theta1"][:, obs1.mask] - trace["theta1_missing"]), 0)
-            assert np.isclose(np.mean(trace["theta2"][:, obs2.mask] - trace["theta2_missing"]), 0)
+            assert np.all(0 < trace["theta1_unobserved"].mean(0))
+            assert np.all(0 < trace["theta2_unobserved"].mean(0))
+            assert np.isclose(
+                np.mean(trace["theta1"][:, obs1.mask] - trace["theta1_unobserved"]), 0
+            )
+            assert np.isclose(
+                np.mean(trace["theta2"][:, obs2.mask] - trace["theta2_unobserved"]), 0
+            )
 
             # Make sure that the observed values are unchanged
             assert np.allclose(np.var(trace["theta1"][:, ~obs1.mask], 0), 0.0)
@@ -1378,7 +1386,7 @@ def test_missing_logp1(self):
             with pytest.warns(ImputationWarning):
                 x = pm.Gamma("x", 1, 1, observed=[1, 1, 1, np.nan])
 
-        logp_val = m2.compile_logp()({"x_missing_log__": np.array([0])})
+        logp_val = m2.compile_logp()({"x_unobserved_log__": np.array([0])})
         assert logp_val == -4.0
 
     def test_missing_logp2(self):
@@ -1394,7 +1402,7 @@ def test_missing_logp2(self):
                 "theta2", mu=theta1, observed=np.array([np.nan, np.nan, 2, np.nan, 4])
             )
         m_missing_logp = m_missing.compile_logp()(
-            {"theta1_missing": [2, 4], "theta2_missing": [0, 1, 3]}
+            {"theta1_unobserved": [2, 4], "theta2_unobserved": [0, 1, 3]}
         )
 
         assert m_logp == m_missing_logp
@@ -1407,7 +1415,7 @@ def test_missing_multivariate_separable(self):
                     a=[1, 2, 3],
                     observed=np.array([[0.3, 0.3, 0.4], [np.nan, np.nan, np.nan]]),
                 )
-        assert isinstance(m_miss["x_missing"].owner.op, pm.Dirichlet)
+        assert isinstance(m_miss["x_unobserved"].owner.op, pm.Dirichlet)
         assert isinstance(m_miss["x_observed"].owner.op, pm.Dirichlet)
 
         with pm.Model() as m_unobs:
             x = pm.Dirichlet("x", a=[1, 2, 3], shape=(1, 3))
 
         inp_vals = simplex.forward(np.array([[0.3, 0.3, 0.4]])).eval()
         np.testing.assert_allclose(
-            m_miss.compile_logp(jacobian=False)({"x_missing_simplex__": inp_vals}),
+            m_miss.compile_logp(jacobian=False)({"x_unobserved_simplex__": inp_vals}),
             m_unobs.compile_logp(jacobian=False)({"x_simplex__": inp_vals}) * 2,
         )
 
@@ -1428,12 +1436,12 @@ def test_missing_multivariate_unseparable(self):
                     observed=np.array([[0.3, 0.3, np.nan], [np.nan, np.nan, 0.4]]),
                 )
 
-        assert isinstance(m_miss["x_missing"].owner.op, PartialObservedRV)
+        assert isinstance(m_miss["x_unobserved"].owner.op, PartialObservedRV)
         assert isinstance(m_miss["x_observed"].owner.op, PartialObservedRV)
 
         inp_values = np.array([0.3, 0.3, 0.4])
         np.testing.assert_allclose(
-            m_miss.compile_logp()({"x_missing": [0.4, 0.3, 0.3]}),
+            m_miss.compile_logp()({"x_unobserved": [0.4, 0.3, 0.3]}),
             st.dirichlet.logpdf(inp_values, [1, 2, 3]) * 2,
         )
 
@@ -1451,7 +1459,7 @@ def test_missing_vector_parameter(self):
         assert np.all(x_draws[:, 0] < 0)
         assert np.all(x_draws[:, 1] > 0)
         assert np.isclose(
-            m.compile_logp()({"x_missing": np.array([-10, 10, -10, 10])}),
+            m.compile_logp()({"x_unobserved": np.array([-10, 10, -10, 10])}),
             st.norm(scale=0.1).logpdf(0) * 6,
         )
 
@@ -1470,7 +1478,7 @@ def test_missing_symmetric(self):
         x_obs_rv = m["x_observed"]
         x_obs_vv = m.rvs_to_values[x_obs_rv]
 
-        x_unobs_rv = m["x_missing"]
+        x_unobs_rv = m["x_unobserved"]
         x_unobs_vv = m.rvs_to_values[x_unobs_rv]
 
         logp = transformed_conditional_logp(
@@ -1506,7 +1514,7 @@ def test_symbolic_random_variable(self):
                     observed=data,
                 )
         np.testing.assert_almost_equal(
-            model.compile_logp()({"x_missing": [0] * 3}),
+            model.compile_logp()({"x_unobserved": [0] * 3}),
             st.norm.logcdf(0) * 10,
         )
diff --git a/tests/test_model_graph.py b/tests/test_model_graph.py
index 9c2e1caa37..0ad38737e7 100644
--- a/tests/test_model_graph.py
+++ b/tests/test_model_graph.py
@@ -145,13 +145,13 @@ def model_with_imputations():
     compute_graph = {
         "a": set(),
-        "L_missing": {"a"},
+        "L_unobserved": {"a"},
         "L_observed": {"a"},
-        "L": {"L_missing", "L_observed"},
+        "L": {"L_unobserved", "L_observed"},
     }
     plates = {
         "": {"a"},
-        "2": {"L_missing"},
+        "2": {"L_unobserved"},
         "10": {"L_observed"},
         "12": {"L"},
     }
diff --git a/tests/tuning/test_starting.py b/tests/tuning/test_starting.py
index 4e7a3540ee..cfdb891891 100644
--- a/tests/tuning/test_starting.py
+++ b/tests/tuning/test_starting.py
@@ -146,9 +146,9 @@ def test_find_MAP_issue_4488():
         y = pm.Deterministic("y", x + 1)
         map_estimate = find_MAP()
 
-    assert not set.difference({"x_missing", "x_missing_log__", "y"}, set(map_estimate.keys()))
-    np.testing.assert_allclose(map_estimate["x_missing"], 0.2, rtol=1e-4, atol=1e-4)
-    np.testing.assert_allclose(map_estimate["y"], [2.0, map_estimate["x_missing"][0] + 1])
+    assert not set.difference({"x_unobserved", "x_unobserved_log__", "y"}, set(map_estimate.keys()))
+    np.testing.assert_allclose(map_estimate["x_unobserved"], 0.2, rtol=1e-4, atol=1e-4)
+    np.testing.assert_allclose(map_estimate["y"], [2.0, map_estimate["x_unobserved"][0] + 1])
 
 
 def test_find_MAP_warning_non_free_RVs():
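
Usage note (illustrative, not part of the patches): with all three patches applied, passing data containing nan to `observed` splits the variable into an observed component, an imputed free RV carrying the new `_unobserved` suffix, and a Deterministic that joins the two. A minimal sketch, assuming a PyMC build that includes this series; the variable names follow the suffixes introduced in `make_obs_var` above:

import numpy as np
import pymc as pm

data = np.array([0.5, 1.5, np.nan])

with pm.Model() as model:
    mu = pm.Normal("mu", 0, 1)
    # Emits an ImputationWarning: `x` is split into `x_observed` (conditioned
    # on the non-nan entries) and `x_unobserved` (a free RV sampled like any
    # other), plus a Deterministic `x` that joins both components.
    x = pm.Normal("x", mu=mu, sigma=1, observed=data)

print(sorted(model.named_vars))  # ['mu', 'x', 'x_observed', 'x_unobserved']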
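The lower-level helper added in patch 2 can also be exercised directly, mirroring `test_univariate` above. A minimal sketch under the same assumption; note that `create_partial_observed_rv` is an internal helper, so its import path is not a stable public API:

import numpy as np
import pymc as pm
from pymc.distributions.distribution import create_partial_observed_rv

rv = pm.Normal.dist([1.0, 2.0, 3.0])
mask = np.array([False, False, True])  # True marks unobserved entries

# Univariate RVs are always separable: both components come back as plain
# Normal RVs rather than outputs of a joint PartialObservedRV node.
(obs_rv, obs_mask), (unobs_rv, unobs_mask), joined = create_partial_observed_rv(rv, mask)

assert tuple(obs_rv.shape.eval()) == (2,)
assert tuple(unobs_rv.shape.eval()) == (1,)
assert tuple(joined.shape.eval()) == (3,)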