pymc-devs · AlexAndorra · Jan 16, 2021 · Oct 1, 2019 · Oct 2, 2019 · Oct 2, 2019
diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py
@@ -80,6 +80,7 @@
 from pymc3.distributions.mixture import Mixture, MixtureSameFamily, NormalMixture
 from pymc3.distributions.multivariate import (
     Dirichlet,
+    DirichletMultinomial,
     KroneckerNormal,
     LKJCholeskyCov,
     LKJCorr,
@@ -154,6 +155,7 @@
     "MvStudentT",
     "Dirichlet",
     "Multinomial",
+    "DirichletMultinomial",
     "Wishart",
     "WishartBartlett",
     "LKJCholeskyCov",

diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
@@ -51,6 +51,7 @@
     "MvStudentT",
     "Dirichlet",
     "Multinomial",
+    "DirichletMultinomial",
     "Wishart",
     "WishartBartlett",
     "LKJCorr",
@@ -690,6 +691,140 @@ def logp(self, x):
         )
 
 
+class DirichletMultinomial(Discrete):
+    R"""Dirichlet Multinomial log-likelihood.
+
+    Dirichlet mixture of multinomials distribution, with a marginalized PMF.
+
+    .. math::
+
+        f(x \mid n, a) = \frac{\Gamma(n + 1)\Gamma(\sum a_k)}
+                              {\Gamma(n + \sum a_k)}
+                         \prod_{k=1}^K
+                         \frac{\Gamma(x_k +  a_k)}
+                              {\Gamma(x_k + 1)\Gamma(a_k)}
+
+    ==========  ===========================================
+    Support     :math:`x \in \{0, 1, \ldots, n\}` such that
+                :math:`\sum x_i = n`
+    Mean        :math:`n \frac{a_i}{\sum{a_k}}`
+    ==========  ===========================================
+
+    Parameters
+    ----------
+    n : int or array
+        Total counts in each replicate. If n is an array its shape must be (N,)
+        with N = a.shape[0]
+
+    a : one- or two-dimensional array
+        Dirichlet parameter.  Elements must be strictly positive.
+        Dimension of each element of the distribution is the length
+        of the second dimension of *a*.
+    """
+
+    def __init__(self, n, a, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # Convert up front so that lists/tuples (which lack ``ndim``) work too.
+        a = tt.as_tensor_variable(a)
+        if len(self.shape) > 1:
+            self.n = tt.shape_padright(n)
+            self.a = a if a.ndim > 1 else tt.shape_padleft(a)
+        else:
+            # n is a scalar, a is a 1d array
+            self.n = tt.as_tensor_variable(n)
+            self.a = a
+
+        p = self.a / self.a.sum(-1, keepdims=True)
+        self.mean = self.n * p
+        # Rounding can break sum(mode) == n, so add the residual back in.
+        mode = tt.cast(tt.round(self.mean), "int32")
+        diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
+        residual_idx = (tt.abs_(diff) > 0).nonzero()
+        self.mode = tt.inc_subtensor(mode[residual_idx], diff[residual_idx])
+
+    def _random(self, n, a, size=None, raw_size=None):
+        """Draw count vectors: one Dirichlet draw per row of ``a``, then multinomial."""
+        original_dtype = a.dtype
+        # Set float type to float64 for numpy. This change is related to numpy issue #8317 (https://github.com/numpy/numpy/issues/8317)
+        a = a.astype("float64")
+
+        # Thanks to the default shape handling done in generate_values, the last
+        # axis of n is a dummy axis that allows it to broadcast well with `a`
+        n = np.broadcast_to(n, size)
+        a = np.broadcast_to(a, size)
+        n = n[..., 0]
+
+        # np.random.multinomial needs `n` to be a scalar int and `a` a
+        # sequence so we semi flatten them and iterate over them
+        size_ = to_tuple(raw_size)
+        n_ = n.reshape([-1])
+        a_ = a.reshape([-1, a.shape[-1]])
+        p_ = np.array([np.random.dirichlet(aa) for aa in a_])
+        if a.ndim > len(size_) and a.shape[: len(size_)] == size_:
+            # a and n have the size_ prepend so we don't need it in np.random
+            samples = np.array([np.random.multinomial(nn, pp) for nn, pp in zip(n_, p_)])
+            samples = samples.reshape(a.shape)
+        else:
+            # a and n don't have the size prepend
+            # NOTE(review): all size_ draws of a row share that row's one Dirichlet draw.
+            samples = np.array(
+                [np.random.multinomial(nn, pp, size=size_) for nn, pp in zip(n_, p_)]
+            )
+            samples = np.moveaxis(samples, 0, -1)
+            samples = samples.reshape(size + a.shape)
+        # We cast back to the original dtype
+        return samples.astype(original_dtype)
+
+    def random(self, point=None, size=None):
+        """
+        Draw random values from Dirichlet-Multinomial distribution.
+
+        Parameters
+        ----------
+        point: dict, optional
+            Dict of variable values on which random values are to be
+            conditioned (uses default point if not specified).
+        size: int, optional
+            Desired size of random sample (returns one sample if not
+            specified).
+
+        Returns
+        -------
+        array
+        """
+        n, a = draw_values([self.n, self.a], point=point, size=size)
+        return generate_samples(
+            self._random,
+            n,
+            a,
+            dist_shape=self.shape,
+            not_broadcast_kwargs={"raw_size": size},
+            size=size,
+        )
+
+    def logp(self, x):
+        """Log-probability of count vectors ``x`` (categories on the last axis)."""
+        a, n = self.a, self.n
+        sum_a = a.sum(axis=-1, keepdims=True)
+
+        const = (gammaln(n + 1) + gammaln(sum_a)) - gammaln(n + sum_a)
+        series = gammaln(x + a) - (gammaln(x + 1) + gammaln(a))
+        result = const + series.sum(axis=-1, keepdims=True)
+        return bound(
+            result,
+            tt.all(tt.ge(x, 0)),
+            tt.all(tt.gt(a, 0)),
+            tt.all(tt.ge(n, 0)),
+            tt.all(tt.eq(x.sum(axis=-1, keepdims=True), n)),
+            broadcast_conditions=False,
+        )
+
+    def _distr_parameters_for_repr(self):
+        """Return the parameter names ``n`` and ``a`` (presumably used to build the repr)."""
+        return ["n", "a"]
+
+
 def posdef(AA):
     try:
         linalg.cholesky(AA)

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
@@ -44,6 +44,7 @@
     Constant,
     DensityDist,
     Dirichlet,
+    DirichletMultinomial,
     DiscreteUniform,
     DiscreteWeibull,
     ExGaussian,
@@ -265,6 +266,21 @@ def multinomial_logpdf(value, n, p):
         return -inf
 
 
+def dirichlet_multinomial_logpmf(value, n, a):
+    """Reference log-PMF of one Dirichlet-multinomial count vector (1d value, scalar n)."""
+    value, n, a = (np.asarray(arg) for arg in (value, n, a))
+    assert value.ndim == 1
+    assert n.ndim == 0
+    assert a.shape == value.shape
+    lgamma = scipy.special.gammaln
+    in_support = value.sum() == n and (0 <= value).all() and (value <= n).all()
+    if not in_support:
+        return -inf
+    total_a = a.sum(axis=-1)
+    log_norm = lgamma(n + 1) + lgamma(total_a) - lgamma(n + total_a)
+    return log_norm + (lgamma(value + a) - lgamma(value + 1) - lgamma(a)).sum(axis=-1)
+
+
 def beta_mu_sigma(value, mu, sigma):
     kappa = mu * (1 - mu) / sigma ** 2 - 1
     if kappa > 0:
@@ -1724,6 +1740,172 @@ def test_batch_multinomial(self):
         sample = dist.random(size=2)
         assert_allclose(sample, np.stack([vals, vals], axis=0))
 
+    @pytest.mark.parametrize("n", [2, 3])
+    def test_dirichlet_multinomial(self, n):
+        """Run ``pymc3_matches_scipy`` against the reference log-PMF for length-``n`` vectors."""
+        value_domain = Vector(Nat, n)
+        param_domains = {"a": Vector(Rplus, n), "n": Nat}
+        self.pymc3_matches_scipy(
+            DirichletMultinomial, value_domain, param_domains, dirichlet_multinomial_logpmf
+        )
+
+    def test_dirichlet_multinomial_matches_beta_binomial(self):
+        """With two categories the DM logp must equal the BetaBinomial logp."""
+        a, b, n = 2, 1, 5
+        counts = np.arange(n + 1)
+        counts_dm = np.column_stack((counts, n - counts))  # convert 1 to [1, 4], for all counts
+        bb_logp = pm.BetaBinomial.dist(n=n, alpha=a, beta=b).logp(counts).tag.test_value
+        dm_logp = pm.DirichletMultinomial.dist(n=n, a=[a, b]).logp(counts_dm).tag.test_value
+        assert_allclose(bb_logp, dm_logp.ravel())
+
+    @pytest.mark.parametrize(
+        "a, n",
+        [
+            [[0.25, 0.25, 0.25, 0.25], 1],
+            [[0.3, 0.6, 0.05, 0.05], 2],
+            [[0.3, 0.6, 0.05, 0.05], 10],
+        ],
+    )
+    def test_dirichlet_multinomial_mode(self, a, n):
+        """Check that the mode sums to ``n``.
+
+        Exercised once with a 1d ``a`` and once with ``a`` stacked into two rows.
+        """
+        for a_arr, axis in ((np.array(a), None), (np.array([a, a]), -1)):
+            with Model():
+                m = DirichletMultinomial("m", n, a_arr, a_arr.shape)
+            assert_allclose(m.distribution.mode.eval().sum(axis=axis), n)
+
+    @pytest.mark.parametrize(
+        "a, shape, n",
+        [
+            [[0.25, 0.25, 0.25, 0.25], 4, 2],
+            [[0.25, 0.25, 0.25, 0.25], (1, 4), 3],
+            [[0.25, 0.25, 0.25, 0.25], (10, 4), [2] * 10],
+            [[0.25, 0.25, 0.25, 0.25], (10, 1, 4), 5],
+            [[[0.25, 0.25, 0.25, 0.25]], (2, 4), [7, 11]],
+            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), 13],
+            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (1, 2, 4), [23, 29]],
+            [
+                [[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]],
+                (10, 2, 4),
+                [31, 37],
+            ],
+            [[[0.25, 0.25, 0.25, 0.25], [0.25, 0.25, 0.25, 0.25]], (2, 4), [17, 19]],
+        ],
+    )
+    def test_dirichlet_multinomial_random(self, a, shape, n):
+        """Smoke test: ``random()`` runs for each parametrized (a, shape, n)."""
+        with Model():
+            rv = DirichletMultinomial("m", n=n, a=np.asarray(a), shape=shape)
+        rv.random()
+
+    def test_dirichlet_multinomial_mode_with_shape(self):
+        """Each row of the mode must sum to the matching entry of ``n``."""
+        n, a = [1, 10], np.asarray([[0.25, 0.25, 0.25, 0.25], [0.26, 0.26, 0.26, 0.22]])
+        with Model():
+            rv = DirichletMultinomial("m", n=n, a=a, shape=(2, 4))
+        assert_allclose(rv.distribution.mode.eval().sum(axis=-1), n)
+
+    def test_dirichlet_multinomial_vec(self):
+        """Compare a one-vector model and a batched model on the same count vectors.
+
+        Both models share ``n`` and ``a``. Asserts, with ``decimal=4``, that:
+
+        * per-row ``fastlogp`` of the one-vector model matches the reference log-PMF,
+        * ``logp_elemwise`` of the batched model matches the reference log-PMF,
+        * the per-row values sum to the batched model's ``fastlogp``.
+        """
+
+        vals = np.array([[2, 4, 4], [3, 3, 4]])
+        a = np.array([0.2, 0.3, 0.5])
+        n = 10
+
+        with Model() as model_single:
+            DirichletMultinomial("m", n=n, a=a, shape=len(a))
+
+        with Model() as model_many:
+            DirichletMultinomial("m", n=n, a=a, shape=vals.shape)
+
+        # Reference log-PMFs and the row-by-row logps are reused by the assertions.
+        expected = np.asarray([dirichlet_multinomial_logpmf(row, n, a) for row in vals])
+        row_logps = [model_single.fastlogp({"m": row}) for row in vals]
+        elemwise = model_many.free_RVs[0].logp_elemwise({"m": vals}).squeeze()
+
+        assert_almost_equal(expected, np.asarray(row_logps), decimal=4)
+        assert_almost_equal(expected, elemwise, decimal=4)
+        assert_almost_equal(sum(row_logps), model_many.fastlogp({"m": vals}), decimal=4)
+
+    def test_dirichlet_multinomial_vec_1d_n(self):
+        """Check logp with a per-row ``n`` and a shared 1d ``a``.
+
+        The model logp must equal the sum of the reference log-PMFs over rows.
+        """
+        vals = np.array([[2, 4, 4], [4, 3, 4]])
+        a = np.array([0.2, 0.3, 0.5])
+        ns = np.array([10, 11])
+        with Model() as model:
+            DirichletMultinomial("m", n=ns, a=a, shape=vals.shape)
+
+        expected = sum(dirichlet_multinomial_logpmf(row, n, a) for row, n in zip(vals, ns))
+        assert_almost_equal(expected, model.fastlogp({"m": vals}), decimal=4)
+
+    def test_dirichlet_multinomial_vec_1d_n_2d_a(self):
+        """Check logp with a per-row ``n`` and a per-row ``a``.
+
+        The model logp must equal the sum of the reference log-PMFs over rows.
+        """
+        vals = np.array([[2, 4, 4], [4, 3, 4]])
+        as_ = np.array([[0.2, 0.3, 0.5], [0.9, 0.09, 0.01]])
+        ns = np.array([10, 11])
+        with Model() as model:
+            DirichletMultinomial("m", n=ns, a=as_, shape=vals.shape)
+
+        expected = sum(dirichlet_multinomial_logpmf(v, n, a) for v, n, a in zip(vals, ns, as_))
+        assert_almost_equal(expected, model.fastlogp({"m": vals}), decimal=4)
+
+    def test_dirichlet_multinomial_vec_2d_a(self):
+        """Check logp with a scalar ``n`` and a per-row ``a``.
+
+        The model logp must equal the sum of the reference log-PMFs over rows.
+        """
+        vals = np.array([[2, 4, 4], [3, 3, 4]])
+        as_ = np.array([[0.2, 0.3, 0.5], [0.3, 0.3, 0.4]])
+        n = 10
+        with Model() as model:
+            DirichletMultinomial("m", n=n, a=as_, shape=vals.shape)
+
+        expected = sum(dirichlet_multinomial_logpmf(row, n, a) for row, a in zip(vals, as_))
+        assert_almost_equal(expected, model.fastlogp({"m": vals}), decimal=4)
+
+    def test_batch_dirichlet_multinomial(self):
+        # Test that DM can handle a 3d array for `a`, using a (4, 5, 3) batch
+        n = 10
+        # Create an almost deterministic DM by setting a to 0.001 everywhere
+        # except for one category / dimension, which is given the value of 100
+        vals = np.zeros((4, 5, 3), dtype="int32")
+        a = np.zeros_like(vals, dtype=theano.config.floatX) + 0.001
+        inds = np.random.randint(vals.shape[-1], size=vals.shape[:-1])[..., None]
+        np.put_along_axis(vals, inds, n, axis=-1)
+        np.put_along_axis(a, inds, 100, axis=-1)
+
+        dist = DirichletMultinomial.dist(n=n, a=a, shape=vals.shape)
+
+        # TODO: Test logp is as expected (not as simple as the Multinomial case)
+        # value = tt.tensor3(dtype="int32")
+        # value.tag.test_value = np.zeros_like(vals, dtype="int32")
+        # logp = tt.exp(dist.logp(value))
+        # f = theano.function(inputs=[value], outputs=logp)
+        # assert_almost_equal(
+        #     f(vals),
+        #     np.ones(vals.shape[:-1] + (1,)),
+        #     decimal=select_by_precision(float64=6, float32=3),
+        # )
+
+        # Both draws should equal `vals`, given the almost deterministic DM
+        sample = dist.random(size=2)
+        assert_allclose(sample, np.stack([vals, vals], axis=0))
+
     def test_categorical_bounds(self):
         with Model():
             x = Categorical("x", p=np.array([0.2, 0.3, 0.5]))

diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
@@ -989,6 +989,25 @@ def ref_rand(size, a):
                 ref_rand=ref_rand,
             )
 
+    def test_dirichlet_multinomial(self):
+        """Check random draws using a NumPy-based reference sampler.
+
+        Runs ``pymc3_random_discrete`` for vectors of length 2 and 3.
+        """
+        def ref_rand(size, a, n):
+            """Reference sampler: a fresh Dirichlet ``p`` for every multinomial draw."""
+            draws = [nr.multinomial(n=n, pvals=nr.dirichlet(a)) for _ in range(size)]
+            return np.array(draws, dtype=int).reshape(size, a.shape[-1])
+
+        for n in (2, 3):
+            pymc3_random_discrete(
+                pm.DirichletMultinomial,
+                {"a": Vector(Rplus, n), "n": Nat},
+                valuedomain=Vector(Nat, n),
+                size=1000,
+                ref_rand=ref_rand,
+            )
+
     def test_multinomial(self):
         def ref_rand(size, p, n):
             return nr.multinomial(pvals=p, n=n, size=size)