pymc-devs · Harivallabha · Sep 17, 2020 · Sep 17, 2020 · Sep 17, 2020 · Sep 17, 2020
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
@@ -11,6 +11,7 @@
 
 ### New features
 - `sample_posterior_predictive_w` can now feed on `xarray.Dataset` - e.g. from `InferenceData.posterior`. (see [#4042](https://github.com/pymc-devs/pymc3/pull/4042))
+- Add the hypergeometric distribution, available as `pymc3.distributions.discrete.HyperGeometric`. (see [#4108](https://github.com/pymc-devs/pymc3/pull/4108))
 
 
 ## PyMC3 3.9.3 (11 August 2020)

diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py
@@ -63,6 +63,7 @@
 from .discrete import ZeroInflatedBinomial
 from .discrete import DiscreteUniform
 from .discrete import Geometric
+from .discrete import HyperGeometric
 from .discrete import Categorical
 from .discrete import OrderedLogistic
 
@@ -136,6 +137,7 @@
            'ZeroInflatedBinomial',
            'DiscreteUniform',
            'Geometric',
+           'HyperGeometric',
            'Categorical',
            'OrderedLogistic',
            'DensityDist',

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
@@ -28,7 +28,8 @@
 __all__ = ['Binomial',  'BetaBinomial',  'Bernoulli',  'DiscreteWeibull',
            'Poisson', 'NegativeBinomial', 'ConstantDist', 'Constant',
            'ZeroInflatedPoisson', 'ZeroInflatedBinomial', 'ZeroInflatedNegativeBinomial',
-           'DiscreteUniform', 'Geometric', 'Categorical', 'OrderedLogistic']
+           'DiscreteUniform', 'Geometric', 'HyperGeometric', 'Categorical',
+           'OrderedLogistic']
 
 
 class Binomial(Discrete):
@@ -819,6 +820,110 @@ def _repr_latex_(self, name=None, dist=None):
                                                 get_variable_name(p))
 
 
+class HyperGeometric(Discrete):
+    R"""
+    Hypergeometric log-likelihood.
-    Hypergeometric log-likelihood.
+    Discrete hypergeometric distribution.
-    Hypergeometric log-likelihood.
+    Discrete hypergeometric distribution.
+
+    The probability of x successes in a sequence of n Bernoulli
+    trials (That is, sample size = n) - where the population
+    size is N, containing a total of k successful individuals.
+    The process is carried out without replacement.
-    The probability of x successes in a sequence of n Bernoulli
-    trials (That is, sample size = n) - where the population
-    size is N, containing a total of k successful individuals.
-    The process is carried out without replacement.
+    The probability of :math:`x` successes in a sequence of :math:`n` bernoulli
+    trials taken without replacement from a population of :math:`N` objects,
+    containing :math:`k` good (or successful or Type I) objects.
-    The probability of x successes in a sequence of n Bernoulli
-    trials (That is, sample size = n) - where the population
-    size is N, containing a total of k successful individuals.
-    The process is carried out without replacement.
+    The probability of :math:`x` successes in a sequence of :math:`n` bernoulli
+    trials taken without replacement from a population of :math:`N` objects,
+    containing :math:`k` good (or successful or Type I) objects.
+
+    The pmf of this distribution is
+    .. math:: f(x \mid N, n, k) = \frac{\binom{k}{x}\binom{N-k}{n-x}}{\binom{N}{n}}
+    .. plot::
-    The pmf of this distribution is
-    .. math:: f(x \mid N, n, k) = \frac{\binom{k}{x}\binom{N-k}{n-x}}{\binom{N}{n}}
-    .. plot::
+    The pmf of this distribution is
+
+    .. math:: f(x \mid N, n, k) = \frac{\binom{k}{x}\binom{N-k}{n-x}}{\binom{N}{n}}
+
+    .. plot::
-    The pmf of this distribution is
-    .. math:: f(x \mid N, n, k) = \frac{\binom{k}{x}\binom{N-k}{n-x}}{\binom{N}{n}}
-    .. plot::
+    The pmf of this distribution is
+
+    .. math:: f(x \mid N, n, k) = \frac{\binom{k}{x}\binom{N-k}{n-x}}{\binom{N}{n}}
+
+    .. plot::
+        import matplotlib.pyplot as plt
+        import numpy as np
+        import scipy.stats as st
+        plt.style.use('seaborn-darkgrid')
+        x = np.arange(1, 15)
+        N = 50
+        k = 10
+        for n in [20, 25]:
+            pmf = st.hypergeom.pmf(x, N, k, n)
+            plt.plot(x, pmf, '-o', label='n = {}'.format(n))
+        plt.plot(x, pmf, '-o', label='N = {}'.format(N))
+        plt.plot(x, pmf, '-o', label='k = {}'.format(k))
+        plt.xlabel('x', fontsize=12)
+        plt.ylabel('f(x)', fontsize=12)
+        plt.legend(loc=1)
+        plt.show()
+
+    ========  =============================
+    Support   :math:`x \in \mathbb{N}_{>0}`
+    Mean      :math:`\dfrac{n.k}{N}`
+    Variance  :math:`\dfrac{(N-n).n.k.(N-k)}{(N-1).N^2}`
-    Mean      :math:`\dfrac{n.k}{N}`
-    Variance  :math:`\dfrac{(N-n).n.k.(N-k)}{(N-1).N^2}`
+    Mean      :math:`\dfrac{nk}{N}`
+    Variance  :math:`\dfrac{(N-n)nk(N-k)}{(N-1)N^2}`
-    Mean      :math:`\dfrac{n.k}{N}`
-    Variance  :math:`\dfrac{(N-n).n.k.(N-k)}{(N-1).N^2}`
+    Mean      :math:`\dfrac{nk}{N}`
+    Variance  :math:`\dfrac{(N-n)nk(N-k)}{(N-1)N^2}`
+    ========  =============================
+
+    Parameters
+    ----------
+    N : integer
+        Total size of the population
+    n : integer
+        Number of samples drawn from the population
+    k : integer
+        Number of successful individuals in the population
+    """
+
+    def __init__(self, N,  k, n, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.N = N = tt.as_tensor_variable(intX(N))
+        self.k = k = tt.as_tensor_variable(intX(k))
+        self.n = n = tt.as_tensor_variable(intX(n))
-        self.N = N = tt.as_tensor_variable(intX(N))
-        self.k = k = tt.as_tensor_variable(intX(k))
-        self.n = n = tt.as_tensor_variable(intX(n))
+        self.N = intX(N)
+        self.k = intX(k)
+        self.n = intX(n)
-        self.N = N = tt.as_tensor_variable(intX(N))
-        self.k = k = tt.as_tensor_variable(intX(k))
-        self.n = n = tt.as_tensor_variable(intX(n))
+        self.N = intX(N)
+        self.k = intX(k)
+        self.n = intX(n)
+        self.mode = intX(tt.floor((n + 1)*(k + 1)/(N + 2)))
+
+    def random(self, point=None, size=None):
+        r"""
+        Draw random values from HyperGeometric distribution.
+        Parameters
+        ----------
-        Draw random values from HyperGeometric distribution.
-        Parameters
-        ----------
+        Draw random values from HyperGeometric distribution.
+
+        Parameters
+        ----------
-        Draw random values from HyperGeometric distribution.
-        Parameters
-        ----------
+        Draw random values from HyperGeometric distribution.
+
+        Parameters
+        ----------
+        point : dict, optional
+            Dict of variable values on which random values are to be
+            conditioned (uses default point if not specified).
+        size : int, optional
+            Desired size of random sample (returns one sample if not
+            specified).
+        Returns
+        -------
+        array
-            specified).
-        Returns
-        -------
-        array
+            specified).
+
+        Returns
+        -------
+        array
-            specified).
-        Returns
-        -------
-        array
+            specified).
+
+        Returns
+        -------
+        array
+        """
+        N, n, k = draw_values([self.N, self.n, self.k], point=point, size=size)
+        return generate_samples(np.random.hypergeometric, N, n, k,
+                                dist_shape=self.shape,
+                                size=size)
+
+    def logp(self, value):
+        r"""
+        Calculate log-probability of HyperGeometric distribution at specified value.
+        Parameters
+        ----------
-        Calculate log-probability of HyperGeometric distribution at specified value.
-        Parameters
-        ----------
+        Calculate log-probability of HyperGeometric distribution at specified value.
+
+        Parameters
+        ----------
-        Calculate log-probability of HyperGeometric distribution at specified value.
-        Parameters
-        ----------
+        Calculate log-probability of HyperGeometric distribution at specified value.
+
+        Parameters
+        ----------
+        value : numeric
+            Value(s) for which log-probability is calculated. If the log probabilities for multiple
+            values are desired the values must be provided in a numpy array or theano tensor
+        Returns
+        -------
-            values are desired the values must be provided in a numpy array or theano tensor
-        Returns
-        -------
+            values are desired the values must be provided in a numpy array or theano tensor
+
+        Returns
+        -------
-            values are desired the values must be provided in a numpy array or theano tensor
-        Returns
-        -------
+            values are desired the values must be provided in a numpy array or theano tensor
+
+        Returns
+        -------
+        TensorVariable
+        """
+        N = self.N
+        k = self.k
+        n = self.n
+        return bound(binomln(k, value) + binomln(N - k, n - value) - binomln(N, n),
+                     0 <= k, k <= N, 0 <= n, 0 <= N, n - N + k <= value, 0 <= value,
+                     value <= k, value <= n)
+
+    def _repr_latex_(self, name=None, dist=None):
+        if dist is None:
+            dist = self
+        N = dist.N
+        n = dist.n
+        k = dist.k
+        name = r'\text{%s}' % name
+        return r'${} \sim \text{{HyperGeometric}}(\mathit{{N}}={},~\mathit{{n}}={},~\mathit{{k}}={})$'.format(name,
+                                                  get_variable_name(N),
+                                                  get_variable_name(n),
+                                                  get_variable_name(k))
+
+
 class DiscreteUniform(Discrete):
     R"""
     Discrete uniform distribution.

diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
@@ -75,6 +75,7 @@
     Rice,
     Kumaraswamy,
     Moyal,
+    HyperGeometric
 )
 
 from ..distributions import continuous
@@ -817,6 +818,10 @@ def test_geometric(self):
             Geometric, Nat, {"p": Unit}, lambda value, p: np.log(sp.geom.pmf(value, p))
         )
 
+    def test_hypergeometric(self):
+        self.pymc3_matches_scipy(HyperGeometric, Nat, {'N': NatSmall, 'n': NatSmall, 'k': NatSmall},
+                                 lambda value, N, n, k: sp.hypergeom.logpmf(value, N, k, n))
+
     def test_negative_binomial(self):
         def test_fun(value, mu, alpha):
             return sp.nbinom.logpmf(value, alpha, 1 - mu / (mu + alpha))

diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
@@ -467,6 +467,11 @@ class TestGeometric(BaseTestCases.BaseTestCase):
     distribution = pm.Geometric
     params = {'p': 0.5}
 
+
+class TestHyperGeometric(BaseTestCases.BaseTestCase):
+    distribution = pm.HyperGeometric
+    params = {'N': 50, 'n' : 25, 'k' :10}
+
 
 class TestMoyal(BaseTestCases.BaseTestCase):
     distribution = pm.Moyal
@@ -657,6 +662,10 @@ def ref_rand(size, alpha, mu):
     def test_geometric(self):
         pymc3_random_discrete(pm.Geometric, {'p': Unit}, size=500, fails=50, ref_rand=nr.geometric)
 
+    def test_hypergeometric(self):
+        pymc3_random_discrete(pm.HyperGeometric, {'N': Nat, 'n': Nat, 'k': Nat}, size=500, fails=50,
+                                                   ref_rand=nr.hypergeometric)
+
     def test_discrete_uniform(self):
         def ref_rand(size, lower, upper):
             return st.randint.rvs(lower, upper + 1, size=size)