Skip to content

Commit

Permalink
Merge pull request #2251 from pymc-devs/zero_inflated_binomial
Browse files Browse the repository at this point in the history
Zero inflated binomial
  • Loading branch information
fonnesbeck authored Jun 2, 2017
2 parents fc20e31 + 5184768 commit 0974ecc
Show file tree
Hide file tree
Showing 6 changed files with 120 additions and 18 deletions.
2 changes: 1 addition & 1 deletion pymc3/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from .external import *
from .glm import *
from . import gp
from .math import logsumexp, logit, invlogit, expand_packed_triangular, probit, invprobit
from .math import logaddexp, logsumexp, logit, invlogit, expand_packed_triangular, probit, invprobit
from .model import *
from .stats import *
from .sampling import *
Expand Down
2 changes: 2 additions & 0 deletions pymc3/distributions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from .discrete import Constant
from .discrete import ZeroInflatedPoisson
from .discrete import ZeroInflatedNegativeBinomial
from .discrete import ZeroInflatedBinomial
from .discrete import DiscreteUniform
from .discrete import Geometric
from .discrete import Categorical
Expand Down Expand Up @@ -106,6 +107,7 @@
'Constant',
'ZeroInflatedPoisson',
'ZeroInflatedNegativeBinomial',
'ZeroInflatedBinomial',
'DiscreteUniform',
'Geometric',
'Categorical',
Expand Down
120 changes: 104 additions & 16 deletions pymc3/distributions/discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
from .dist_math import bound, factln, binomln, betaln, logpow
from .distribution import Discrete, draw_values, generate_samples, reshape_sampled
from pymc3.math import tround
from ..math import logaddexp

__all__ = ['Binomial', 'BetaBinomial', 'Bernoulli', 'DiscreteWeibull',
'Poisson', 'NegativeBinomial', 'ConstantDist', 'Constant',
'ZeroInflatedPoisson', 'ZeroInflatedNegativeBinomial',
'ZeroInflatedPoisson', 'ZeroInflatedBinomial', 'ZeroInflatedNegativeBinomial',
'DiscreteUniform', 'Geometric', 'Categorical']


Expand Down Expand Up @@ -593,7 +594,7 @@ class ZeroInflatedPoisson(Discrete):
.. math::
f(x \mid \theta, \psi) = \left\{ \begin{array}{l}
f(x \mid \psi, \theta) = \left\{ \begin{array}{l}
(1-\psi) + \psi e^{-\theta}, \text{if } x = 0 \\
\psi \frac{e^{-\theta}\theta^x}{x!}, \text{if } x=1,2,3,\ldots
\end{array} \right.
Expand All @@ -606,15 +607,14 @@ class ZeroInflatedPoisson(Discrete):
Parameters
----------
psi : float
Expected proportion of Poisson variates (0 < psi < 1)
theta : float
Expected number of occurrences during the given interval
(theta >= 0).
psi : float
Expected proportion of Poisson variates (0 < psi < 1)
"""

def __init__(self, theta, psi, *args, **kwargs):
def __init__(self, psi, theta, *args, **kwargs):
super(ZeroInflatedPoisson, self).__init__(*args, **kwargs)
self.theta = theta = tt.as_tensor_variable(theta)
self.psi = psi = tt.as_tensor_variable(psi)
Expand All @@ -630,9 +630,17 @@ def random(self, point=None, size=None, repeat=None):
return reshape_sampled(sampled, size, self.shape)

def logp(self, value):
return tt.switch(value > 0,
tt.log(self.psi) + self.pois.logp(value),
tt.log((1. - self.psi) + self.psi * tt.exp(-self.theta)))
psi = self.psi
theta = self.theta

logp_val = tt.switch(value > 0,
tt.log(psi) + self.pois.logp(value),
logaddexp(tt.log1p(-psi), tt.log(psi) - theta))

return bound(logp_val,
0 <= value,
0 <= psi, psi <= 1,
0 <= theta)

def _repr_latex_(self, name=None, dist=None):
if dist is None:
Expand All @@ -644,6 +652,76 @@ def _repr_latex_(self, name=None, dist=None):
get_variable_name(psi))


class ZeroInflatedBinomial(Discrete):
R"""
Zero-inflated Binomial log-likelihood.
.. math::
f(x \mid \psi, n, p) = \left\{ \begin{array}{l}
(1-\psi) + \psi (1-p)^{n}, \text{if } x = 0 \\
\psi {n \choose x} p^x (1-p)^{n-x}, \text{if } x=1,2,3,\ldots,n
\end{array} \right.
======== ==========================
Support :math:`x \in \mathbb{N}_0`
Mean :math:`(1 - \psi) n p`
Variance :math:`(1-\psi) n p [1 - p(1 - \psi n)].`
======== ==========================
Parameters
----------
psi : float
Expected proportion of Binomial variates (0 < psi < 1)
n : int
Number of Bernoulli trials (n >= 0).
p : float
Probability of success in each trial (0 < p < 1).
"""

def __init__(self, psi, n, p, *args, **kwargs):
super(ZeroInflatedBinomial, self).__init__(*args, **kwargs)
self.n = n = tt.as_tensor_variable(n)
self.p = p = tt.as_tensor_variable(p)
self.psi = psi = tt.as_tensor_variable(psi)
self.bin = Binomial.dist(n, p)
self.mode = self.bin.mode

def random(self, point=None, size=None, repeat=None):
n, p, psi = draw_values([self.n, self.p, self.psi], point=point)
g = generate_samples(stats.binom.rvs, n, p,
dist_shape=self.shape,
size=size)
sampled = g * (np.random.random(np.squeeze(g.shape)) < psi)
return reshape_sampled(sampled, size, self.shape)

def logp(self, value):
psi = self.psi
p = self.p
n = self.n

logp_val = tt.switch(value > 0,
tt.log(psi) + self.bin.logp(value),
logaddexp(tt.log1p(-psi), tt.log(psi) + n * tt.log1p(-p)))

return bound(logp_val,
0 <= value, value <= n,
0 <= psi, psi <= 1,
0 <= p, p <= 1)

def _repr_latex_(self, name=None, dist=None):
if dist is None:
dist = self
n = dist.n
p = dist.p
psi = dist.psi
return r'${} \sim \text{{ZeroInflatedBinomial}}(\mathit{{n}}={}, \mathit{{p}}={}, \mathit{{psi}}={})$'.format(name,
get_variable_name(n),
get_variable_name(p),
get_variable_name(psi))


class ZeroInflatedNegativeBinomial(Discrete):
R"""
Zero-Inflated Negative binomial log-likelihood.
Expand All @@ -654,7 +732,7 @@ class ZeroInflatedNegativeBinomial(Discrete):
.. math::
f(x \mid \mu, \alpha, \psi) = \left\{ \begin{array}{l}
f(x \mid \psi, \mu, \alpha) = \left\{ \begin{array}{l}
(1-\psi) + \psi \left (\frac{\alpha}{\alpha+\mu} \right) ^\alpha, \text{if } x = 0 \\
\psi \frac{\Gamma(x+\alpha)}{x! \Gamma(\alpha)} \left (\frac{\alpha}{\mu+\alpha} \right)^\alpha \left( \frac{\mu}{\mu+\alpha} \right)^x, \text{if } x=1,2,3,\ldots
\end{array} \right.
Expand All @@ -667,15 +745,16 @@ class ZeroInflatedNegativeBinomial(Discrete):
Parameters
----------
psi : float
Expected proportion of NegativeBinomial variates (0 < psi < 1)
mu : float
Poission distribution parameter (mu > 0).
alpha : float
Gamma distribution parameter (alpha > 0).
psi : float
Expected proportion of NegativeBinomial variates (0 < psi < 1)
"""

def __init__(self, mu, alpha, psi, *args, **kwargs):
def __init__(self, psi, mu, alpha, *args, **kwargs):
super(ZeroInflatedNegativeBinomial, self).__init__(*args, **kwargs)
self.mu = mu = tt.as_tensor_variable(mu)
self.alpha = alpha = tt.as_tensor_variable(alpha)
Expand All @@ -694,9 +773,18 @@ def random(self, point=None, size=None, repeat=None):
return reshape_sampled(sampled, size, self.shape)

def logp(self, value):
return tt.switch(value > 0,
tt.log(self.psi) + self.nb.logp(value),
tt.log((1. - self.psi) + self.psi * (self.alpha / (self.alpha + self.mu))**self.alpha))
alpha = self.alpha
mu = self.mu
psi = self.psi

logp_val = tt.switch(value > 0,
tt.log(psi) + self.nb.logp(value),
logaddexp(tt.log1p(-psi), tt.log(psi) + alpha * (tt.log(alpha) - tt.log(alpha + mu))))

return bound(logp_val,
0 <= value,
0 <= psi, psi <= 1,
mu > 0, alpha > 0)

def _repr_latex_(self, name=None, dist=None):
if dist is None:
Expand Down
5 changes: 5 additions & 0 deletions pymc3/math.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ def logsumexp(x, axis=None):
x_max = tt.max(x, axis=axis, keepdims=True)
return tt.log(tt.sum(tt.exp(x - x_max), axis=axis, keepdims=True)) + x_max

def logaddexp(a, b):
diff = b - a
return tt.switch(diff > 0,
b + tt.log1p(tt.exp(-diff)),
a + tt.log1p(tt.exp(diff)))

def invlogit(x, eps=sys.float_info.epsilon):
return (1. - 2. * eps) / (1. + tt.exp(-x)) + eps
Expand Down
6 changes: 5 additions & 1 deletion pymc3/tests/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
NegativeBinomial, Geometric, Exponential, ExGaussian, Normal,
Flat, LKJCorr, Wald, ChiSquared, HalfNormal, DiscreteUniform,
Bound, Uniform, Triangular, Binomial, SkewNormal, DiscreteWeibull, Gumbel,
Interpolated)
Interpolated, ZeroInflatedBinomial)
from ..distributions import continuous
from pymc3.theanof import floatX
from numpy import array, inf, log, exp
Expand Down Expand Up @@ -591,6 +591,10 @@ def test_zeroinflatednegativebinomial(self):
self.checkd(ZeroInflatedNegativeBinomial, Nat,
{'mu': Rplusbig, 'alpha': Rplusbig, 'psi': Unit})

def test_zeroinflatedbinomial(self):
self.checkd(ZeroInflatedBinomial, Nat,
{'n': NatSmall, 'p': Unit, 'psi': Unit})

@pytest.mark.parametrize('n', [1, 2, 3])
def test_mvnormal(self, n):
self.pymc3_matches_scipy(MvNormal, RealMatrix(5, n),
Expand Down
3 changes: 3 additions & 0 deletions pymc3/tests/test_distributions_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,9 @@ class TestZeroInflatedNegativeBinomial(BaseTestCases.BaseTestCase):
distribution = pm.ZeroInflatedNegativeBinomial
params = {'mu': 1., 'alpha': 1., 'psi': 0.3}

class TestZeroInflatedBinomial(BaseTestCases.BaseTestCase):
distribution = pm.ZeroInflatedBinomial
params = {'n': 10, 'p': 0.6, 'psi': 0.3}

class TestDiscreteUniform(BaseTestCases.BaseTestCase):
distribution = pm.DiscreteUniform
Expand Down

0 comments on commit 0974ecc

Please sign in to comment.