Skip to content

Commit

Permalink
Implement generalized poisson distribution
Browse files Browse the repository at this point in the history
Co-authored-by: Ricardo Vieira <28983449+ricardoV94@users.noreply.github.com>
  • Loading branch information
ally-lee and ricardoV94 committed Apr 5, 2022
1 parent 1f9a102 commit c6475e3
Show file tree
Hide file tree
Showing 6 changed files with 240 additions and 1 deletion.
1 change: 1 addition & 0 deletions docs/source/api/distributions/discrete.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Discrete
Bernoulli
DiscreteWeibull
Poisson
GeneralizedPoisson
NegativeBinomial
Constant
ZeroInflatedPoisson
Expand Down
2 changes: 2 additions & 0 deletions pymc/distributions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
Constant,
DiscreteUniform,
DiscreteWeibull,
GeneralizedPoisson,
Geometric,
HyperGeometric,
NegativeBinomial,
Expand Down Expand Up @@ -139,6 +140,7 @@
"BetaBinomial",
"Bernoulli",
"Poisson",
"GeneralizedPoisson",
"NegativeBinomial",
"Constant",
"ZeroInflatedPoisson",
Expand Down
155 changes: 154 additions & 1 deletion pymc/distributions/discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
from pymc.distributions.logprob import logp
from pymc.distributions.mixture import Mixture
from pymc.distributions.shape_utils import rv_size_is_none
from pymc.math import sigmoid
from pymc.math import log1mexp_numpy, log1pexp_numpy, sigmoid
from pymc.vartypes import continuous_types

__all__ = [
Expand All @@ -55,6 +55,7 @@
"Bernoulli",
"DiscreteWeibull",
"Poisson",
"GeneralizedPoisson",
"NegativeBinomial",
"Constant",
"ZeroInflatedPoisson",
Expand Down Expand Up @@ -668,6 +669,158 @@ def logcdf(value, mu):
return check_parameters(res, 0 <= mu, msg="mu >= 0")


class GeneralizedPoissonRV(RandomVariable):
name = "generalized_poisson"
ndim_supp = 0
ndims_params = [0, 0]
dtype = "int64"
_print_name = ("GeneralizedPoisson", "\\operatorname{GeneralizedPoisson}")

@classmethod
def rng_fn(cls, rng, theta, lam, size):

theta = np.asarray(theta)
lam = np.asarray(lam)

if size is not None:
dist_size = size
else:
dist_size = np.broadcast_shapes(theta.shape, lam.shape)

# Inversion algorithm described by Famoye (1997), computed on the log scale for
# numerical stability.
# TODO: Authors recommend mixing this method with the "branching" algorgithm,
# but that one is only valid for positive lambdas. They also mention the normal
# approximation for lambda < 0 and mu >= 10 or 0 < lambda < 0.2 and mu >= 30.
# This could be done by splitting the values depending on lambda and then
# recombining them.

log_u = np.log(rng.uniform(size=dist_size))

pos_lam = lam > 0
with np.errstate(divide="ignore", invalid="ignore"):
mixed_log_lam = np.where(pos_lam, np.log(lam), np.log(-lam))
theta_m_lam = theta - lam

log_s = -theta
log_p = log_s.copy()
x_ = 0
x = np.zeros(dist_size)
below_cutpoint = log_s < log_u
with np.errstate(divide="ignore", invalid="ignore"):
while np.any(below_cutpoint):
x_ += 1
x[below_cutpoint] += 1
log_c = np.log(theta_m_lam + lam * x_)
# Compute log(1 + lam / C)
log1p_lam_m_C = np.where(
pos_lam,
log1pexp_numpy(mixed_log_lam - log_c),
log1mexp_numpy(mixed_log_lam - log_c, negative_input=True),
)
log_p = log_c + log1p_lam_m_C * (x_ - 1) + log_p - np.log(x_) - lam
log_s = np.logaddexp(log_s, log_p)
below_cutpoint = log_s < log_u
return x


generalized_poisson = GeneralizedPoissonRV()


class GeneralizedPoisson(Discrete):
R"""
Generalized Poisson log-likelihood.
Used to model count data that can be either overdispersed or underdispersed.
Offers greater flexibility than the standard Poisson which assumes equidispersion,
where the mean is equal to the variance.
The pmf of this distribution is
.. math:: f(x \mid \mu, \lambda) =
\frac{\mu (\mu + \lambda x)^{x-1} e^{-\mu - \lambda x}}{x!}
======== ======================================
Support :math:`x \in \mathbb{N}_0`
Mean :math:`\frac{\mu}{1 - \lambda}`
Variance :math:`\frac{\mu}{(1 - \lambda)^3}`
======== ======================================
Parameters
----------
mu : tensor_like of float
Mean parameter (mu > 0).
lam : tensor_like of float
Dispersion parameter (max(-1, -mu/4) <= lam <= 1).
Notes
-----
When lam = 0, the Generalized Poisson reduces to the standard Poisson with the same mu.
When lam < 0, the mean is greater than the variance (underdispersion).
When lam > 0, the mean is less than the variance (overdispersion).
References
----------
The PMF is taken from [1] and the random generator function is adapted from [2].
.. [1] Consul, PoC, and Felix Famoye. "Generalized Poisson regression model."
Communications in Statistics-Theory and Methods 21.1 (1992): 89-109.
.. [2] Famoye, Felix. "Generalized Poisson random variate generation." American
Journal of Mathematical and Management Sciences 17.3-4 (1997): 219-237.
"""
rv_op = generalized_poisson

@classmethod
def dist(cls, mu, lam, **kwargs):
mu = at.as_tensor_variable(floatX(mu))
lam = at.as_tensor_variable(floatX(lam))
return super().dist([mu, lam], **kwargs)

def moment(rv, size, mu, lam):
mean = at.floor(mu / (1 - lam))
if not rv_size_is_none(size):
mean = at.full(size, mean)
return mean

def logp(value, mu, lam):
r"""
Calculate log-probability of Generalized Poisson distribution at specified value.
Parameters
----------
value: numeric
Value(s) for which log-probability is calculated. If the log probabilities for multiple
values are desired the values must be provided in a numpy array or Aesara tensor
Returns
-------
TensorVariable
"""
mu_lam_value = mu + lam * value
logprob = np.log(mu) + logpow(mu_lam_value, value - 1) - mu_lam_value - factln(value)

# Probability is 0 when value > m, where m is the largest positive integer for
# which mu + m * lam > 0 (when lam < 0).
logprob = at.switch(
at.or_(
mu_lam_value < 0,
value < 0,
),
-np.inf,
logprob,
)

return check_parameters(
logprob,
0 < mu,
at.abs(lam) <= 1,
(-mu / 4) <= lam,
msg="0 < mu, max(-1, -mu/4)) <= lam <= 1",
)


class NegativeBinomial(Discrete):
R"""
Negative binomial log-likelihood.
Expand Down
39 changes: 39 additions & 0 deletions pymc/tests/test_distributions.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def polyagamma_cdf(*args, **kwargs):
Exponential,
Flat,
Gamma,
GeneralizedPoisson,
Geometric,
Gumbel,
HalfCauchy,
Expand Down Expand Up @@ -1742,6 +1743,44 @@ def test_poisson(self):
{"mu": Rplus},
)

def test_generalized_poisson(self):
# We are only checking this distribution for lambda=0 where it's equivalent to Poisson.
self.check_logp(
GeneralizedPoisson,
Nat,
{"mu": Rplus, "lam": Domain([0], edges=(None, None))},
lambda value, mu, lam: sp.poisson.logpmf(value, mu),
skip_paramdomain_outside_edge_test=True,
)

# Check mu/lam restrictions
with aesara.config.change_flags(mode=Mode("py")):
with pytest.raises(ParameterValueError):
pm.logp(GeneralizedPoisson.dist(mu=1, lam=2), 1).eval()

with pytest.raises(ParameterValueError):
pm.logp(GeneralizedPoisson.dist(mu=0, lam=0), 1).eval()

with pytest.raises(ParameterValueError):
pm.logp(GeneralizedPoisson.dist(mu=1, lam=-1), 1).eval()

def test_generalized_poisson_lam_expected_moments(self):
# TODO: This is a costly test, we should find alternative to test logp
mu = 30
lam = np.array([-0.9, -0.7, -0.2, 0, 0.2, 0.7, 0.9])
with Model():
# We create separate dists, because the Metropolis sampler cannot find
# a good single step size to accomodate all lambda values
dist = [pm.GeneralizedPoisson(f"x_{l}", mu=mu, lam=l) for l in lam]
pm.Deterministic("x", at.stack(dist))
trace = pm.sample(return_inferencedata=False, chains=1, draws=10_000)

expected_mean = mu / (1 - lam)
np.testing.assert_allclose(trace["x"].mean(0), expected_mean, rtol=1e-1)

expected_std = np.sqrt(mu / (1 - lam) ** 3)
np.testing.assert_allclose(trace["x"].std(0), expected_std, rtol=1e-1)

def test_constantdist(self):
self.check_logp(Constant, I, {"c": I}, lambda value, c: np.log(c == value))
self.check_logcdf(Constant, I, {"c": I}, lambda value, c: np.log(value >= c))
Expand Down
14 changes: 14 additions & 0 deletions pymc/tests/test_distributions_moments.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
Exponential,
Flat,
Gamma,
GeneralizedPoisson,
Geometric,
Gumbel,
HalfCauchy,
Expand Down Expand Up @@ -592,6 +593,19 @@ def test_poisson_moment(mu, size, expected):
assert_moment_is_expected(model, expected)


@pytest.mark.parametrize(
"mu, lam, size, expected",
[
(50, [-0.6, 0, 0.6], None, np.floor(50 / (1 - np.array([-0.6, 0, 0.6])))),
([5, 50], -0.1, (4, 2), np.full((4, 2), np.floor(np.array([5, 50]) / 1.1))),
],
)
def test_generalized_poisson_moment(mu, lam, size, expected):
with Model() as model:
GeneralizedPoisson("x", mu=mu, lam=lam, size=size)
assert_moment_is_expected(model, expected)


@pytest.mark.parametrize(
"n, p, size, expected",
[
Expand Down
30 changes: 30 additions & 0 deletions pymc/tests/test_distributions_random.py
Original file line number Diff line number Diff line change
Expand Up @@ -972,6 +972,36 @@ class TestPoisson(BaseTestDistributionRandom):
checks_to_run = ["check_pymc_params_match_rv_op"]


class TestGeneralizedPoisson(BaseTestDistributionRandom):
pymc_dist = pm.GeneralizedPoisson
pymc_dist_params = {"mu": 4.0, "lam": 1.0}
expected_rv_op_params = {"mu": 4.0, "lam": 1.0}
tests_to_run = [
"check_pymc_params_match_rv_op",
"check_rv_size",
]

def test_random(self):
pymc_random_discrete(
dist=self.pymc_dist,
paramdomains={"mu": Rplus, "lam": Domain([0], edges=(None, None))},
ref_rand=lambda mu, lam, size: st.poisson.rvs(mu, size=size),
)

def test_random_lam_expected_moments(self):
mu = 50
lam = np.array([-0.9, -0.7, -0.2, 0, 0.2, 0.7, 0.9])

dist = self.pymc_dist.dist(mu=mu, lam=lam, size=(10_000, len(lam)))
draws = dist.eval()

expected_mean = mu / (1 - lam)
np.testing.assert_allclose(draws.mean(0), expected_mean, rtol=1e-1)

expected_std = np.sqrt(mu / (1 - lam) ** 3)
np.testing.assert_allclose(draws.std(0), expected_std, rtol=1e-1)


class TestMvNormalCov(BaseTestDistributionRandom):
pymc_dist = pm.MvNormal
pymc_dist_params = {
Expand Down

0 comments on commit c6475e3

Please sign in to comment.