From fa0e9309a603631f0aa9482bbe136f75b08adbd6 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 13 Apr 2021 16:55:22 +0200 Subject: [PATCH 01/12] Fix Uniform logp regression from #4541 --- pymc3/distributions/continuous.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 9e03eb6d61..a2b9bd4f47 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -253,8 +253,6 @@ class Uniform(BoundedContinuous): def dist(cls, lower=0, upper=1, **kwargs): lower = at.as_tensor_variable(floatX(lower)) upper = at.as_tensor_variable(floatX(upper)) - # mean = (upper + lower) / 2.0 - # median = self.mean return super().dist([lower, upper], **kwargs) def logp(value, lower, upper): @@ -270,7 +268,11 @@ def logp(value, lower, upper): ------- TensorVariable """ - return bound(-at.log(upper - lower), value >= lower, value <= upper) + return bound( + at.fill(value, -at.log(upper - lower)), + value >= lower, + value <= upper, + ) def logcdf(value, lower, upper): """ From 03e1df5d88a1f6b24bf3721e5f751fc88164d1d2 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 3 May 2021 16:37:50 +0200 Subject: [PATCH 02/12] Refactor DiscreteUniform --- pymc3/distributions/discrete.py | 59 +++++++++--------------- pymc3/tests/test_distributions.py | 8 ++-- pymc3/tests/test_distributions_random.py | 34 +++++++------- 3 files changed, 44 insertions(+), 57 deletions(-) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 03672ac41e..385efa3c8a 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -970,6 +970,21 @@ def logcdf(value, good, bad, n): ) +class DiscreteUniformRV(RandomVariable): + name = "discrete_uniform" + ndim_supp = 0 + ndims_params = [0, 0] + dtype = "int64" + _print_name = ("DiscreteUniform", "\\operatorname{DiscreteUniform}") + + @classmethod + def rng_fn(cls, rng, lower, upper, size=None): + return stats.randint.rvs(lower, upper + 1, size=size, random_state=rng) + + +discrete_uniform = DiscreteUniformRV() + + class DiscreteUniform(Discrete): R""" Discrete uniform distribution. @@ -1010,39 +1025,15 @@ class DiscreteUniform(Discrete): Upper limit (upper > lower). """ - def __init__(self, lower, upper, *args, **kwargs): - super().__init__(*args, **kwargs) - self.lower = intX(at.floor(lower)) - self.upper = intX(at.floor(upper)) - self.mode = at.maximum(intX(at.floor((upper + lower) / 2.0)), self.lower) - - def _random(self, lower, upper, size=None): - # This way seems to be the only to deal with lower and upper - # as array-like. - samples = stats.randint.rvs(lower, upper + 1, size=size) - return samples - - def random(self, point=None, size=None): - r""" - Draw random values from DiscreteUniform distribution. - - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). + rv_op = discrete_uniform - Returns - ------- - array - """ - # lower, upper = draw_values([self.lower, self.upper], point=point, size=size) - # return generate_samples(self._random, lower, upper, dist_shape=self.shape, size=size) + @classmethod + def dist(cls, lower, upper, *args, **kwargs): + lower = intX(at.floor(lower)) + upper = intX(at.floor(upper)) + return super().dist([lower, upper], **kwargs) - def logp(self, value): + def logp(value, lower, upper): r""" Calculate log-probability of DiscreteUniform distribution at specified value. @@ -1056,15 +1047,13 @@ def logp(self, value): ------- TensorVariable """ - upper = self.upper - lower = self.lower return bound( at.fill(value, -at.log(upper - lower + 1)), lower <= value, value <= upper, ) - def logcdf(self, value): + def logcdf(value, lower, upper): """ Compute the log of the cumulative distribution function for Discrete uniform distribution at the specified value. @@ -1079,8 +1068,6 @@ def logcdf(self, value): ------- TensorVariable """ - upper = self.upper - lower = self.lower return bound( at.switch( diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 397d2ba6af..7e0ae4753a 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -929,7 +929,6 @@ def test_bound_normal(self): x = PositiveNormal("x", mu=0, sigma=1, transform=None) assert np.isinf(logpt(x, -1).eval()) - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_discrete_unif(self): self.check_logp( DiscreteUniform, @@ -2817,17 +2816,16 @@ def test_issue_3051(self, dims, dist_cls, kwargs): assert isinstance(actual_a, np.ndarray) assert actual_a.shape == (X.shape[0],) - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_issue_4499(self): # Test for bug in Uniform and DiscreteUniform logp when setting check_bounds = False # https://github.com/pymc-devs/pymc3/issues/4499 with pm.Model(check_bounds=False) as m: x = pm.Uniform("x", 0, 2, shape=10, transform=None) - assert_almost_equal(m.logp_array(np.ones(10)), -np.log(2) * 10) + assert_almost_equal(m.logp({"x": np.ones(10)}), -np.log(2) * 10) with pm.Model(check_bounds=False) as m: - x = pm.DiscreteUniform("x", 0, 1, shape=10) - assert_almost_equal(m.logp_array(np.ones(10)), -np.log(2) * 10) + x = pm.DiscreteUniform("x", 0, 1, size=10) + assert_almost_equal(m.logp({"x": np.ones(10)}), -np.log(2) * 10) @pytest.mark.xfail(reason="DensityDist no longer supported") diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 1a7aa185a4..4ce2902fca 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -42,7 +42,6 @@ Domain, I, Nat, - NatSmall, PdMatrix, PdMatrixChol, R, @@ -339,12 +338,6 @@ class TestZeroInflatedBinomial(BaseTestCases.BaseTestCase): params = {"n": 10, "p": 0.6, "psi": 0.3} -@pytest.mark.xfail(reason="This distribution has not been refactored for v4") -class TestDiscreteUniform(BaseTestCases.BaseTestCase): - distribution = pm.DiscreteUniform - params = {"lower": 0.0, "upper": 10.0} - - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") class TestMoyal(BaseTestCases.BaseTestCase): distribution = pm.Moyal @@ -907,6 +900,24 @@ class TestBetaBinomial(BaseTestDistribution): ] +class TestDiscreteUniform(BaseTestDistribution): + def discrete_uniform_rng_fn(self, size, lower, upper, rng): + return st.randint.rvs(lower, upper + 1, size=size, random_state=rng) + + pymc_dist = pm.DiscreteUniform + pymc_dist_params = {"lower": -1, "upper": 9} + expected_rv_op_params = {"lower": -1, "upper": 9} + reference_dist_params = {"lower": -1, "upper": 9} + reference_dist = lambda self: functools.partial( + self.discrete_uniform_rng_fn, rng=self.get_random_state() + ) + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + "check_rv_size", + ] + + class TestScalarParameterSamples(SeededTest): @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_bounded(self): @@ -1016,15 +1027,6 @@ def test_half_flat(self): with pytest.raises(ValueError): f.random(1) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") - def test_discrete_uniform(self): - def ref_rand(size, lower, upper): - return st.randint.rvs(lower, upper + 1, size=size) - - pymc3_random_discrete( - pm.DiscreteUniform, {"lower": -NatSmall, "upper": NatSmall}, ref_rand=ref_rand - ) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_constant_dist(self): def ref_rand(size, c): From ea7afbab83c62e8a742fe089d8fb3a2e33205d43 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 3 May 2021 17:11:40 +0200 Subject: [PATCH 03/12] Refactor Constant --- pymc3/distributions/__init__.py | 2 - pymc3/distributions/discrete.py | 67 ++++++++++-------------- pymc3/tests/test_distributions.py | 7 ++- pymc3/tests/test_distributions_random.py | 32 ++++++----- 4 files changed, 50 insertions(+), 58 deletions(-) diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index d92dad0cfe..807b483712 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -63,7 +63,6 @@ Binomial, Categorical, Constant, - ConstantDist, DiscreteUniform, DiscreteWeibull, Geometric, @@ -138,7 +137,6 @@ "Bernoulli", "Poisson", "NegativeBinomial", - "ConstantDist", "Constant", "ZeroInflatedPoisson", "ZeroInflatedNegativeBinomial", diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 385efa3c8a..54d561ddce 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -11,8 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import warnings - import aesara.tensor as at import numpy as np @@ -51,7 +49,6 @@ "DiscreteWeibull", "Poisson", "NegativeBinomial", - "ConstantDist", "Constant", "ZeroInflatedPoisson", "ZeroInflatedBinomial", @@ -1164,6 +1161,23 @@ def logp(value, p): ) +class ConstantRV(RandomVariable): + name = "constant" + ndim_supp = 0 + ndims_params = [0] + dtype = "floatX" # Should be treated as a discrete variable! + _print_name = ("Constant", "\\operatorname{Constant}") + + @classmethod + def rng_fn(cls, rng, c, size=None): + if size is None: + return c.copy() + return np.full(size, c) + + +constant = ConstantRV() + + class Constant(Discrete): r""" Constant log-likelihood. @@ -1174,40 +1188,14 @@ class Constant(Discrete): Constant parameter. """ - def __init__(self, c, *args, **kwargs): - warnings.warn( - "Constant has been deprecated. We recommend using a Deterministic object instead.", - DeprecationWarning, - ) - super().__init__(*args, **kwargs) - self.mean = self.median = self.mode = self.c = c = at.as_tensor_variable(c) - - def random(self, point=None, size=None): - r""" - Draw random values from Constant distribution. - - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). + rv_op = constant - Returns - ------- - array - """ - # c = draw_values([self.c], point=point, size=size)[0] - # dtype = np.array(c).dtype - # - # def _random(c, dtype=dtype, size=None): - # return np.full(size, fill_value=c, dtype=dtype) - # - # return generate_samples(_random, c=c, dist_shape=self.shape, size=size).astype(dtype) + @classmethod + def dist(cls, c, *args, **kwargs): + c = at.as_tensor_variable(floatX(c)) + return super().dist([c], **kwargs) - def logp(self, value): + def logp(value, c): r""" Calculate log-probability of Constant distribution at specified value. @@ -1221,11 +1209,10 @@ def logp(self, value): ------- TensorVariable """ - c = self.c - return bound(0, at.eq(value, c)) - - -ConstantDist = Constant + return bound( + at.zeros_like(value), + at.eq(value, c), + ) class ZeroInflatedPoisson(Discrete): diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 7e0ae4753a..2c384cba55 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -1614,7 +1614,6 @@ def test_bound_poisson(self): x = NonZeroPoisson("x", mu=4) assert np.isinf(logpt(x, 0).eval()) - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_constantdist(self): self.check_logp(Constant, I, {"c": I}, lambda value, c: np.log(c == value)) @@ -2820,13 +2819,17 @@ def test_issue_4499(self): # Test for bug in Uniform and DiscreteUniform logp when setting check_bounds = False # https://github.com/pymc-devs/pymc3/issues/4499 with pm.Model(check_bounds=False) as m: - x = pm.Uniform("x", 0, 2, shape=10, transform=None) + x = pm.Uniform("x", 0, 2, size=10, transform=None) assert_almost_equal(m.logp({"x": np.ones(10)}), -np.log(2) * 10) with pm.Model(check_bounds=False) as m: x = pm.DiscreteUniform("x", 0, 1, size=10) assert_almost_equal(m.logp({"x": np.ones(10)}), -np.log(2) * 10) + with pm.Model(check_bounds=False) as m: + x = pm.Constant("x", 1, size=10) + assert_almost_equal(m.logp({"x": np.ones(10)}), 0 * 10) + @pytest.mark.xfail(reason="DensityDist no longer supported") def test_serialize_density_dist(): diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 4ce2902fca..16b895cf97 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -40,7 +40,6 @@ from pymc3.tests.helpers import SeededTest, select_by_precision from pymc3.tests.test_distributions import ( Domain, - I, Nat, PdMatrix, PdMatrixChol, @@ -314,12 +313,6 @@ class TestLogitNormal(BaseTestCases.BaseTestCase): params = {"mu": 0.0, "sigma": 1.0} -@pytest.mark.xfail(reason="This distribution has not been refactored for v4") -class TestConstant(BaseTestCases.BaseTestCase): - distribution = pm.Constant - params = {"c": 3} - - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") class TestZeroInflatedPoisson(BaseTestCases.BaseTestCase): distribution = pm.ZeroInflatedPoisson @@ -918,6 +911,24 @@ def discrete_uniform_rng_fn(self, size, lower, upper, rng): ] +class TestConstant(BaseTestDistribution): + def constant_rng_fn(self, size, c): + if size is None: + return c + return np.full(size, c) + + pymc_dist = pm.Constant + pymc_dist_params = {"c": 3} + expected_rv_op_params = {"c": 3} + reference_dist_params = {"c": 3} + reference_dist = lambda self: self.constant_rng_fn + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + "check_rv_size", + ] + + class TestScalarParameterSamples(SeededTest): @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_bounded(self): @@ -1027,13 +1038,6 @@ def test_half_flat(self): with pytest.raises(ValueError): f.random(1) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") - def test_constant_dist(self): - def ref_rand(size, c): - return c * np.ones(size, dtype=int) - - pymc3_random_discrete(pm.Constant, {"c": I}, ref_rand=ref_rand) - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_matrix_normal(self): def ref_rand(size, mu, rowcov, colcov): From 2a1071db91b0b3eea92cd91ae4ff36e4d10f23a6 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 3 May 2021 17:39:46 +0200 Subject: [PATCH 04/12] Refactor OrderedLogistic --- pymc3/distributions/discrete.py | 13 +++++++----- pymc3/tests/test_distributions.py | 27 +++++++++++++++--------- pymc3/tests/test_distributions_random.py | 10 +++++++++ 3 files changed, 35 insertions(+), 15 deletions(-) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 54d561ddce..8a315ac581 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -1717,11 +1717,14 @@ class OrderedLogistic(Categorical): """ - def __init__(self, eta, cutpoints, *args, **kwargs): - self.eta = at.as_tensor_variable(floatX(eta)) - self.cutpoints = at.as_tensor_variable(cutpoints) + rv_op = categorical - pa = sigmoid(self.cutpoints - at.shape_padright(self.eta)) + @classmethod + def dist(cls, eta, cutpoints, *args, **kwargs): + eta = at.as_tensor_variable(floatX(eta)) + cutpoints = at.as_tensor_variable(cutpoints) + + pa = sigmoid(cutpoints - at.shape_padright(eta)) p_cum = at.concatenate( [ at.zeros_like(at.shape_padright(pa[..., 0])), @@ -1732,7 +1735,7 @@ def __init__(self, eta, cutpoints, *args, **kwargs): ) p = p_cum[..., 1:] - p_cum[..., :-1] - super().__init__(p=p, *args, **kwargs) + return super().dist(p, **kwargs) class OrderedProbit(Categorical): diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 2c384cba55..6b1e8b17d4 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -101,6 +101,7 @@ continuous, logcdf, logpt, + logpt_sum, ) from pymc3.math import kronecker, logsumexp from pymc3.model import Deterministic, Model, Point @@ -2297,7 +2298,6 @@ def test_categorical(self, n): ) @pytest.mark.parametrize("n", [2, 3, 4]) - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_orderedlogistic(self, n): self.check_logp( OrderedLogistic, @@ -2738,27 +2738,34 @@ def test_discrete_trafo(): err.match("Transformations for discrete distributions") +# TODO: Is this test working as expected / still relevant? @pytest.mark.parametrize("shape", [tuple(), (1,), (3, 1), (3, 2)], ids=str) -@pytest.mark.xfail(reason="Distribution not refactored yet") def test_orderedlogistic_dimensions(shape): # Test for issue #3535 loge = np.log10(np.exp(1)) size = 7 p = np.ones(shape + (10,)) / 10 cutpoints = np.tile(logit(np.linspace(0, 1, 11)[1:-1]), shape + (1,)) - obs = np.random.randint(0, 1, size=(size,) + shape) + obs = np.random.randint(0, 2, size=(size,) + shape) with Model(): ol = OrderedLogistic( - "ol", eta=np.zeros(shape), cutpoints=cutpoints, size=shape, observed=obs - ) - c = Categorical("c", p=p, size=shape, observed=obs) - ologp = logpt(ol, 1).eval() * loge - clogp = logpt(c, 1) * loge + "ol", + eta=np.zeros(shape), + cutpoints=cutpoints, + observed=obs, + ) + c = Categorical( + "c", + p=p, + observed=obs, + ) + ologp = logpt_sum(ol, np.ones_like(obs)).eval() * loge + clogp = logpt_sum(c, np.ones_like(obs)).eval() * loge expected = -np.prod((size,) + shape) - assert c.distribution.p.ndim == (len(shape) + 1) + assert c.owner.inputs[3].ndim == (len(shape) + 1) assert np.allclose(clogp, expected) - assert ol.distribution.p.ndim == (len(shape) + 1) + assert ol.owner.inputs[3].ndim == (len(shape) + 1) assert np.allclose(ologp, expected) diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 16b895cf97..849fc6be47 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -929,6 +929,16 @@ def constant_rng_fn(self, size, c): ] +class TestOrderedLogistic(BaseTestDistribution): + pymc_dist = pm.OrderedLogistic + pymc_dist_params = {"eta": 0, "cutpoints": np.array([-2, 0, 2])} + expected_rv_op_params = {"p": np.array([0.11920292, 0.38079708, 0.38079708, 0.11920292])} + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_rv_size", + ] + + class TestScalarParameterSamples(SeededTest): @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_bounded(self): From 9c40f8fec3c93c3f4a0c3423061f4308ed8a3b4e Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 6 May 2021 16:52:19 +0200 Subject: [PATCH 05/12] Refactor OrderedProbit --- pymc3/distributions/discrete.py | 50 ++++-------------------- pymc3/tests/test_distributions.py | 1 - pymc3/tests/test_distributions_random.py | 10 +++++ 3 files changed, 17 insertions(+), 44 deletions(-) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 8a315ac581..c905359b2f 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -1808,12 +1808,14 @@ class OrderedProbit(Categorical): """ - def __init__(self, eta, cutpoints, *args, **kwargs): + rv_op = categorical - self.eta = at.as_tensor_variable(floatX(eta)) - self.cutpoints = at.as_tensor_variable(cutpoints) + @classmethod + def dist(cls, eta, cutpoints, *args, **kwargs): + eta = at.as_tensor_variable(floatX(eta)) + cutpoints = at.as_tensor_variable(cutpoints) - probits = at.shape_padright(self.eta) - self.cutpoints + probits = at.shape_padright(eta) - cutpoints _log_p = at.concatenate( [ at.shape_padright(normal_lccdf(0, 1, probits[..., 0])), @@ -1823,44 +1825,6 @@ def __init__(self, eta, cutpoints, *args, **kwargs): axis=-1, ) _log_p = at.as_tensor_variable(floatX(_log_p)) - - self._log_p = _log_p - self.mode = at.argmax(_log_p, axis=-1) p = at.exp(_log_p) - super().__init__(p=p, *args, **kwargs) - - def logp(self, value): - r""" - Calculate log-probability of Ordered Probit distribution at specified value. - - Parameters - ---------- - value: numeric - Value(s) for which log-probability is calculated. If the log probabilities for multiple - values are desired the values must be provided in a numpy array or Aesara tensor - - Returns - ------- - TensorVariable - """ - logp = self._log_p - k = self.k - - # Clip values before using them for indexing - value_clip = at.clip(value, 0, k - 1) - - if logp.ndim > 1: - if logp.ndim > value_clip.ndim: - value_clip = at.shape_padleft(value_clip, logp.ndim - value_clip.ndim) - elif logp.ndim < value_clip.ndim: - logp = at.shape_padleft(logp, value_clip.ndim - logp.ndim) - pattern = (logp.ndim - 1,) + tuple(range(logp.ndim - 1)) - a = take_along_axis( - logp.dimshuffle(pattern), - value_clip, - ) - else: - a = logp[value_clip] - - return bound(a, value >= 0, value <= (k - 1)) + return super().dist(p, **kwargs) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 6b1e8b17d4..86ca9cee5e 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -2307,7 +2307,6 @@ def test_orderedlogistic(self, n): ) @pytest.mark.parametrize("n", [2, 3, 4]) - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_orderedprobit(self, n): self.check_logp( OrderedProbit, diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 849fc6be47..4e6ccdd956 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -939,6 +939,16 @@ class TestOrderedLogistic(BaseTestDistribution): ] +class TestOrderedProbit(BaseTestDistribution): + pymc_dist = pm.OrderedProbit + pymc_dist_params = {"eta": 0, "cutpoints": np.array([-2, 0, 2])} + expected_rv_op_params = {"p": np.array([0.02275013, 0.47724987, 0.47724987, 0.02275013])} + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_rv_size", + ] + + class TestScalarParameterSamples(SeededTest): @pytest.mark.xfail(reason="This distribution has not been refactored for v4") def test_bounded(self): From 5c723b3e3e96ca2fdedc3811b453655cf21ce085 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 6 May 2021 16:52:53 +0200 Subject: [PATCH 06/12] Add missing discrete distributions to API rst --- docs/source/api/distributions/discrete.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/api/distributions/discrete.rst b/docs/source/api/distributions/discrete.rst index ee20b28abc..88d81682db 100644 --- a/docs/source/api/distributions/discrete.rst +++ b/docs/source/api/distributions/discrete.rst @@ -15,10 +15,12 @@ Discrete ZeroInflatedNegativeBinomial DiscreteUniform Geometric + HyperGeometric Categorical DiscreteWeibull Constant OrderedLogistic + OrderedProbit .. automodule:: pymc3.distributions.discrete :members: From ce252fac76b61a70c19144d0293f42bd6a38dc19 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 6 May 2021 18:05:14 +0200 Subject: [PATCH 07/12] Refactor ZeroInflatedPoisson --- pymc3/distributions/discrete.py | 68 ++++++++++++------------ pymc3/tests/test_distributions.py | 29 ++++++++-- pymc3/tests/test_distributions_random.py | 37 ++++++++++--- 3 files changed, 89 insertions(+), 45 deletions(-) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index c905359b2f..a82e374664 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -1215,6 +1215,21 @@ def logp(value, c): ) +class ZeroInflatedPoissonRV(RandomVariable): + name = "zero_inflated_poisson" + ndim_supp = 0 + ndims_params = [0, 0] + dtype = "int64" + _print_name = ("ZeroInflatedPois", "\\operatorname{ZeroInflatedPois}") + + @classmethod + def rng_fn(cls, rng, psi, lam, size): + return rng.poisson(lam, size=size) * (rng.random(size=size) < psi) + + +zero_inflated_poisson = ZeroInflatedPoissonRV() + + class ZeroInflatedPoisson(Discrete): R""" Zero-inflated Poisson log-likelihood. @@ -1266,36 +1281,15 @@ class ZeroInflatedPoisson(Discrete): (theta >= 0). """ - def __init__(self, psi, theta, *args, **kwargs): - super().__init__(*args, **kwargs) - self.theta = theta = at.as_tensor_variable(floatX(theta)) - self.psi = at.as_tensor_variable(floatX(psi)) - self.pois = Poisson.dist(theta) - self.mode = self.pois.mode - - def random(self, point=None, size=None): - r""" - Draw random values from ZeroInflatedPoisson distribution. - - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). + rv_op = zero_inflated_poisson - Returns - ------- - array - """ - # theta, psi = draw_values([self.theta, self.psi], point=point, size=size) - # g = generate_samples(stats.poisson.rvs, theta, dist_shape=self.shape, size=size) - # g, psi = broadcast_distribution_samples([g, psi], size=size) - # return g * (np.random.random(g.shape) < psi) + @classmethod + def dist(cls, psi, theta, *args, **kwargs): + psi = at.as_tensor_variable(floatX(psi)) + theta = at.as_tensor_variable(floatX(theta)) + return super().dist([psi, theta], *args, **kwargs) - def logp(self, value): + def logp(value, psi, theta): r""" Calculate log-probability of ZeroInflatedPoisson distribution at specified value. @@ -1309,18 +1303,22 @@ def logp(self, value): ------- TensorVariable """ - psi = self.psi - theta = self.theta logp_val = at.switch( at.gt(value, 0), - at.log(psi) + self.pois.logp(value), + at.log(psi) + Poisson.logp(value, theta), logaddexp(at.log1p(-psi), at.log(psi) - theta), ) - return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, 0 <= theta) + return bound( + logp_val, + 0 <= value, + 0 <= psi, + psi <= 1, + 0 <= theta, + ) - def logcdf(self, value): + def logcdf(value, psi, theta): """ Compute the log of the cumulative distribution function for ZeroInflatedPoisson distribution at the specified value. @@ -1335,13 +1333,13 @@ def logcdf(self, value): ------- TensorVariable """ - psi = self.psi return bound( - logaddexp(at.log1p(-psi), at.log(psi) + self.pois.logcdf(value)), + logaddexp(at.log1p(-psi), at.log(psi) + Poisson.logcdf(value, theta)), 0 <= value, 0 <= psi, psi <= 1, + 0 <= theta, ) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 86ca9cee5e..8d4fa36dc2 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -1618,8 +1618,7 @@ def test_bound_poisson(self): def test_constantdist(self): self.check_logp(Constant, I, {"c": I}, lambda value, c: np.log(c == value)) - # Too lazy to propagate decimal parameter through the whole chain of deps - @pytest.mark.xfail(reason="Distribution not refactored yet") + @pytest.mark.xfail(reason="Test has not been refactored") @pytest.mark.xfail( condition=(aesara.config.floatX == "float32"), reason="Fails on float32 due to inf issues", @@ -1631,8 +1630,30 @@ def test_zeroinflatedpoisson_distribution(self): {"theta": Rplus, "psi": Unit}, ) - @pytest.mark.xfail(reason="Distribution not refactored yet") - def test_zeroinflatedpoisson_logcdf(self): + def test_zeroinflatedpoisson(self): + def logp_fn(value, psi, theta): + if value == 0: + return np.log((1 - psi) * sp.poisson.pmf(0, theta)) + else: + return np.log(psi * sp.poisson.pmf(value, theta)) + + def logcdf_fn(value, psi, theta): + return np.log((1 - psi) + psi * sp.poisson.cdf(value, theta)) + + self.check_logp( + ZeroInflatedPoisson, + Nat, + {"psi": Unit, "theta": Rplus}, + logp_fn, + ) + + self.check_logcdf( + ZeroInflatedPoisson, + Nat, + {"psi": Unit, "theta": Rplus}, + logcdf_fn, + ) + self.check_selfconsistency_discrete_logcdf( ZeroInflatedPoisson, Nat, diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 4e6ccdd956..6bba6d06c8 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -313,12 +313,6 @@ class TestLogitNormal(BaseTestCases.BaseTestCase): params = {"mu": 0.0, "sigma": 1.0} -@pytest.mark.xfail(reason="This distribution has not been refactored for v4") -class TestZeroInflatedPoisson(BaseTestCases.BaseTestCase): - distribution = pm.ZeroInflatedPoisson - params = {"theta": 1.0, "psi": 0.3} - - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") class TestZeroInflatedNegativeBinomial(BaseTestCases.BaseTestCase): distribution = pm.ZeroInflatedNegativeBinomial @@ -929,6 +923,37 @@ def constant_rng_fn(self, size, c): ] +class TestZeroInflatedPoisson(BaseTestDistribution): + def zero_inflated_poisson_rng_fn(self, size, psi, theta, poisson_rng_fct, random_rng_fct): + return poisson_rng_fct(theta, size=size) * (random_rng_fct(size=size) < psi) + + def seeded_zero_inflated_poisson_rng_fn(self): + poisson_rng_fct = functools.partial( + getattr(np.random.RandomState, "poisson"), self.get_random_state() + ) + + random_rng_fct = functools.partial( + getattr(np.random.RandomState, "random"), self.get_random_state() + ) + + return functools.partial( + self.zero_inflated_poisson_rng_fn, + poisson_rng_fct=poisson_rng_fct, + random_rng_fct=random_rng_fct, + ) + + pymc_dist = pm.ZeroInflatedPoisson + pymc_dist_params = {"psi": 0.9, "theta": 4.0} + expected_rv_op_params = {"psi": 0.9, "theta": 4.0} + reference_dist_params = {"psi": 0.9, "theta": 4.0} + reference_dist = seeded_zero_inflated_poisson_rng_fn + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + "check_rv_size", + ] + + class TestOrderedLogistic(BaseTestDistribution): pymc_dist = pm.OrderedLogistic pymc_dist_params = {"eta": 0, "cutpoints": np.array([-2, 0, 2])} From 7a7405f5a623e4bcfd1191bf96359b79491f9cf8 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 6 May 2021 18:28:02 +0200 Subject: [PATCH 08/12] Refactor ZeroInflatedBinomial --- pymc3/distributions/discrete.py | 77 ++++++++++++------------ pymc3/tests/test_distributions.py | 30 +++++++-- pymc3/tests/test_distributions_random.py | 31 ++++++++++ 3 files changed, 96 insertions(+), 42 deletions(-) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index a82e374664..25b961b2ed 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -1343,6 +1343,21 @@ def logcdf(value, psi, theta): ) +class ZeroInflatedBinomialRV(RandomVariable): + name = "zero_inflated_binomial" + ndim_supp = 0 + ndims_params = [0, 0, 0] + dtype = "int64" + _print_name = ("ZeroInflatedBinom", "\\operatorname{ZeroInflatedBinom}") + + @classmethod + def rng_fn(cls, rng, psi, n, p, size): + return rng.binomial(n=n, p=p, size=size) * (rng.random(size=size) < psi) + + +zero_inflated_binomial = ZeroInflatedBinomialRV() + + class ZeroInflatedBinomial(Discrete): R""" Zero-inflated Binomial log-likelihood. @@ -1395,37 +1410,16 @@ class ZeroInflatedBinomial(Discrete): """ - def __init__(self, psi, n, p, *args, **kwargs): - super().__init__(*args, **kwargs) - self.n = n = at.as_tensor_variable(intX(n)) - self.p = p = at.as_tensor_variable(floatX(p)) - self.psi = psi = at.as_tensor_variable(floatX(psi)) - self.bin = Binomial.dist(n, p) - self.mode = self.bin.mode - - def random(self, point=None, size=None): - r""" - Draw random values from ZeroInflatedBinomial distribution. + rv_op = zero_inflated_binomial - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). - - Returns - ------- - array - """ - # n, p, psi = draw_values([self.n, self.p, self.psi], point=point, size=size) - # g = generate_samples(stats.binom.rvs, n, p, dist_shape=self.shape, size=size) - # g, psi = broadcast_distribution_samples([g, psi], size=size) - # return g * (np.random.random(g.shape) < psi) + @classmethod + def dist(cls, psi, n, p, *args, **kwargs): + psi = at.as_tensor_variable(floatX(psi)) + n = at.as_tensor_variable(intX(n)) + p = at.as_tensor_variable(floatX(p)) + return super().dist([psi, n, p], *args, **kwargs) - def logp(self, value): + def logp(value, psi, n, p): r""" Calculate log-probability of ZeroInflatedBinomial distribution at specified value. @@ -1439,19 +1433,24 @@ def logp(self, value): ------- TensorVariable """ - psi = self.psi - p = self.p - n = self.n logp_val = at.switch( at.gt(value, 0), - at.log(psi) + self.bin.logp(value), + at.log(psi) + Binomial.logp(value, n, p), logaddexp(at.log1p(-psi), at.log(psi) + n * at.log1p(-p)), ) - return bound(logp_val, 0 <= value, value <= n, 0 <= psi, psi <= 1, 0 <= p, p <= 1) + return bound( + logp_val, + 0 <= value, + value <= n, + 0 <= psi, + psi <= 1, + 0 <= p, + p <= 1, + ) - def logcdf(self, value): + def logcdf(value, psi, n, p): """ Compute the log of the cumulative distribution function for ZeroInflatedBinomial distribution at the specified value. @@ -1465,19 +1464,21 @@ def logcdf(self, value): ------- TensorVariable """ + # logcdf can only handle scalar values due to limitation in Binomial.logcdf if np.ndim(value): raise TypeError( f"ZeroInflatedBinomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." ) - psi = self.psi - return bound( - logaddexp(at.log1p(-psi), at.log(psi) + self.bin.logcdf(value)), + logaddexp(at.log1p(-psi), at.log(psi) + Binomial.logcdf(value, n, p)), 0 <= value, + value <= n, 0 <= psi, psi <= 1, + 0 <= p, + p <= 1, ) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 8d4fa36dc2..a05c71f570 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -1682,8 +1682,7 @@ def test_zeroinflatednegativebinomial_logcdf(self): n_samples=10, ) - # Too lazy to propagate decimal parameter through the whole chain of deps - @pytest.mark.xfail(reason="Distribution not refactored yet") + @pytest.mark.xfail(reason="Test not refactored yet") def test_zeroinflatedbinomial_distribution(self): self.checkd( ZeroInflatedBinomial, @@ -1691,8 +1690,31 @@ def test_zeroinflatedbinomial_distribution(self): {"n": NatSmall, "p": Unit, "psi": Unit}, ) - @pytest.mark.xfail(reason="Distribution not refactored yet") - def test_zeroinflatedbinomial_logcdf(self): + def test_zeroinflatedbinomial(self): + def logp_fn(value, psi, n, p): + if value == 0: + return np.log((1 - psi) * sp.binom.pmf(0, n, p)) + else: + return np.log(psi * sp.binom.pmf(value, n, p)) + + def logcdf_fn(value, psi, n, p): + return np.log((1 - psi) + psi * sp.binom.cdf(value, n, p)) + + self.check_logp( + ZeroInflatedBinomial, + Nat, + {"psi": Unit, "n": NatSmall, "p": Unit}, + logp_fn, + ) + + self.check_logcdf( + ZeroInflatedBinomial, + Nat, + {"psi": Unit, "n": NatSmall, "p": Unit}, + logcdf_fn, + n_samples=10, + ) + self.check_selfconsistency_discrete_logcdf( ZeroInflatedBinomial, Nat, diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 6bba6d06c8..f6ef60eca7 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -954,6 +954,37 @@ def seeded_zero_inflated_poisson_rng_fn(self): ] +class TestZeroInflatedBinomial(BaseTestDistribution): + def zero_inflated_poisson_rng_fn(self, size, psi, n, p, binomial_rng_fct, random_rng_fct): + return binomial_rng_fct(n, p, size=size) * (random_rng_fct(size=size) < psi) + + def seeded_zero_inflated_binomial_rng_fn(self): + binomial_rng_fct = functools.partial( + getattr(np.random.RandomState, "binomial"), self.get_random_state() + ) + + random_rng_fct = functools.partial( + getattr(np.random.RandomState, "random"), self.get_random_state() + ) + + return functools.partial( + self.zero_inflated_poisson_rng_fn, + binomial_rng_fct=binomial_rng_fct, + random_rng_fct=random_rng_fct, + ) + + pymc_dist = pm.ZeroInflatedBinomial + pymc_dist_params = {"psi": 0.9, "n": 12, "p": 0.7} + expected_rv_op_params = {"psi": 0.9, "n": 12, "p": 0.7} + reference_dist_params = {"psi": 0.9, "n": 12, "p": 0.7} + reference_dist = seeded_zero_inflated_binomial_rng_fn + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + "check_rv_size", + ] + + class TestOrderedLogistic(BaseTestDistribution): pymc_dist = pm.OrderedLogistic pymc_dist_params = {"eta": 0, "cutpoints": np.array([-2, 0, 2])} From 43f75ddcafb6fb538ae1ed217a1ee92a52f70ab4 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 6 May 2021 19:01:58 +0200 Subject: [PATCH 09/12] Refactor ZeroInflatedNegativeBinomial --- pymc3/distributions/discrete.py | 97 ++++++++++-------------- pymc3/tests/test_distributions.py | 34 +++++++-- pymc3/tests/test_distributions_random.py | 39 +++++++++- 3 files changed, 107 insertions(+), 63 deletions(-) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 25b961b2ed..0ac1c254c5 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -668,7 +668,7 @@ def NegBinom(a, m, x): @classmethod def dist(cls, mu=None, alpha=None, p=None, n=None, *args, **kwargs): - n, p = cls.get_n_p(mu, alpha, p, n) + n, p = cls.get_n_p(mu=mu, alpha=alpha, p=p, n=n) n = at.as_tensor_variable(floatX(n)) p = at.as_tensor_variable(floatX(p)) return super().dist([n, p], *args, **kwargs) @@ -1482,6 +1482,21 @@ def logcdf(value, psi, n, p): ) +class ZeroInflatedNegBinomialRV(RandomVariable): + name = "zero_inflated_neg_binomial" + ndim_supp = 0 + ndims_params = [0, 0, 0] + dtype = "int64" + _print_name = ("ZeroInflatedNegBinom", "\\operatorname{ZeroInflatedNegBinom}") + + @classmethod + def rng_fn(cls, rng, psi, n, p, size): + return rng.negative_binomial(n=n, p=p, size=size) * (rng.random(size=size) < psi) + + +zero_inflated_neg_binomial = ZeroInflatedNegBinomialRV() + + class ZeroInflatedNegativeBinomial(Discrete): R""" Zero-Inflated Negative binomial log-likelihood. @@ -1551,50 +1566,17 @@ def ZeroInfNegBinom(a, m, psi, x): """ - def __init__(self, psi, mu, alpha, *args, **kwargs): - super().__init__(*args, **kwargs) - self.mu = mu = at.as_tensor_variable(floatX(mu)) - self.alpha = alpha = at.as_tensor_variable(floatX(alpha)) - self.psi = psi = at.as_tensor_variable(floatX(psi)) - self.nb = NegativeBinomial.dist(mu, alpha) - self.mode = self.nb.mode + rv_op = zero_inflated_neg_binomial - def random(self, point=None, size=None): - r""" - Draw random values from ZeroInflatedNegativeBinomial distribution. - - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). - - Returns - ------- - array - """ - # mu, alpha, psi = draw_values([self.mu, self.alpha, self.psi], point=point, size=size) - # g = generate_samples(self._random, mu=mu, alpha=alpha, dist_shape=self.shape, size=size) - # g[g == 0] = np.finfo(float).eps # Just in case - # g, psi = broadcast_distribution_samples([g, psi], size=size) - # return stats.poisson.rvs(g) * (np.random.random(g.shape) < psi) - - def _random(self, mu, alpha, size): - r"""Wrapper around stats.gamma.rvs that converts NegativeBinomial's - parametrization to scipy.gamma. All parameter arrays should have - been broadcasted properly by generate_samples at this point and size is - the scipy.rvs representation. - """ - return stats.gamma.rvs( - a=alpha, - scale=mu / alpha, - size=size, - ) + @classmethod + def dist(cls, psi, mu, alpha, *args, **kwargs): + psi = at.as_tensor_variable(floatX(psi)) + n, p = NegativeBinomial.get_n_p(mu=mu, alpha=alpha) + n = at.as_tensor_variable(floatX(n)) + p = at.as_tensor_variable(floatX(p)) + return super().dist([psi, n, p], *args, **kwargs) - def logp(self, value): + def logp(value, psi, n, p): r""" Calculate log-probability of ZeroInflatedNegativeBinomial distribution at specified value. @@ -1608,20 +1590,22 @@ def logp(self, value): ------- TensorVariable """ - alpha = self.alpha - mu = self.mu - psi = self.psi - logp_other = at.log(psi) + self.nb.logp(value) - logp_0 = logaddexp( - at.log1p(-psi), at.log(psi) + alpha * (at.log(alpha) - at.log(alpha + mu)) + return bound( + at.switch( + at.gt(value, 0), + at.log(psi) + NegativeBinomial.logp(value, n, p), + logaddexp(at.log1p(-psi), at.log(psi) + n * at.log(p)), + ), + 0 <= value, + 0 <= psi, + psi <= 1, + 0 < n, + 0 <= p, + p <= 1, ) - logp_val = at.switch(at.gt(value, 0), logp_other, logp_0) - - return bound(logp_val, 0 <= value, 0 <= psi, psi <= 1, mu > 0, alpha > 0) - - def logcdf(self, value): + def logcdf(value, psi, n, p): """ Compute the log of the cumulative distribution function for ZeroInflatedNegativeBinomial distribution at the specified value. @@ -1640,13 +1624,14 @@ def logcdf(self, value): raise TypeError( f"ZeroInflatedNegativeBinomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." ) - psi = self.psi return bound( - logaddexp(at.log1p(-psi), at.log(psi) + self.nb.logcdf(value)), + logaddexp(at.log1p(-psi), at.log(psi) + NegativeBinomial.logcdf(value, n, p)), 0 <= value, 0 <= psi, psi <= 1, + 0 < p, + p <= 1, ) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index a05c71f570..92780173d6 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -1660,8 +1660,7 @@ def logcdf_fn(value, psi, theta): {"theta": Rplus, "psi": Unit}, ) - # Too lazy to propagate decimal parameter through the whole chain of deps - @pytest.mark.xfail(reason="Distribution not refactored yet") + @pytest.mark.xfail(reason="Test not refactored yet") @pytest.mark.xfail( condition=(aesara.config.floatX == "float32"), reason="Fails on float32 due to inf issues", @@ -1673,12 +1672,37 @@ def test_zeroinflatednegativebinomial_distribution(self): {"mu": Rplusbig, "alpha": Rplusbig, "psi": Unit}, ) - @pytest.mark.xfail(reason="Distribution not refactored yet") - def test_zeroinflatednegativebinomial_logcdf(self): + def test_zeroinflatednegativebinomial(self): + def logp_fn(value, psi, mu, alpha): + n, p = NegativeBinomial.get_n_p(mu=mu, alpha=alpha) + if value == 0: + return np.log((1 - psi) * sp.nbinom.pmf(0, n, p)) + else: + return np.log(psi * sp.nbinom.pmf(value, n, p)) + + def logcdf_fn(value, psi, mu, alpha): + n, p = NegativeBinomial.get_n_p(mu=mu, alpha=alpha) + return np.log((1 - psi) + psi * sp.nbinom.cdf(value, n, p)) + + self.check_logp( + ZeroInflatedNegativeBinomial, + Nat, + {"psi": Unit, "mu": Rplusbig, "alpha": Rplusbig}, + logp_fn, + ) + + self.check_logcdf( + ZeroInflatedNegativeBinomial, + Nat, + {"psi": Unit, "mu": Rplusbig, "alpha": Rplusbig}, + logcdf_fn, + n_samples=10, + ) + self.check_selfconsistency_discrete_logcdf( ZeroInflatedNegativeBinomial, Nat, - {"mu": Rplusbig, "alpha": Rplusbig, "psi": Unit}, + {"psi": Unit, "mu": Rplusbig, "alpha": Rplusbig}, n_samples=10, ) diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index f6ef60eca7..82d4f4e091 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -955,7 +955,7 @@ def seeded_zero_inflated_poisson_rng_fn(self): class TestZeroInflatedBinomial(BaseTestDistribution): - def zero_inflated_poisson_rng_fn(self, size, psi, n, p, binomial_rng_fct, random_rng_fct): + def zero_inflated_binomial_rng_fn(self, size, psi, n, p, binomial_rng_fct, random_rng_fct): return binomial_rng_fct(n, p, size=size) * (random_rng_fct(size=size) < psi) def seeded_zero_inflated_binomial_rng_fn(self): @@ -968,7 +968,7 @@ def seeded_zero_inflated_binomial_rng_fn(self): ) return functools.partial( - self.zero_inflated_poisson_rng_fn, + self.zero_inflated_binomial_rng_fn, binomial_rng_fct=binomial_rng_fct, random_rng_fct=random_rng_fct, ) @@ -985,6 +985,41 @@ def seeded_zero_inflated_binomial_rng_fn(self): ] +class TestZeroInflatedNegativeBinomial(BaseTestDistribution): + def zero_inflated_negbinomial_rng_fn( + self, size, psi, n, p, negbinomial_rng_fct, random_rng_fct + ): + return negbinomial_rng_fct(n, p, size=size) * (random_rng_fct(size=size) < psi) + + def seeded_zero_inflated_negbinomial_rng_fn(self): + negbinomial_rng_fct = functools.partial( + getattr(np.random.RandomState, "negative_binomial"), self.get_random_state() + ) + + random_rng_fct = functools.partial( + getattr(np.random.RandomState, "random"), self.get_random_state() + ) + + return functools.partial( + self.zero_inflated_negbinomial_rng_fn, + negbinomial_rng_fct=negbinomial_rng_fct, + random_rng_fct=random_rng_fct, + ) + + n, p = pm.NegativeBinomial.get_n_p(mu=3, alpha=5) + + pymc_dist = pm.ZeroInflatedNegativeBinomial + pymc_dist_params = {"psi": 0.9, "mu": 3, "alpha": 5} + expected_rv_op_params = {"psi": 0.9, "n": n, "p": p} + reference_dist_params = {"psi": 0.9, "n": n, "p": p} + reference_dist = seeded_zero_inflated_negbinomial_rng_fn + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + "check_rv_size", + ] + + class TestOrderedLogistic(BaseTestDistribution): pymc_dist = pm.OrderedLogistic pymc_dist_params = {"eta": 0, "cutpoints": np.array([-2, 0, 2])} From d13e8714e0b6754f4cd30df26a8610ccefc2c1ba Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 6 May 2021 20:08:03 +0200 Subject: [PATCH 10/12] Update several test xfails --- pymc3/tests/test_examples.py | 20 ++++++++++---------- pymc3/tests/test_model.py | 2 +- pymc3/tests/test_sampling.py | 1 - 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/pymc3/tests/test_examples.py b/pymc3/tests/test_examples.py index fa2e74f6f1..ad54236543 100644 --- a/pymc3/tests/test_examples.py +++ b/pymc3/tests/test_examples.py @@ -51,7 +51,7 @@ def get_city_data(): return data.merge(unique, "inner", on="fips") -@pytest.mark.xfail(reason="Bernoulli distribution not refactored") +@pytest.mark.xfail(reason="Bernoulli logitp distribution not refactored") class TestARM5_4(SeededTest): def build_model(self): data = pd.read_csv( @@ -194,7 +194,7 @@ def build_disaster_model(masked=False): @pytest.mark.xfail( - reason="DiscreteUniform hasn't been refactored" + reason="_check_start_shape fails with start dictionary" # condition=(aesara.config.floatX == "float32"), reason="Fails on float32" ) class TestDisasterModel(SeededTest): @@ -204,9 +204,9 @@ def test_disaster_model(self): model = build_disaster_model(masked=False) with model: # Initial values for stochastic nodes - start = {"early_mean": 2.0, "late_mean": 3.0} + start = {"early_mean": 2, "late_mean": 3.0} # Use slice sampler for means (other variables auto-selected) - step = pm.Slice([model.early_mean_log__, model.late_mean_log__]) + step = pm.Slice([model["early_mean_log__"], model["late_mean_log__"]]) tr = pm.sample(500, tune=50, start=start, step=step, chains=2) az.summary(tr) @@ -217,12 +217,12 @@ def test_disaster_model_missing(self): # Initial values for stochastic nodes start = {"early_mean": 2.0, "late_mean": 3.0} # Use slice sampler for means (other variables auto-selected) - step = pm.Slice([model.early_mean_log__, model.late_mean_log__]) + step = pm.Slice([model["early_mean_log__"], model["late_mean_log__"]]) tr = pm.sample(500, tune=50, start=start, step=step, chains=2) az.summary(tr) -@pytest.mark.xfail(reason="ZeroInflatedPoisson hasn't been refactored for v4") +@pytest.mark.xfail(reason="_check_start_shape fails with start dictionary") class TestLatentOccupancy(SeededTest): """ From the PyMC example list @@ -277,14 +277,14 @@ def test_run(self): "z": (self.y > 0).astype("int16"), "theta": np.array(5, dtype="f"), } - step_one = pm.Metropolis([model.theta_interval__, model.psi_logodds__]) + step_one = pm.Metropolis([model["theta_interval__"], model["psi_logodds__"]]) step_two = pm.BinaryMetropolis([model.z]) pm.sample(50, step=[step_one, step_two], start=start, chains=1) @pytest.mark.xfail( - # condition=(aesara.config.floatX == "float32"), - # reason="Fails on float32 due to starting inf at starting logP", + condition=(aesara.config.floatX == "float32"), + reason="Fails on float32 due to starting inf at starting logP", ) class TestRSV(SeededTest): """ @@ -314,7 +314,7 @@ def build_model(self): # Prior probability prev_rsv = pm.Beta("prev_rsv", 1, 5, shape=3) # RSV in Amman - y_amman = pm.Binomial("y_amman", n_amman, prev_rsv, shape=3, testval=100) + y_amman = pm.Binomial("y_amman", n_amman, prev_rsv, shape=3) # Likelihood for number with RSV in hospital (assumes Pr(hosp | RSV) = 1) pm.Binomial("y_hosp", y_amman, market_share, observed=rsv_cases) return model diff --git a/pymc3/tests/test_model.py b/pymc3/tests/test_model.py index a155702ba1..f307ad93c5 100644 --- a/pymc3/tests/test_model.py +++ b/pymc3/tests/test_model.py @@ -274,7 +274,7 @@ def test_grad(self): assert val == 21 npt.assert_allclose(grad, [5, 5, 5, 1, 1, 1, 1, 1, 1]) - @pytest.mark.xfail(reason="Lognormal not refactored for v4") + @pytest.mark.xfail(reason="Test not refactored for v4") def test_edge_case(self): # Edge case discovered in #2948 ndim = 3 diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py index 217cf96010..2581c0ae47 100644 --- a/pymc3/tests/test_sampling.py +++ b/pymc3/tests/test_sampling.py @@ -1040,7 +1040,6 @@ def test_shape_edgecase(self): prior = pm.sample_prior_predictive(10) assert prior["mu"].shape == (10, 5) - @pytest.mark.xfail(reason="ZeroInflatedPoisson not refactored for v4") def test_zeroinflatedpoisson(self): with pm.Model(): theta = pm.Beta("theta", alpha=1, beta=1) From a616e0a61fc285846bda8ee13e49c533cfb8241c Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 13 May 2021 12:26:32 +0200 Subject: [PATCH 11/12] Use _logp and _logcdf dispatcher in ZeroInflated* methods --- pymc3/distributions/discrete.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 0ac1c254c5..ae3fba4639 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -40,6 +40,7 @@ normal_lcdf, ) from pymc3.distributions.distribution import Discrete +from pymc3.distributions.logp import _logcdf, _logp from pymc3.math import log1mexp, logaddexp, logsumexp, sigmoid __all__ = [ @@ -1306,7 +1307,7 @@ def logp(value, psi, theta): logp_val = at.switch( at.gt(value, 0), - at.log(psi) + Poisson.logp(value, theta), + at.log(psi) + _logp(poisson, value, {}, theta), logaddexp(at.log1p(-psi), at.log(psi) - theta), ) @@ -1335,7 +1336,7 @@ def logcdf(value, psi, theta): """ return bound( - logaddexp(at.log1p(-psi), at.log(psi) + Poisson.logcdf(value, theta)), + logaddexp(at.log1p(-psi), at.log(psi) + _logcdf(poisson, value, {}, theta)), 0 <= value, 0 <= psi, psi <= 1, @@ -1436,7 +1437,7 @@ def logp(value, psi, n, p): logp_val = at.switch( at.gt(value, 0), - at.log(psi) + Binomial.logp(value, n, p), + at.log(psi) + _logp(binomial, value, {}, n, p), logaddexp(at.log1p(-psi), at.log(psi) + n * at.log1p(-p)), ) @@ -1472,7 +1473,7 @@ def logcdf(value, psi, n, p): ) return bound( - logaddexp(at.log1p(-psi), at.log(psi) + Binomial.logcdf(value, n, p)), + logaddexp(at.log1p(-psi), at.log(psi) + _logcdf(binomial, value, {}, n, p)), 0 <= value, value <= n, 0 <= psi, @@ -1594,7 +1595,7 @@ def logp(value, psi, n, p): return bound( at.switch( at.gt(value, 0), - at.log(psi) + NegativeBinomial.logp(value, n, p), + at.log(psi) + _logp(nbinom, value, {}, n, p), logaddexp(at.log1p(-psi), at.log(psi) + n * at.log(p)), ), 0 <= value, @@ -1626,7 +1627,7 @@ def logcdf(value, psi, n, p): ) return bound( - logaddexp(at.log1p(-psi), at.log(psi) + NegativeBinomial.logcdf(value, n, p)), + logaddexp(at.log1p(-psi), at.log(psi) + _logcdf(nbinom, value, {}, n, p)), 0 <= value, 0 <= psi, psi <= 1, From bc89287fa257bacf970d2d7bf0ec0f5d8cd37f8a Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 13 May 2021 12:26:49 +0200 Subject: [PATCH 12/12] Fix `check_logcdf` test regression --- pymc3/tests/test_distributions.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 92780173d6..63466b7902 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -815,8 +815,14 @@ def check_logcdf( ) # Test that method works with multiple values or raises informative TypeError - with pytest.raises(TypeError), aesara.config.change_flags(mode=Mode("py")): - logcdf(valid_dist, np.array([valid_value, valid_value])).eval() + valid_dist = change_rv_size(valid_dist, 2) + with aesara.config.change_flags(mode=Mode("py")): + try: + logcdf(valid_dist, np.array([valid_value, valid_value])).eval() + except TypeError as err: + assert str(err).endswith( + "logcdf expects a scalar value but received a 1-dimensional object." + ) def check_selfconsistency_discrete_logcdf( self, distribution, domain, paramdomains, decimal=None, n_samples=100