From 52682eb96cc7443c62cbded02a9297c452ae23f6 Mon Sep 17 00:00:00 2001 From: Purna Chandra Mansingh <42216008+purna135@users.noreply.github.com> Date: Tue, 22 Mar 2022 02:10:32 +0530 Subject: [PATCH] Parametrize Binomial and Categorical distributions via logit_p (#5637) --- RELEASE-NOTES.md | 1 + pymc/distributions/discrete.py | 30 ++++++++++++++--- pymc/tests/test_distributions_random.py | 45 ++++++++++++++++++++++++- 3 files changed, 70 insertions(+), 6 deletions(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index e3e9f8e6fce..468d68bbc76 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -133,6 +133,7 @@ This includes API changes we did not warn about since at least `3.11.0` (2021-01 - Adding support for blackjax's NUTS sampler `pymc.sampling_jax` (see [#5477](ihttps://github.com/pymc-devs/pymc/pull/5477)) - `pymc.sampling_jax` samplers support `log_likelihood`, `observed_data`, and `sample_stats` in returned InferenceData object (see [#5189](https://github.com/pymc-devs/pymc/pull/5189)) - Adding support for `pm.Deterministic` in `pymc.sampling_jax` (see [#5182](https://github.com/pymc-devs/pymc/pull/5182)) +- Added an alternative parametrization, `logit_p` to `pm.Binomial` and `pm.Categorical` distributions (see [5637](https://github.com/pymc-devs/pymc/pull/5637)). - ... diff --git a/pymc/distributions/discrete.py b/pymc/distributions/discrete.py index 31ff0bd698a..be1f26826fd 100644 --- a/pymc/distributions/discrete.py +++ b/pymc/distributions/discrete.py @@ -106,15 +106,25 @@ class Binomial(Discrete): Parameters ---------- - n: int + n : int Number of Bernoulli trials (n >= 0). - p: float + p : float Probability of success in each trial (0 < p < 1). + logit_p : float + Alternative log odds for the probability of success. """ rv_op = binomial @classmethod - def dist(cls, n, p, *args, **kwargs): + def dist(cls, n, p=None, logit_p=None, *args, **kwargs): + if p is not None and logit_p is not None: + raise ValueError("Incompatible parametrization. Can't specify both p and logit_p.") + elif p is None and logit_p is None: + raise ValueError("Incompatible parametrization. Must specify either p or logit_p.") + + if logit_p is not None: + p = at.sigmoid(logit_p) + n = at.as_tensor_variable(intX(n)) p = at.as_tensor_variable(floatX(p)) return super().dist([n, p], **kwargs) @@ -1245,14 +1255,24 @@ class Categorical(Discrete): Parameters ---------- - p: array of floats + p : array of floats p > 0 and the elements of p must sum to 1. They will be automatically rescaled otherwise. + logit_p : float + Alternative log odds for the probability of success. """ rv_op = categorical @classmethod - def dist(cls, p, **kwargs): + def dist(cls, p=None, logit_p=None, **kwargs): + if p is not None and logit_p is not None: + raise ValueError("Incompatible parametrization. Can't specify both p and logit_p.") + elif p is None and logit_p is None: + raise ValueError("Incompatible parametrization. Must specify either p or logit_p.") + + if logit_p is not None: + p = pm.math.softmax(logit_p, axis=-1) + if isinstance(p, np.ndarray) or isinstance(p, list): if (np.asarray(p) < 0).any(): raise ValueError(f"Negative `p` parameters are not valid, got: {p}") diff --git a/pymc/tests/test_distributions_random.py b/pymc/tests/test_distributions_random.py index 2be81395cd6..78bd3fc2230 100644 --- a/pymc/tests/test_distributions_random.py +++ b/pymc/tests/test_distributions_random.py @@ -38,7 +38,7 @@ def random_polyagamma(*args, **kwargs): raise RuntimeError("polyagamma package is not installed!") -from scipy.special import expit +from scipy.special import expit, softmax import pymc as pm @@ -1006,6 +1006,25 @@ class TestBinomial(BaseTestDistributionRandom): checks_to_run = ["check_pymc_params_match_rv_op"] +class TestLogitBinomial(BaseTestDistributionRandom): + pymc_dist = pm.Binomial + pymc_dist_params = {"n": 100, "logit_p": 0.5} + expected_rv_op_params = {"n": 100, "p": expit(0.5)} + tests_to_run = ["check_pymc_params_match_rv_op"] + + @pytest.mark.parametrize( + "n, p, logit_p, expected", + [ + (5, None, None, "Must specify either p or logit_p."), + (5, 0.5, 0.5, "Can't specify both p and logit_p."), + ], + ) + def test_binomial_init_fail(self, n, p, logit_p, expected): + with pm.Model() as model: + with pytest.raises(ValueError, match=f"Incompatible parametrization. {expected}"): + pm.Binomial("x", n=n, p=p, logit_p=logit_p) + + class TestNegativeBinomial(BaseTestDistributionRandom): pymc_dist = pm.NegativeBinomial pymc_dist_params = {"n": 100, "p": 0.33} @@ -1411,6 +1430,30 @@ class TestCategorical(BaseTestDistributionRandom): ] +class TestLogitCategorical(BaseTestDistributionRandom): + pymc_dist = pm.Categorical + pymc_dist_params = {"logit_p": np.array([[0.28, 0.62, 0.10], [0.28, 0.62, 0.10]])} + expected_rv_op_params = { + "p": softmax(np.array([[0.28, 0.62, 0.10], [0.28, 0.62, 0.10]]), axis=-1) + } + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_rv_size", + ] + + @pytest.mark.parametrize( + "p, logit_p, expected", + [ + (None, None, "Must specify either p or logit_p."), + (0.5, 0.5, "Can't specify both p and logit_p."), + ], + ) + def test_categorical_init_fail(self, p, logit_p, expected): + with pm.Model() as model: + with pytest.raises(ValueError, match=f"Incompatible parametrization. {expected}"): + pm.Categorical("x", p=p, logit_p=logit_p) + + class TestGeometric(BaseTestDistributionRandom): pymc_dist = pm.Geometric pymc_dist_params = {"p": 0.9}