From 34815a871d420fae0a1942eba41c900fd1c3014f Mon Sep 17 00:00:00 2001 From: Purna Chandra Mansingh Date: Mon, 21 Mar 2022 19:17:33 +0530 Subject: [PATCH 1/5] added logit_p to Binomial and Categorical distributions --- pymc/distributions/discrete.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/pymc/distributions/discrete.py b/pymc/distributions/discrete.py index 31ff0bd698a..f4ed9dac6c3 100644 --- a/pymc/distributions/discrete.py +++ b/pymc/distributions/discrete.py @@ -114,7 +114,15 @@ class Binomial(Discrete): rv_op = binomial @classmethod - def dist(cls, n, p, *args, **kwargs): + def dist(cls, n, p=None, logit_p=None, *args, **kwargs): + if p is not None and logit_p is not None: + raise ValueError("Incompatible parametrization. Can't specify both p and logit_p.") + elif p is None and logit_p is None: + raise ValueError("Incompatible parametrization. Must specify either p or logit_p.") + + if logit_p is not None: + p = at.sigmoid(logit_p) + n = at.as_tensor_variable(intX(n)) p = at.as_tensor_variable(floatX(p)) return super().dist([n, p], **kwargs) @@ -1252,7 +1260,15 @@ class Categorical(Discrete): rv_op = categorical @classmethod - def dist(cls, p, **kwargs): + def dist(cls, p=None, logit_p=None, **kwargs): + if p is not None and logit_p is not None: + raise ValueError("Incompatible parametrization. Can't specify both p and logit_p.") + elif p is None and logit_p is None: + raise ValueError("Incompatible parametrization. 
Must specify either p or logit_p.") + + if logit_p is not None: + p = pm.math.softmax(logit_p, axis=-1) + if isinstance(p, np.ndarray) or isinstance(p, list): if (np.asarray(p) < 0).any(): raise ValueError(f"Negative `p` parameters are not valid, got: {p}") From a4994b1d23b5a9516a430e02d8c1a04056b8b3c7 Mon Sep 17 00:00:00 2001 From: Purna Chandra Mansingh Date: Mon, 21 Mar 2022 19:24:58 +0530 Subject: [PATCH 2/5] added tests for logit_p check --- pymc/tests/test_distributions_random.py | 41 +++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/pymc/tests/test_distributions_random.py b/pymc/tests/test_distributions_random.py index c0c09d736a8..aab2f288f5c 100644 --- a/pymc/tests/test_distributions_random.py +++ b/pymc/tests/test_distributions_random.py @@ -1005,6 +1005,25 @@ class TestBinomial(BaseTestDistributionRandom): checks_to_run = ["check_pymc_params_match_rv_op"] +class TestLogitBinomial(BaseTestDistributionRandom): + pymc_dist = pm.Binomial + pymc_dist_params = {"n": 100, "logit_p": 2.197224577} + expected_rv_op_params = {"n": 100, "p": 0.9} + tests_to_run = ["check_pymc_params_match_rv_op"] + + @pytest.mark.parametrize( + "n, p, logit_p, expected", + [ + (5, None, None, "Must specify either p or logit_p."), + (5, 0.5, 0.5, "Can't specify both p and logit_p."), + ], + ) + def test_binomial_init_fail(self, n, p, logit_p, expected): + with pm.Model() as model: + with pytest.raises(ValueError, match=f"Incompatible parametrization. 
{expected}"): + pm.Binomial("x", n=n, p=p, logit_p=logit_p) + + class TestNegativeBinomial(BaseTestDistributionRandom): pymc_dist = pm.NegativeBinomial pymc_dist_params = {"n": 100, "p": 0.33} @@ -1410,6 +1429,28 @@ class TestCategorical(BaseTestDistributionRandom): ] +class TestLogitCategorical(BaseTestDistributionRandom): + pymc_dist = pm.Categorical + pymc_dist_params = {"logit_p": np.array([-0.944461608841, 0.489548225319, -2.197224577336])} + expected_rv_op_params = {"p": np.array([0.28, 0.62, 0.10])} + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_rv_size", + ] + + @pytest.mark.parametrize( + "p, logit_p, expected", + [ + (None, None, "Must specify either p or logit_p."), + (0.5, 0.5, "Can't specify both p and logit_p."), + ], + ) + def test_categorical_init_fail(self, p, logit_p, expected): + with pm.Model() as model: + with pytest.raises(ValueError, match=f"Incompatible parametrization. {expected}"): + pm.Categorical("x", p=p, logit_p=logit_p) + + class TestGeometric(BaseTestDistributionRandom): pymc_dist = pm.Geometric pymc_dist_params = {"p": 0.9} From 5b9d55e22fa380424dd245e298803f33413bb77a Mon Sep 17 00:00:00 2001 From: Purna Chandra Mansingh Date: Tue, 22 Mar 2022 00:15:02 +0530 Subject: [PATCH 3/5] updated test with expit and softmax --- pymc/tests/test_distributions_random.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pymc/tests/test_distributions_random.py b/pymc/tests/test_distributions_random.py index aab2f288f5c..af30b7fda4f 100644 --- a/pymc/tests/test_distributions_random.py +++ b/pymc/tests/test_distributions_random.py @@ -37,7 +37,7 @@ def random_polyagamma(*args, **kwargs): raise RuntimeError("polyagamma package is not installed!") -from scipy.special import expit +from scipy.special import expit, softmax import pymc as pm @@ -1007,8 +1007,8 @@ class TestBinomial(BaseTestDistributionRandom): class TestLogitBinomial(BaseTestDistributionRandom): pymc_dist = pm.Binomial - pymc_dist_params = 
{"n": 100, "logit_p": 2.197224577} - expected_rv_op_params = {"n": 100, "p": 0.9} + pymc_dist_params = {"n": 100, "logit_p": 0.5} + expected_rv_op_params = {"n": 100, "p": expit(0.5)} tests_to_run = ["check_pymc_params_match_rv_op"] @pytest.mark.parametrize( @@ -1431,8 +1431,8 @@ class TestCategorical(BaseTestDistributionRandom): class TestLogitCategorical(BaseTestDistributionRandom): pymc_dist = pm.Categorical - pymc_dist_params = {"logit_p": np.array([-0.944461608841, 0.489548225319, -2.197224577336])} - expected_rv_op_params = {"p": np.array([0.28, 0.62, 0.10])} + pymc_dist_params = {"logit_p": np.array([[0.28, 0.62, 0.10], [0.28, 0.62, 0.10]])} + expected_rv_op_params = {"p": softmax(np.array([[0.28, 0.62, 0.10], [0.28, 0.62, 0.10]]), axis=-1)} tests_to_run = [ "check_pymc_params_match_rv_op", "check_rv_size", From 0db7139358aca2ebe14754addd2a3e43dd7563da Mon Sep 17 00:00:00 2001 From: Purna Chandra Mansingh Date: Tue, 22 Mar 2022 00:21:06 +0530 Subject: [PATCH 4/5] fixed pre-commit test --- pymc/tests/test_distributions_random.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pymc/tests/test_distributions_random.py b/pymc/tests/test_distributions_random.py index af30b7fda4f..a701a758779 100644 --- a/pymc/tests/test_distributions_random.py +++ b/pymc/tests/test_distributions_random.py @@ -1432,7 +1432,9 @@ class TestCategorical(BaseTestDistributionRandom): class TestLogitCategorical(BaseTestDistributionRandom): pymc_dist = pm.Categorical pymc_dist_params = {"logit_p": np.array([[0.28, 0.62, 0.10], [0.28, 0.62, 0.10]])} - expected_rv_op_params = {"p": softmax(np.array([[0.28, 0.62, 0.10], [0.28, 0.62, 0.10]]), axis=-1)} + expected_rv_op_params = { + "p": softmax(np.array([[0.28, 0.62, 0.10], [0.28, 0.62, 0.10]]), axis=-1) + } tests_to_run = [ "check_pymc_params_match_rv_op", "check_rv_size", From 4fba621075ad0709cb97c5dfdb6d6af7514ec14f Mon Sep 17 00:00:00 2001 From: Purna Chandra Mansingh Date: Tue, 22 Mar 2022 01:28:11 +0530 Subject: 
[PATCH 5/5] =?UTF-8?q?=E2=9C=A8added=20logit=5Fp=20to=20docstring?= =?UTF-8?q?=20&=20noted=20in=20ReleaseNote?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- RELEASE-NOTES.md | 1 + pymc/distributions/discrete.py | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index e3e9f8e6fce..468d68bbc76 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -133,6 +133,7 @@ This includes API changes we did not warn about since at least `3.11.0` (2021-01 - Adding support for blackjax's NUTS sampler `pymc.sampling_jax` (see [#5477](ihttps://github.com/pymc-devs/pymc/pull/5477)) - `pymc.sampling_jax` samplers support `log_likelihood`, `observed_data`, and `sample_stats` in returned InferenceData object (see [#5189](https://github.com/pymc-devs/pymc/pull/5189)) - Adding support for `pm.Deterministic` in `pymc.sampling_jax` (see [#5182](https://github.com/pymc-devs/pymc/pull/5182)) +- Added an alternative parametrization, `logit_p`, to `pm.Binomial` and `pm.Categorical` distributions (see [#5637](https://github.com/pymc-devs/pymc/pull/5637)). - ... diff --git a/pymc/distributions/discrete.py b/pymc/distributions/discrete.py index f4ed9dac6c3..be1f26826fd 100644 --- a/pymc/distributions/discrete.py +++ b/pymc/distributions/discrete.py @@ -106,10 +106,12 @@ class Binomial(Discrete): Parameters ---------- - n: int + n : int Number of Bernoulli trials (n >= 0). - p: float + p : float Probability of success in each trial (0 < p < 1). + logit_p : float + Alternative log odds for the probability of success. """ rv_op = binomial @@ -1253,9 +1255,11 @@ class Categorical(Discrete): Parameters ---------- - p: array of floats + p : array of floats p > 0 and the elements of p must sum to 1. They will be automatically rescaled otherwise. + logit_p : array of floats + Alternative parametrization: logits that are converted to `p` with a softmax over the last axis. """ rv_op = categorical