From 9d70d4a60db354c2f4d6a8724737c0fea15e0061 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Sun, 14 Mar 2021 19:09:53 +0100 Subject: [PATCH 1/6] Fix logp of (Discrete) Uniform to not depend on bound --- pymc3/distributions/continuous.py | 6 +++++- pymc3/distributions/discrete.py | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 25310b4781..d073ceef49 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -268,7 +268,11 @@ def logp(self, value): """ lower = self.lower upper = self.upper - return bound(-aet.log(upper - lower), value >= lower, value <= upper) + return bound( + aet.fill(value, -aet.log(upper - lower)), + value >= lower, + value <= upper, + ) def logcdf(self, value): """ diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 06cd504f40..ab1aa2a198 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -1278,7 +1278,11 @@ def logp(self, value): """ upper = self.upper lower = self.lower - return bound(-aet.log(upper - lower + 1), lower <= value, value <= upper) + return bound( + aet.fill(value, -aet.log(upper - lower + 1)), + lower <= value, + value <= upper, + ) def logcdf(self, value): """ From 47edb175e9999be08fad627bbf70a5c8c6f02ac6 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Sun, 14 Mar 2021 19:11:23 +0100 Subject: [PATCH 2/6] Add unittest --- pymc3/tests/test_distributions.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 7662e0ed58..d6a98ff233 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -2667,6 +2667,17 @@ def test_issue_3051(self, dims, dist_cls, kwargs): assert actual_a.shape == (X.shape[0],) pass + def test_issue_4499(self): + # Test for bug in Uniform and DiscreteUniform logp when setting check_bounds = False + # https://github.com/pymc-devs/pymc3/issues/4499 + with pm.Model(check_bounds=False) as m: + x = pm.Uniform("x", 0, 2, shape=10, transform=None) + assert_almost_equal(m.logp_array(np.ones(10)), -np.log(2) * 10) + + with pm.Model(check_bounds=False) as m: + x = pm.DiscreteUniform("x", 0, 1, shape=10) + assert_almost_equal(m.logp_array(np.ones(10)), -np.log(2) * 10) + def test_serialize_density_dist(): def func(x): From fb9a5a156855460416dbb25404a1a022d396b952 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Sun, 14 Mar 2021 19:14:34 +0100 Subject: [PATCH 3/6] Remove redundant `all()` bound conditions in multivariate distributions and improve documentation of dist_math::bound --- pymc3/distributions/dist_math.py | 16 +++++++++----- pymc3/distributions/multivariate.py | 33 +++++++++++++++-------------- 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py index 90ff0c1908..771d1802b2 100644 --- a/pymc3/distributions/dist_math.py +++ b/pymc3/distributions/dist_math.py @@ -48,17 +48,23 @@ def bound(logp, *conditions, **kwargs): """ Bounds a log probability density with several conditions. + When conditions are not met, the logp values are replaced by -inf. + + Note that bound should not be used to enforce the logic of the logp under the normal + support as it can be disabled by the user via check_bounds = False in pm.Model() Parameters ---------- logp: float *conditions: booleans broadcast_conditions: bool (optional, default=True) - If True, broadcasts logp to match the largest shape of the conditions. - This is used e.g. in DiscreteUniform where logp is a scalar constant and the shape - is specified via the conditions. - If False, will return the same shape as logp. - This is used e.g. in Multinomial where broadcasting can lead to differences in the logp. + If True, conditions are broadcasted and applied element-wise to each value in logp. + If False, conditions are collapsed via aet.all(). As a consequence the entire logp + array is either replaced by -inf or unchanged. + + Setting broadcasts_conditions to False is necessary for most (all?) multivariate + distributions where the dimensions of the conditions do not unambigously match + that of the logp. Returns ------- diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index 18751e32c7..94eef7bdc1 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -527,9 +527,9 @@ def logp(self, value): # only defined for sum(value) == 1 return bound( aet.sum(logpow(value, a - 1) - gammaln(a), axis=-1) + gammaln(aet.sum(a, axis=-1)), - aet.all(value >= 0), - aet.all(value <= 1), - aet.all(a > 0), + value >= 0, + value <= 1, + a > 0, broadcast_conditions=False, ) @@ -671,11 +671,11 @@ def logp(self, x): return bound( factln(n) + aet.sum(-factln(x) + logpow(p, x), axis=-1, keepdims=True), - aet.all(x >= 0), - aet.all(aet.eq(aet.sum(x, axis=-1, keepdims=True), n)), - aet.all(p <= 1), - aet.all(aet.eq(aet.sum(p, axis=-1), 1)), - aet.all(aet.ge(n, 0)), + x >= 0, + aet.eq(aet.sum(x, axis=-1, keepdims=True), n), + p <= 1, + aet.eq(aet.sum(p, axis=-1), 1), + n >= 0, broadcast_conditions=False, ) @@ -823,10 +823,10 @@ def logp(self, value): # and that each observation value_i sums to n_i. return bound( result, - aet.all(aet.ge(value, 0)), - aet.all(aet.gt(a, 0)), - aet.all(aet.ge(n, 0)), - aet.all(aet.eq(value.sum(axis=-1, keepdims=True), n)), + value >= 0, + a > 0, + n >= 0, + aet.eq(value.sum(axis=-1, keepdims=True), n), broadcast_conditions=False, ) @@ -1575,8 +1575,8 @@ def logp(self, x): result += (eta - 1.0) * aet.log(det(X)) return bound( result, - aet.all(X <= 1), - aet.all(X >= -1), + X >= -1, + X <= 1, matrix_pos_def(X), eta > 0, broadcast_conditions=False, @@ -2204,9 +2204,10 @@ def logp(self, value): logquad = (self.tau * delta * tau_dot_delta).sum(axis=-1) return bound( 0.5 * (logtau + logdet - logquad), - aet.all(self.alpha <= 1), - aet.all(self.alpha >= -1), + self.alpha >= -1, + self.alpha <= 1, self.tau > 0, + broadcast_conditions=False, ) def random(self, point=None, size=None): From 7cabbfe7ab70c29a36c0501359d6c50e7eda4912 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Sun, 14 Mar 2021 19:25:51 +0100 Subject: [PATCH 4/6] Add recommendation for check_bounds. --- pymc3/model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pymc3/model.py b/pymc3/model.py index 76800e7960..656eb9f078 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -817,7 +817,8 @@ class Model(Factor, WithMemoization, metaclass=ContextMeta): Ensure that input parameters to distributions are in a valid range. If your model is built in a way where you know your parameters can only take on valid values you can set this to - False for increased speed. + False for increased speed. This should not be used if your model + contains discrete variables. Examples -------- From 20f1c0ce4d65379b57d7a1e4325c3737a5771e4e Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 16 Mar 2021 13:44:21 +0100 Subject: [PATCH 5/6] Add release note --- RELEASE-NOTES.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index b8f7537b89..ddf9db4f67 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -13,6 +13,8 @@ - The `pymc3.memoize` module was removed and replaced with `cachetools`. The `hashable` function and `WithMemoization` class were moved to `pymc3.util` (see [#4509](https://github.com/pymc-devs/pymc3/pull/4509)). - Remove float128 dtype support (see [#4514](https://github.com/pymc-devs/pymc3/pull/4514)). - `pm.make_shared_replacements` now retains broadcasting information which fixes issues with Metropolis samplers (see [#4492](https://github.com/pymc-devs/pymc3/pull/4492)). +- Logp method of `Uniform` and `DiscreteUniform` no longer depends on `pymc3.distributions.dist_math.bound` for proper evaluation (see [#4541](https://github.com/pymc-devs/pymc3/pull/4541)). + + ... ## PyMC3 3.11.1 (12 February 2021) From 7cb9974ef10afc25b40755ee26b36f30fee7aa4a Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 16 Mar 2021 14:48:22 +0100 Subject: [PATCH 6/6] Include Release Notes from 3.11.2 --- RELEASE-NOTES.md | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index ddf9db4f67..7baffaabdd 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -1,21 +1,37 @@ # Release Notes -## PyMC3 vNext (TBD) +## PyMC3 vNext (4.0.0) ### Breaking Changes - ⚠ Theano-PyMC has been replaced with Aesara, so all external references to `theano`, `tt`, and `pymc3.theanof` need to be replaced with `aesara`, `aet`, and `pymc3.aesaraf` (see [4471](https://github.com/pymc-devs/pymc3/pull/4471)). ### New Features -+ `pm.math.cartesian` can now handle inputs that are themselves >1D (see [#4482](https://github.com/pymc-devs/pymc3/pull/4482)). -+ The `CAR` distribution has been added to allow for use of conditional autoregressions which often are used in spatial and network models. -+ ... +- The `CAR` distribution has been added to allow for use of conditional autoregressions which often are used in spatial and network models. +- ... ### Maintenance -- The `pymc3.memoize` module was removed and replaced with `cachetools`. The `hashable` function and `WithMemoization` class were moved to `pymc3.util` (see [#4509](https://github.com/pymc-devs/pymc3/pull/4509)). - Remove float128 dtype support (see [#4514](https://github.com/pymc-devs/pymc3/pull/4514)). -- `pm.make_shared_replacements` now retains broadcasting information which fixes issues with Metropolis samplers (see [#4492](https://github.com/pymc-devs/pymc3/pull/4492)). - Logp method of `Uniform` and `DiscreteUniform` no longer depends on `pymc3.distributions.dist_math.bound` for proper evaluation (see [#4541](https://github.com/pymc-devs/pymc3/pull/4541)). +- ... + +## PyMC3 3.11.2 (14 March 2021) + +### New Features ++ `pm.math.cartesian` can now handle inputs that are themselves >1D (see [#4482](https://github.com/pymc-devs/pymc3/pull/4482)). ++ Statistics and plotting functions that were removed in `3.11.0` were brought back, albeit with deprecation warnings if an old naming scheme is used (see [#4536](https://github.com/pymc-devs/pymc3/pull/4536)). In order to future proof your code, rename these function calls: + + `pm.traceplot` → `pm.plot_trace` + + `pm.compareplot` → `pm.plot_compare` (here you might need to rename some columns in the input according to the [`arviz.plot_compare` documentation](https://arviz-devs.github.io/arviz/api/generated/arviz.plot_compare.html)) + + `pm.autocorrplot` → `pm.plot_autocorr` + + `pm.forestplot` → `pm.plot_forest` + + `pm.kdeplot` → `pm.plot_kde` + + `pm.energyplot` → `pm.plot_energy` + + `pm.densityplot` → `pm.plot_density` + + `pm.pairplot` → `pm.plot_pair` + +### Maintenance +- ⚠ Our memoization mechanism wasn't robust against hash collisions ([#4506](https://github.com/pymc-devs/pymc3/issues/4506)), sometimes resulting in incorrect values in, for example, posterior predictives. The `pymc3.memoize` module was removed and replaced with `cachetools`. The `hashable` function and `WithMemoization` class were moved to `pymc3.util` (see [#4525](https://github.com/pymc-devs/pymc3/pull/4525)). +- `pm.make_shared_replacements` now retains broadcasting information which fixes issues with Metropolis samplers (see [#4492](https://github.com/pymc-devs/pymc3/pull/4492)). -+ ... +**Release manager** for 3.11.2: Michael Osthege ([@michaelosthege](https://github.com/michaelosthege)) ## PyMC3 3.11.1 (12 February 2021)