From bb88c3d554bc9596055a55e0f19b9f699b0c86a2 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 5 Jan 2021 15:46:42 +0100 Subject: [PATCH 1/4] - Improve ExGaussian logcdf and refactor logp --- pymc3/distributions/continuous.py | 50 +++++++++++++++++-------------- pymc3/tests/test_distributions.py | 8 +++++ 2 files changed, 35 insertions(+), 23 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index c60898ab16c..5758bec9635 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -37,6 +37,7 @@ gammaln, i0e, incomplete_beta, + log_normal, logpow, normal_lccdf, normal_lcdf, @@ -3214,22 +3215,22 @@ def logp(self, value): sigma = self.sigma nu = self.nu - standardized_val = (value - mu) / sigma - cdf_val = std_cdf(standardized_val - sigma / nu) - cdf_val_safe = tt.switch(tt.eq(cdf_val, 0), np.finfo(theano.config.floatX).eps, cdf_val) - - # This condition is suggested by exGAUS.R from gamlss - lp = tt.switch( - tt.gt(nu, 0.05 * sigma), - -tt.log(nu) + (mu - value) / nu + 0.5 * (sigma / nu) ** 2 + logpow(cdf_val_safe, 1.0), - -tt.log(sigma * tt.sqrt(2 * np.pi)) - 0.5 * standardized_val ** 2, + # Alogithm is adapted from dexGAUS.R from gamlss + return bound( + tt.switch( + tt.gt(nu, 0.05 * sigma), + ( + -tt.log(nu) + + (mu - value) / nu + + 0.5 * (sigma / nu) ** 2 + + normal_lcdf(mu + (sigma ** 2) / nu, sigma, value) + ), + log_normal(value, mean=mu, sigma=sigma), + ), + 0 < sigma, + 0 < nu, ) - return bound(lp, sigma > 0.0, nu > 0.0) - - def _distr_parameters_for_repr(self): - return ["mu", "sigma", "nu"] - def logcdf(self, value): """ Compute the log of the cumulative distribution function for ExGaussian distribution @@ -3253,22 +3254,25 @@ def logcdf(self, value): """ mu = self.mu sigma = self.sigma - sigma_2 = sigma ** 2 nu = self.nu - z = value - mu - sigma_2 / nu + + # Alogithm is adapted from pexGAUS.R from gamlss return tt.switch( tt.gt(nu, 0.05 * sigma), - tt.log( - std_cdf((value - mu) / sigma) - - std_cdf(z / sigma) - * tt.exp( - ((mu + (sigma_2 / nu)) ** 2 - (mu ** 2) - 2 * value * ((sigma_2) / nu)) - / (2 * sigma_2) - ) + logdiffexp( + normal_lcdf(mu, sigma, value), + ( + (mu - value) / nu + + 0.5 * (sigma / nu) ** 2 + + normal_lcdf(mu + (sigma ** 2) / nu, sigma, value) + ), ), normal_lcdf(mu, sigma, value), ) + def _distr_parameters_for_repr(self): + return ["mu", "sigma", "nu"] + class VonMises(Continuous): r""" diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index a103008035f..c2ec32e2464 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -559,6 +559,7 @@ def check_logp(self, model, value, domain, paramdomains, logp_reference, decimal logp = model.fastlogp for pt in product(domains, n_samples=100): pt = Point(pt, model=model) + print(pt) if decimal is None: decimal = select_by_precision(float64=6, float32=3) assert_almost_equal(logp(pt), logp_reference(pt), decimal=decimal, err_msg=str(pt)) @@ -578,6 +579,7 @@ def check_logcdf( decimal = select_by_precision(float64=6, float32=3) for pt in product(domains, n_samples=n_samples): params = dict(pt) + print(params) scipy_cdf = scipy_logcdf(**params) value = params.pop("value") dist = pymc3_dist.dist(**params) @@ -1825,6 +1827,9 @@ def test_get_tau_sigma(self): (15.0, 5.000, 7.500, 7.500, -3.3093854), (50.0, 50.000, 10.000, 10.000, -3.6436067), (1000.0, 500.000, 10.000, 20.000, -27.8707323), + (-1.0, 1.0, 20.0, 0.9, -3.91967108), # Fails in scipy version + (0.01, 0.01, 100.0, 0.01, -5.5241087), # Fails in scipy version + (-1.0, 0.0, 0.1, 0.1, -51.022349), # Fails in previous pymc3 version ], ) def test_ex_gaussian(self, value, mu, sigma, nu, logp): @@ -1851,6 +1856,9 @@ def test_ex_gaussian(self, value, mu, sigma, nu, logp): (15.0, 5.000, 7.500, 7.500, -0.4545255), (50.0, 50.000, 10.000, 10.000, -1.433714), (1000.0, 500.000, 10.000, 20.000, -1.573708e-11), + (0.01, 0.01, 100.0, 0.01, -0.69314718), # Fails in scipy version + (-0.43402407, 0.0, 0.1, 0.1, -13.59615423), # Previous 32-bit version failed here + (-0.72402009, 0.0, 0.1, 0.1, -31.26571842), # Previous 64-bit version failed here ], ) def test_ex_gaussian_cdf(self, value, mu, sigma, nu, logcdf): From aba18f1cf313a7903dd2964238a7ac44450c1cbb Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 5 Jan 2021 16:14:49 +0100 Subject: [PATCH 2/4] - Remove unused std_cdf --- pymc3/distributions/continuous.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 5758bec9635..71c37af042d 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -41,7 +41,6 @@ logpow, normal_lccdf, normal_lcdf, - std_cdf, zvalue, ) from pymc3.distributions.distribution import Continuous, draw_values, generate_samples From e2c1d0c8b6b91a1c4ddddd00238bed42ff7ff7a3 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 5 Jan 2021 16:21:00 +0100 Subject: [PATCH 3/4] - Add release note --- RELEASE-NOTES.md | 1 + pymc3/tests/test_distributions.py | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 10c3e5a2ecf..6e1f0b46a48 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -32,6 +32,7 @@ It also brings some dreadfully awaited fixes, so be sure to go through the chang - Fix issue in `logp` method of `HyperGeometric`. It now returns `-inf` for invalid parameters (see [4367](https://github.com/pymc-devs/pymc3/pull/4367)) - Fixed `MatrixNormal` random method to work with parameters as random variables. (see [#4368](https://github.com/pymc-devs/pymc3/pull/4368)) - Update the `logcdf` method of several continuous distributions to return -inf for invalid parameters and values, and raise an informative error when multiple values cannot be evaluated in a single call. (see [4393](https://github.com/pymc-devs/pymc3/pull/4393)) +- Improve numerical stability in `logp` and `logcdf` methods of `ExGaussian` (see [#4407](https://github.com/pymc-devs/pymc3/pull/4407)) ## PyMC3 3.10.0 (7 December 2020) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index c2ec32e2464..dcfceee4415 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -559,7 +559,6 @@ def check_logp(self, model, value, domain, paramdomains, logp_reference, decimal logp = model.fastlogp for pt in product(domains, n_samples=100): pt = Point(pt, model=model) - print(pt) if decimal is None: decimal = select_by_precision(float64=6, float32=3) assert_almost_equal(logp(pt), logp_reference(pt), decimal=decimal, err_msg=str(pt)) @@ -579,7 +578,6 @@ def check_logcdf( decimal = select_by_precision(float64=6, float32=3) for pt in product(domains, n_samples=n_samples): params = dict(pt) - print(params) scipy_cdf = scipy_logcdf(**params) value = params.pop("value") dist = pymc3_dist.dist(**params) From b49ba9f8c8a356bdeecb4a7b6054992c40c42b62 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 5 Jan 2021 17:02:17 +0100 Subject: [PATCH 4/4] - Remove unused import `theano` --- pymc3/distributions/continuous.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 71c37af042d..985f90bcbaf 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -20,7 +20,6 @@ import warnings import numpy as np -import theano import theano.tensor as tt from scipy import stats