From bb88c3d554bc9596055a55e0f19b9f699b0c86a2 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Tue, 5 Jan 2021 15:46:42 +0100
Subject: [PATCH 1/4] - Improve ExGaussian logcdf and refactor logp

---
 pymc3/distributions/continuous.py | 50 +++++++++++++++++--------------
 pymc3/tests/test_distributions.py |  8 +++++
 2 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py
index c60898ab16c..5758bec9635 100644
--- a/pymc3/distributions/continuous.py
+++ b/pymc3/distributions/continuous.py
@@ -37,6 +37,7 @@
     gammaln,
     i0e,
     incomplete_beta,
+    log_normal,
     logpow,
     normal_lccdf,
     normal_lcdf,
@@ -3214,22 +3215,22 @@ def logp(self, value):
         sigma = self.sigma
         nu = self.nu
 
-        standardized_val = (value - mu) / sigma
-        cdf_val = std_cdf(standardized_val - sigma / nu)
-        cdf_val_safe = tt.switch(tt.eq(cdf_val, 0), np.finfo(theano.config.floatX).eps, cdf_val)
-
-        # This condition is suggested by exGAUS.R from gamlss
-        lp = tt.switch(
-            tt.gt(nu, 0.05 * sigma),
-            -tt.log(nu) + (mu - value) / nu + 0.5 * (sigma / nu) ** 2 + logpow(cdf_val_safe, 1.0),
-            -tt.log(sigma * tt.sqrt(2 * np.pi)) - 0.5 * standardized_val ** 2,
+        # Alogithm is adapted from dexGAUS.R from gamlss
+        return bound(
+            tt.switch(
+                tt.gt(nu, 0.05 * sigma),
+                (
+                    -tt.log(nu)
+                    + (mu - value) / nu
+                    + 0.5 * (sigma / nu) ** 2
+                    + normal_lcdf(mu + (sigma ** 2) / nu, sigma, value)
+                ),
+                log_normal(value, mean=mu, sigma=sigma),
+            ),
+            0 < sigma,
+            0 < nu,
         )
 
-        return bound(lp, sigma > 0.0, nu > 0.0)
-
-    def _distr_parameters_for_repr(self):
-        return ["mu", "sigma", "nu"]
-
     def logcdf(self, value):
         """
         Compute the log of the cumulative distribution function for ExGaussian distribution
@@ -3253,22 +3254,25 @@ def logcdf(self, value):
         """
         mu = self.mu
         sigma = self.sigma
-        sigma_2 = sigma ** 2
         nu = self.nu
-        z = value - mu - sigma_2 / nu
+
+        # Alogithm is adapted from pexGAUS.R from gamlss
         return tt.switch(
             tt.gt(nu, 0.05 * sigma),
-            tt.log(
-                std_cdf((value - mu) / sigma)
-                - std_cdf(z / sigma)
-                * tt.exp(
-                    ((mu + (sigma_2 / nu)) ** 2 - (mu ** 2) - 2 * value * ((sigma_2) / nu))
-                    / (2 * sigma_2)
-                )
+            logdiffexp(
+                normal_lcdf(mu, sigma, value),
+                (
+                    (mu - value) / nu
+                    + 0.5 * (sigma / nu) ** 2
+                    + normal_lcdf(mu + (sigma ** 2) / nu, sigma, value)
+                ),
             ),
             normal_lcdf(mu, sigma, value),
         )
 
+    def _distr_parameters_for_repr(self):
+        return ["mu", "sigma", "nu"]
+
 
 class VonMises(Continuous):
     r"""
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index a103008035f..c2ec32e2464 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -559,6 +559,7 @@ def check_logp(self, model, value, domain, paramdomains, logp_reference, decimal
         logp = model.fastlogp
         for pt in product(domains, n_samples=100):
             pt = Point(pt, model=model)
+            print(pt)
             if decimal is None:
                 decimal = select_by_precision(float64=6, float32=3)
             assert_almost_equal(logp(pt), logp_reference(pt), decimal=decimal, err_msg=str(pt))
@@ -578,6 +579,7 @@ def check_logcdf(
             decimal = select_by_precision(float64=6, float32=3)
         for pt in product(domains, n_samples=n_samples):
             params = dict(pt)
+            print(params)
             scipy_cdf = scipy_logcdf(**params)
             value = params.pop("value")
             dist = pymc3_dist.dist(**params)
@@ -1825,6 +1827,9 @@ def test_get_tau_sigma(self):
             (15.0, 5.000, 7.500, 7.500, -3.3093854),
             (50.0, 50.000, 10.000, 10.000, -3.6436067),
             (1000.0, 500.000, 10.000, 20.000, -27.8707323),
+            (-1.0, 1.0, 20.0, 0.9, -3.91967108),  # Fails in scipy version
+            (0.01, 0.01, 100.0, 0.01, -5.5241087),  # Fails in scipy version
+            (-1.0, 0.0, 0.1, 0.1, -51.022349),  # Fails in previous pymc3 version
         ],
     )
     def test_ex_gaussian(self, value, mu, sigma, nu, logp):
@@ -1851,6 +1856,9 @@ def test_ex_gaussian(self, value, mu, sigma, nu, logp):
             (15.0, 5.000, 7.500, 7.500, -0.4545255),
             (50.0, 50.000, 10.000, 10.000, -1.433714),
             (1000.0, 500.000, 10.000, 20.000, -1.573708e-11),
+            (0.01, 0.01, 100.0, 0.01, -0.69314718),  # Fails in scipy version
+            (-0.43402407, 0.0, 0.1, 0.1, -13.59615423),  # Previous 32-bit version failed here
+            (-0.72402009, 0.0, 0.1, 0.1, -31.26571842),  # Previous 64-bit version failed here
         ],
     )
     def test_ex_gaussian_cdf(self, value, mu, sigma, nu, logcdf):

From aba18f1cf313a7903dd2964238a7ac44450c1cbb Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Tue, 5 Jan 2021 16:14:49 +0100
Subject: [PATCH 2/4] - Remove unused std_cdf

---
 pymc3/distributions/continuous.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py
index 5758bec9635..71c37af042d 100644
--- a/pymc3/distributions/continuous.py
+++ b/pymc3/distributions/continuous.py
@@ -41,7 +41,6 @@
     logpow,
     normal_lccdf,
     normal_lcdf,
-    std_cdf,
     zvalue,
 )
 from pymc3.distributions.distribution import Continuous, draw_values, generate_samples

From e2c1d0c8b6b91a1c4ddddd00238bed42ff7ff7a3 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Tue, 5 Jan 2021 16:21:00 +0100
Subject: [PATCH 3/4] - Add release note

---
 RELEASE-NOTES.md                  | 1 +
 pymc3/tests/test_distributions.py | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 10c3e5a2ecf..6e1f0b46a48 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -32,6 +32,7 @@ It also brings some dreadfully awaited fixes, so be sure to go through the chang
 - Fix issue in `logp` method of `HyperGeometric`. It now returns `-inf` for invalid parameters (see [4367](https://github.com/pymc-devs/pymc3/pull/4367))
 - Fixed `MatrixNormal` random method to work with parameters as random variables. (see [#4368](https://github.com/pymc-devs/pymc3/pull/4368))
 - Update the `logcdf` method of several continuous distributions to return -inf for invalid parameters and values, and raise an informative error when multiple values cannot be evaluated in a single call. (see [4393](https://github.com/pymc-devs/pymc3/pull/4393))
+- Improve numerical stability in `logp` and `logcdf` methods of `ExGaussian` (see [#4407](https://github.com/pymc-devs/pymc3/pull/4407))
 
 ## PyMC3 3.10.0 (7 December 2020)
 
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index c2ec32e2464..dcfceee4415 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -559,7 +559,6 @@ def check_logp(self, model, value, domain, paramdomains, logp_reference, decimal
         logp = model.fastlogp
         for pt in product(domains, n_samples=100):
             pt = Point(pt, model=model)
-            print(pt)
             if decimal is None:
                 decimal = select_by_precision(float64=6, float32=3)
             assert_almost_equal(logp(pt), logp_reference(pt), decimal=decimal, err_msg=str(pt))
@@ -579,7 +578,6 @@ def check_logcdf(
             decimal = select_by_precision(float64=6, float32=3)
         for pt in product(domains, n_samples=n_samples):
             params = dict(pt)
-            print(params)
             scipy_cdf = scipy_logcdf(**params)
             value = params.pop("value")
             dist = pymc3_dist.dist(**params)

From b49ba9f8c8a356bdeecb4a7b6054992c40c42b62 Mon Sep 17 00:00:00 2001
From: Ricardo <ricardo.vieira1994@gmail.com>
Date: Tue, 5 Jan 2021 17:02:17 +0100
Subject: [PATCH 4/4] - Remove unused import `theano`

---
 pymc3/distributions/continuous.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py
index 71c37af042d..985f90bcbaf 100644
--- a/pymc3/distributions/continuous.py
+++ b/pymc3/distributions/continuous.py
@@ -20,7 +20,6 @@
 import warnings
 
 import numpy as np
-import theano
 import theano.tensor as tt
 
 from scipy import stats