From 7fc38416978afc335e03c389f48afa007022ece2 Mon Sep 17 00:00:00 2001
From: drabbit17
Date: Sun, 13 Jun 2021 17:16:18 +0100
Subject: [PATCH] Refactor Wald distribution

---
 pymc3/distributions/continuous.py        | 135 +++++++++++++----
 pymc3/tests/test_distributions.py        |   4 -
 pymc3/tests/test_distributions_random.py |  91 +++++++++++----
 3 files changed, 149 insertions(+), 81 deletions(-)

diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py
index 253e207a313..f34f9389c50 100644
--- a/pymc3/distributions/continuous.py
+++ b/pymc3/distributions/continuous.py
@@ -18,7 +18,7 @@
 nodes in PyMC.
 """

-from typing import Union
+from typing import List, Optional, Tuple, Union

 import aesara.tensor as at
 import numpy as np
@@ -891,6 +891,37 @@ def _distr_parameters_for_repr(self):
         return ["sigma"]


+class WaldRV(RandomVariable):
+    name = "wald"
+    ndim_supp = 0
+    ndims_params = [0, 0, 0]
+    dtype = "floatX"
+    _print_name = ("Wald", "\\operatorname{Wald}")
+
+    @classmethod
+    def rng_fn(
+        cls,
+        rng: np.random.RandomState,
+        mu: Union[np.ndarray, float],
+        lam: Union[np.ndarray, float],
+        alpha: Union[np.ndarray, float],
+        size: Optional[Union[List[int], int]],
+    ) -> np.ndarray:
+        v = rng.normal(size=size) ** 2
+        z = rng.uniform(size=size)
+        value = (
+            mu
+            + (mu ** 2) * v / (2.0 * lam)
+            - mu / (2.0 * lam) * np.sqrt(4.0 * mu * lam * v + (mu * v) ** 2)
+        )
+        i = np.floor(z - mu / (mu + value)) * 2 + 1
+        value = (value ** -i) * (mu ** (i + 1))
+        return value + alpha
+
+
+wald = WaldRV()
+
+
 class Wald(PositiveContinuous):
     r"""
     Wald log-likelihood.
@@ -969,27 +1000,33 @@ class Wald(PositiveContinuous):
     .. [Giner2016] Göknur Giner, Gordon K. Smyth (2016)
        statmod: Probability Calculations for the Inverse Gaussian Distribution
     """
+    rv_op = wald

-    def __init__(self, mu=None, lam=None, phi=None, alpha=0.0, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        mu, lam, phi = self.get_mu_lam_phi(mu, lam, phi)
-        self.alpha = alpha = at.as_tensor_variable(floatX(alpha))
-        self.mu = mu = at.as_tensor_variable(floatX(mu))
-        self.lam = lam = at.as_tensor_variable(floatX(lam))
-        self.phi = phi = at.as_tensor_variable(floatX(phi))
-
-        self.mean = self.mu + self.alpha
-        self.mode = (
-            self.mu * (at.sqrt(1.0 + (1.5 * self.mu / self.lam) ** 2) - 1.5 * self.mu / self.lam)
-            + self.alpha
-        )
-        self.variance = (self.mu ** 3) / self.lam
+    @classmethod
+    def dist(
+        cls,
+        mu: Optional[Union[float, np.ndarray]] = None,
+        lam: Optional[Union[float, np.ndarray]] = None,
+        phi: Optional[Union[float, np.ndarray]] = None,
+        alpha: Union[float, np.ndarray] = 0.0,
+        *args,
+        **kwargs,
+    ) -> RandomVariable:
+        mu, lam, phi = cls.get_mu_lam_phi(mu, lam, phi)
+        alpha = at.as_tensor_variable(floatX(alpha))
+        mu = at.as_tensor_variable(floatX(mu))
+        lam = at.as_tensor_variable(floatX(lam))

         assert_negative_support(phi, "phi", "Wald")
         assert_negative_support(mu, "mu", "Wald")
         assert_negative_support(lam, "lam", "Wald")

-    def get_mu_lam_phi(self, mu, lam, phi):
+        return super().dist([mu, lam, alpha], **kwargs)
+
+    @staticmethod
+    def get_mu_lam_phi(
+        mu: Optional[float], lam: Optional[float], phi: Optional[float]
+    ) -> Tuple[Union[float, np.ndarray], Union[float, np.ndarray], Union[float, np.ndarray]]:
         if mu is None:
             if lam is not None and phi is not None:
                 return lam / phi, lam, phi
@@ -1008,39 +1045,12 @@ def get_mu_lam_phi(self, mu, lam, phi):
             "mu and lam, mu and phi, or lam and phi."
         )

-    def _random(self, mu, lam, alpha, size=None):
-        v = np.random.normal(size=size) ** 2
-        value = (
-            mu
-            + (mu ** 2) * v / (2.0 * lam)
-            - mu / (2.0 * lam) * np.sqrt(4.0 * mu * lam * v + (mu * v) ** 2)
-        )
-        z = np.random.uniform(size=size)
-        i = np.floor(z - mu / (mu + value)) * 2 + 1
-        value = (value ** -i) * (mu ** (i + 1))
-        return value + alpha
-
-    def random(self, point=None, size=None):
-        """
-        Draw random values from Wald distribution.
-
-        Parameters
-        ----------
-        point: dict, optional
-            Dict of variable values on which random values are to be
-            conditioned (uses default point if not specified).
-        size: int, optional
-            Desired size of random sample (returns one sample if not
-            specified).
-
-        Returns
-        -------
-        array
-        """
-        # mu, lam, alpha = draw_values([self.mu, self.lam, self.alpha], point=point, size=size)
-        # return generate_samples(self._random, mu, lam, alpha, dist_shape=self.shape, size=size)
-
-    def logp(self, value):
+    def logp(
+        value,
+        mu: Union[float, np.ndarray, TensorVariable],
+        lam: Union[float, np.ndarray, TensorVariable],
+        alpha: Union[float, np.ndarray, TensorVariable],
+    ) -> TensorVariable:
         """
         Calculate log-probability of Wald distribution at specified value.

@@ -1049,14 +1059,17 @@ def logp(self, value):
         value: numeric
             Value(s) for which log-probability is calculated. If the log probabilities for multiple
             values are desired the values must be provided in a numpy array or Aesara tensor
+        mu: float or TensorVariable
+            Mean of the distribution (mu > 0).
+        lam: float or TensorVariable
+            Relative precision (lam > 0).
+        alpha: float or TensorVariable
+            Shift/location parameter (alpha >= 0).

         Returns
         -------
         TensorVariable
         """
-        mu = self.mu
-        lam = self.lam
-        alpha = self.alpha
         centered_value = value - alpha
         # value *must* be iid. Otherwise this is wrong.
         return bound(
@@ -1072,7 +1085,12 @@ def logp(self, value):
     def _distr_parameters_for_repr(self):
         return ["mu", "lam", "alpha"]

-    def logcdf(self, value):
+    def logcdf(
+        value,
+        mu: Union[float, np.ndarray, TensorVariable],
+        lam: Union[float, np.ndarray, TensorVariable],
+        alpha: Union[float, np.ndarray, TensorVariable],
+    ) -> TensorVariable:
         """
         Compute the log of the cumulative distribution function for Wald distribution
         at the specified value.

@@ -1082,16 +1100,17 @@ def logcdf(self, value):
         value: numeric or np.ndarray or aesara.tensor
             Value(s) for which log CDF is calculated. If the log CDF for multiple
             values are desired the values must be provided in a numpy array or Aesara tensor.
+        mu: float or TensorVariable
+            Mean of the distribution (mu > 0).
+        lam: float or TensorVariable
+            Relative precision (lam > 0).
+        alpha: float or TensorVariable
+            Shift/location parameter (alpha >= 0).
         Returns
         -------
         TensorVariable
         """
-        # Distribution parameters
-        mu = self.mu
-        lam = self.lam
-        alpha = self.alpha
-
         value -= alpha
         q = value / mu
         l = lam * mu
diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py
index c21b05a0ca5..1f3e522b8f9 100644
--- a/pymc3/tests/test_distributions.py
+++ b/pymc3/tests/test_distributions.py
@@ -1067,7 +1067,6 @@ def test_chisquared_logcdf(self):
             lambda value, nu: sp.chi2.logcdf(value, df=nu),
         )

-    @pytest.mark.xfail(reason="Distribution not refactored yet")
     def test_wald_logp(self):
         self.check_logp(
             Wald,
@@ -1077,7 +1076,6 @@
             decimal=select_by_precision(float64=6, float32=1),
         )

-    @pytest.mark.xfail(reason="Distribution not refactored yet")
     @pytest.mark.xfail(
         condition=(aesara.config.floatX == "float32"),
         reason="Poor CDF in SciPy. See scipy/scipy#869 for details.",
@@ -1090,7 +1088,6 @@ def test_wald_logcdf(self):
             lambda value, mu, alpha: sp.invgauss.logcdf(value, mu=mu, loc=alpha),
         )

-    @pytest.mark.xfail(reason="Distribution not refactored yet")
     @pytest.mark.parametrize(
         "value,mu,lam,phi,alpha,logp",
         [
@@ -1110,7 +1107,6 @@ def test_wald_logcdf(self):
             (50.0, 15.0, None, 0.666666, 10.0, -5.6481874),
         ],
     )
-    @pytest.mark.xfail(reason="Distribution not refactored yet")
     def test_wald_logp_custom_points(self, value, mu, lam, phi, alpha, logp):
         # Log probabilities calculated using the dIG function from the R package gamlss.
         # See e.g., doi: 10.1111/j.1467-9876.2005.00510.x, or
diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py
index 6ff348ff0fe..145849ba7e5 100644
--- a/pymc3/tests/test_distributions_random.py
+++ b/pymc3/tests/test_distributions_random.py
@@ -245,12 +245,6 @@ class TestGaussianRandomWalk(BaseTestCases.BaseTestCase):
     default_shape = (1,)


-@pytest.mark.xfail(reason="This distribution has not been refactored for v4")
-class TestWald(BaseTestCases.BaseTestCase):
-    distribution = pm.Wald
-    params = {"mu": 1.0, "lam": 1.0, "alpha": 0.0}
-
-
 @pytest.mark.xfail(reason="This distribution has not been refactored for v4")
 class TestAsymmetricLaplace(BaseTestCases.BaseTestCase):
     distribution = pm.AsymmetricLaplace
@@ -575,6 +569,78 @@ class TestTruncatedNormalUpperTau(BaseTestDistribution):
     ]


+class BaseTestWald(BaseTestDistribution):
+    def wald_rng_fn(self, size, mu, lam, alpha, uniform_rng_fct, normal_rng_fct):
+        v = normal_rng_fct(size=size) ** 2
+        z = uniform_rng_fct(size=size)
+        value = (
+            mu
+            + (mu ** 2) * v / (2.0 * lam)
+            - mu / (2.0 * lam) * np.sqrt(4.0 * mu * lam * v + (mu * v) ** 2)
+        )
+        i = np.floor(z - mu / (mu + value)) * 2 + 1
+        value = (value ** -i) * (mu ** (i + 1))
+        return value + alpha
+
+    def seeded_wald_rng_fn(self):
+        uniform_rng_fct = functools.partial(
+            getattr(np.random.RandomState, "uniform"), self.get_random_state()
+        )
+        normal_rng_fct = functools.partial(
+            getattr(np.random.RandomState, "normal"), self.get_random_state()
+        )
+        return functools.partial(
+            self.wald_rng_fn, uniform_rng_fct=uniform_rng_fct, normal_rng_fct=normal_rng_fct
+        )
+
+    pymc_dist = pm.Wald
+    reference_dist = seeded_wald_rng_fn
+    tests_to_run = [
+        "check_pymc_params_match_rv_op",
+        "check_pymc_draws_match_reference",
+        "check_rv_size",
+    ]
+
+
+class TestWaldAlpha(BaseTestDistribution):
+    pymc_dist = pm.Wald
+    mu, lam, alpha = 1.0, 1.0, 2.0
+    mu_rv, lam_rv, phi_rv = pm.Wald.get_mu_lam_phi(mu=mu, lam=lam, phi=None)
+    pymc_dist_params = {"mu": mu, "lam": lam, "alpha": alpha}
+    expected_rv_op_params = {"mu": mu_rv, "lam": lam_rv, "alpha": alpha}
+    reference_dist_params = {"loc": alpha}
+    reference_dist = seeded_scipy_distribution_builder("wald")
+    tests_to_run = [
+        "check_pymc_params_match_rv_op",
+        "check_pymc_draws_match_reference",
+        "check_rv_size",
+    ]
+
+
+class TestWaldMuLam(BaseTestWald):
+    mu, lam, alpha = 1.0, 3.0, 0.0
+    mu_rv, lam_rv, phi_rv = pm.Wald.get_mu_lam_phi(mu=mu, lam=lam, phi=None)
+    pymc_dist_params = {"mu": mu, "lam": lam}
+    expected_rv_op_params = {"mu": mu_rv, "lam": lam_rv, "alpha": 0.0}
+    reference_dist_params = {"mu": mu_rv, "lam": lam_rv, "alpha": 0.0}
+
+
+class TestWaldMuLamShifted(BaseTestWald):
+    mu, lam, alpha = 1.0, 3.0, 2.0
+    mu_rv, lam_rv, phi_rv = pm.Wald.get_mu_lam_phi(mu=mu, lam=lam, phi=None)
+    pymc_dist_params = {"mu": mu, "lam": lam, "alpha": alpha}
+    expected_rv_op_params = {"mu": mu_rv, "lam": lam_rv, "alpha": 2.0}
+    reference_dist_params = {"mu": mu_rv, "lam": lam_rv, "alpha": 2.0}
+
+
+class TestWaldMuPhi(BaseTestWald):
+    mu, phi, alpha = 1.0, 3.0, 0.0
+    mu_rv, lam_rv, phi_rv = pm.Wald.get_mu_lam_phi(mu=mu, lam=None, phi=phi)
+    pymc_dist_params = {"mu": mu, "phi": phi, "alpha": alpha}
+    expected_rv_op_params = {"mu": mu_rv, "lam": lam_rv, "alpha": 0.0}
+    reference_dist_params = {"mu": mu_rv, "lam": lam_rv, "alpha": 0.0}
+
+
 class TestSkewNormal(BaseTestDistribution):
     pymc_dist = pm.SkewNormal
     pymc_dist_params = {"mu": 0.0, "sigma": 1.0, "alpha": 5.0}
@@ -1263,19 +1329,6 @@ def ref_rand(size, alpha, mu, sigma):
         pymc3_random(pm.SkewNormal, {"mu": R, "sigma": Rplus, "alpha": R}, ref_rand=ref_rand)

-    @pytest.mark.xfail(reason="This distribution has not been refactored for v4")
-    def test_wald(self):
-        # Cannot do anything too exciting as scipy wald is a
-        # location-scale model of the *standard* wald with mu=1 and lam=1
-        def ref_rand(size, mu, lam, alpha):
-            return st.wald.rvs(size=size, loc=alpha)
-
-        pymc3_random(
-            pm.Wald,
-            {"mu": Domain([1.0, 1.0, 1.0]), "lam": Domain([1.0, 1.0, 1.0]), "alpha": Rplus},
-            ref_rand=ref_rand,
-        )
-
     @pytest.mark.xfail(reason="This distribution has not been refactored for v4")
     def test_laplace_asymmetric(self):
         def ref_rand(size, kappa, b, mu):
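
Note appended after the patch (not part of the diff): WaldRV.rng_fn implements the
Michael, Schucany and Haas (1976) transformation method for inverse-Gaussian
sampling. `v` is a chi-square(1) draw, `value` is the smaller root of the associated
quadratic, and `np.floor(z - p) * 2 + 1` is a branchless way of keeping `value` with
probability p = mu / (mu + value) and otherwise swapping in mu ** 2 / value. The
standalone sketch below (the helper name `wald_draws` is ours, not in the patch)
reproduces the algorithm and checks its first two moments against scipy.stats.invgauss:

    import numpy as np
    import scipy.stats as st

    def wald_draws(rng, mu, lam, alpha=0.0, size=None):
        # Chi-square(1) draw and the smaller root of the MSH quadratic.
        v = rng.normal(size=size) ** 2
        z = rng.uniform(size=size)
        value = (
            mu
            + (mu ** 2) * v / (2.0 * lam)
            - mu / (2.0 * lam) * np.sqrt(4.0 * mu * lam * v + (mu * v) ** 2)
        )
        # floor(z - p) is -1 with probability p = mu / (mu + value), else 0,
        # so i = -1 keeps `value` and i = 1 swaps in mu ** 2 / value.
        i = np.floor(z - mu / (mu + value)) * 2 + 1
        value = (value ** -i) * (mu ** (i + 1))
        return value + alpha

    rng = np.random.RandomState(42)
    draws = wald_draws(rng, mu=1.0, lam=3.0, size=100_000)
    # scipy's invgauss(mu / lam, scale=lam) is Wald(mu, lam): mean mu, variance mu ** 3 / lam.
    print(draws.mean(), st.invgauss.mean(1.0 / 3.0, scale=3.0))  # both ~ 1.0
    print(draws.var(), st.invgauss.var(1.0 / 3.0, scale=3.0))    # both ~ 0.333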
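
A second note on the parameterization (again not part of the diff): get_mu_lam_phi
accepts any two of mu, lam and phi and derives the third via phi = lam / mu. After
this patch it is a staticmethod, which is why the new tests call it unbound as
pm.Wald.get_mu_lam_phi(...). A minimal sketch, assuming pymc3 with this patch
applied; the expected tuples follow from branches of get_mu_lam_phi that sit partly
outside this diff's context lines:

    import pymc3 as pm

    # Any two parameters determine the third through phi = lam / mu.
    print(pm.Wald.get_mu_lam_phi(mu=1.0, lam=3.0, phi=None))  # (1.0, 3.0, 3.0)
    print(pm.Wald.get_mu_lam_phi(mu=1.0, lam=None, phi=3.0))  # (1.0, 3.0, 3.0)
    print(pm.Wald.get_mu_lam_phi(mu=None, lam=3.0, phi=3.0))  # (1.0, 3.0, 3.0)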