diff --git a/docs/source/api/distributions/continuous.rst b/docs/source/api/distributions/continuous.rst index a6d4d99507c..c1fa65ca1dd 100644 --- a/docs/source/api/distributions/continuous.rst +++ b/docs/source/api/distributions/continuous.rst @@ -37,6 +37,7 @@ Continuous LogitNormal Interpolated PolyaGamma + GenExtreme .. automodule:: pymc.distributions.continuous :members: diff --git a/pymc/distributions/__init__.py b/pymc/distributions/__init__.py index d9c26738ba7..51843744280 100644 --- a/pymc/distributions/__init__.py +++ b/pymc/distributions/__init__.py @@ -32,6 +32,7 @@ Exponential, Flat, Gamma, + GenExtreme, Gumbel, HalfCauchy, HalfFlat, @@ -198,4 +199,5 @@ "logcdf", "_logcdf", "logpt_sum", + "GenExtreme", ] diff --git a/pymc/distributions/continuous.py b/pymc/distributions/continuous.py index 282fba1dc9e..9911e6415e1 100644 --- a/pymc/distributions/continuous.py +++ b/pymc/distributions/continuous.py @@ -123,6 +123,7 @@ def polyagamma_cdf(*args, **kwargs): "Moyal", "AsymmetricLaplace", "PolyaGamma", + "GenExtreme", ] @@ -4246,3 +4247,183 @@ def logcdf(value, h, z): TensorVariable """ return bound(_PolyaGammaLogDistFunc(False)(value, h, z), h > 0, value > 0) + + +class GenExtremeRV(RandomVariable): + name: str = "Generalized Extreme Value" + ndim_supp: int = 0 + ndims_params: List[int] = [0, 0, 0] + dtype: str = "floatX" + _print_name: Tuple[str, str] = ("Generalized Extreme Value", "\\operatorname{GEV}") + + def __call__(self, mu=0.0, sigma=1.0, xi=0.0, size=None, **kwargs) -> TensorVariable: + return super().__call__(mu, sigma, xi, size=size, **kwargs) + + @classmethod + def rng_fn( + cls, + rng: np.random.RandomState, + mu: np.ndarray, + sigma: np.ndarray, + xi: np.ndarray, + size: Tuple[int, ...], + ) -> np.ndarray: + # Notice negative here, since remainder of GenExtreme is based on Coles parametrization + return stats.genextreme.rvs(c=-xi, loc=mu, scale=sigma, random_state=rng, size=size) + + +gev = GenExtremeRV() + + +class GenExtreme(Continuous): + r""" + Univariate Generalized Extreme Value log-likelihood + + The cdf of this distribution is + + .. math:: + + G(x \mid \mu, \sigma, \xi) = \exp\left[ -\left(1 + \xi z\right)^{-\frac{1}{\xi}} \right] + + where + + .. math:: + + z = \frac{x - \mu}{\sigma} + + and is defined on the set: + + .. math:: + + \left\{x: 1 + \xi\left(\frac{x-\mu}{\sigma}\right) > 0 \right\}. + + Note that this parametrization is per Coles (2001), and differs from that of + Scipy in the sign of the shape parameter, :math:`\xi`. + + .. plot:: + + import matplotlib.pyplot as plt + import numpy as np + import scipy.stats as st + import arviz as az + plt.style.use('arviz-darkgrid') + x = np.linspace(-10, 20, 200) + mus = [0., 4., -1.] + sigmas = [2., 2., 4.] + xis = [-0.3, 0.0, 0.3] + for mu, sigma, xi in zip(mus, sigmas, xis): + pdf = st.genextreme.pdf(x, c=-xi, loc=mu, scale=sigma) + plt.plot(x, pdf, label=rf'$\mu$ = {mu}, $\sigma$ = {sigma}, $\xi$={xi}') + plt.xlabel('x', fontsize=12) + plt.ylabel('f(x)', fontsize=12) + plt.legend(loc=1) + plt.show() + + + ======== ========================================================================= + Support * :math:`x \in [\mu - \sigma/\xi, +\infty]`, when :math:`\xi > 0` + * :math:`x \in \mathbb{R}` when :math:`\xi = 0` + * :math:`x \in [-\infty, \mu - \sigma/\xi]`, when :math:`\xi < 0` + Mean * :math:`\mu + \sigma(g_1 - 1)/\xi`, when :math:`\xi \neq 0, \xi < 1` + * :math:`\mu + \sigma \gamma`, when :math:`\xi = 0` + * :math:`\infty`, when :math:`\xi \geq 1` + where :math:`\gamma` is the Euler-Mascheroni constant, and + :math:`g_k = \Gamma (1-k\xi)` + Variance * :math:`\sigma^2 (g_2 - g_1^2)/\xi^2`, when :math:`\xi \neq 0, \xi < 0.5` + * :math:`\frac{\pi^2}{6} \sigma^2`, when :math:`\xi = 0` + * :math:`\infty`, when :math:`\xi \geq 0.5` + ======== ========================================================================= + + Parameters + ---------- + mu: float + Location parameter. + sigma: float + Scale parameter (sigma > 0). + xi: float + Shape parameter + scipy: bool + Whether or not to use the Scipy interpretation of the shape parameter + (defaults to `False`). + + References + ---------- + .. [Coles2001] Coles, S.G. (2001). + An Introduction to the Statistical Modeling of Extreme Values + Springer-Verlag, London + + """ + + rv_op = gev + + @classmethod + def dist(cls, mu=0, sigma=1, xi=0, scipy=False, **kwargs): + # If SciPy, use its parametrization, otherwise convert to standard + if scipy: + xi = -xi + mu = at.as_tensor_variable(floatX(mu)) + sigma = at.as_tensor_variable(floatX(sigma)) + xi = at.as_tensor_variable(floatX(xi)) + + return super().dist([mu, sigma, xi], **kwargs) + + def logp(value, mu, sigma, xi): + """ + Calculate log-probability of Generalized Extreme Value distribution + at specified value. + + Parameters + ---------- + value: numeric + Value(s) for which log-probability is calculated. If the log probabilities for multiple + values are desired the values must be provided in a numpy array or Aesara tensor + + Returns + ------- + TensorVariable + """ + scaled = (value - mu) / sigma + + logp_expression = at.switch( + at.isclose(xi, 0), + -at.log(sigma) - scaled - at.exp(-scaled), + -at.log(sigma) + - ((xi + 1) / xi) * at.log1p(xi * scaled) + - at.pow(1 + xi * scaled, -1 / xi), + ) + # bnd = mu - sigma/xi + return bound( + logp_expression, + 1 + xi * (value - mu) / sigma > 0, + # at.switch(xi > 0, value > bnd, value < bnd), + sigma > 0, + ) + + def logcdf(value, mu, sigma, xi): + """ + Compute the log of the cumulative distribution function for Generalized Extreme Value + distribution at the specified value. + + Parameters + ---------- + value: numeric or np.ndarray or `TensorVariable` + Value(s) for which log CDF is calculated. If the log CDF for + multiple values are desired the values must be provided in a numpy + array or `TensorVariable`. + + Returns + ------- + TensorVariable + """ + scaled = (value - mu) / sigma + logc_expression = at.switch( + at.isclose(xi, 0), -at.exp(-scaled), -at.pow(1 + xi * scaled, -1 / xi) + ) + return bound(logc_expression, 1 + xi * scaled > 0, sigma > 0) + + def get_moment(value_var, size, mu, sigma, xi): + r""" + Using the mode, as the mean can be infinite when :math:`\xi > 1` + """ + mode = at.switch(at.isclose(xi, 0), mu, mu + sigma * (at.pow(1 + xi, -xi) - 1) / xi) + return at.full(size, mode, dtype=aesara.config.floatX) diff --git a/pymc/tests/test_distributions.py b/pymc/tests/test_distributions.py index d29a929e971..1688b201fb9 100644 --- a/pymc/tests/test_distributions.py +++ b/pymc/tests/test_distributions.py @@ -75,6 +75,7 @@ def polyagamma_cdf(*args, **kwargs): Flat, Gamma, Geometric, + GenExtreme, Gumbel, HalfCauchy, HalfFlat, @@ -2544,6 +2545,20 @@ def test_gumbel(self): lambda value, mu, beta: sp.gumbel_r.logcdf(value, loc=mu, scale=beta), ) + def test_genextreme(self): + self.check_logp( + GenExtreme, + R, + {"mu": R, "sigma": Rplus, "xi": Domain([-1, -1, -0.5, 0, 0.5, 1, 1])}, + lambda value, mu, sigma, xi: sp.genextreme.logpdf(value, c=-xi, loc=mu, scale=sigma), + ) + self.check_logcdf( + GenExtreme, + R, + {"mu": R, "sigma": Rplus, "xi": Domain([-1, -1, -0.5, 0, 0.5, 1, 1])}, + lambda value, mu, sigma, xi: sp.genextreme.logcdf(value, c=-xi, loc=mu, scale=sigma), + ) + def test_logistic(self): self.check_logp( Logistic, diff --git a/pymc/tests/test_distributions_random.py b/pymc/tests/test_distributions_random.py index 8b7c189e6c8..ab33ea92540 100644 --- a/pymc/tests/test_distributions_random.py +++ b/pymc/tests/test_distributions_random.py @@ -548,6 +548,20 @@ def seeded_exgaussian_rng_fn(self): ] +class TestGenExtreme(BaseTestDistribution): + pymc_dist = pm.GenExtreme + pymc_dist_params = {"mu": 0, "sigma": 1, "xi": -0.1} + expected_rv_op_params = {"mu": 0, "sigma": 1, "xi": -0.1} + # Notice, using different parametrization of xi sign to scipy + reference_dist_params = {"loc": 0, "scale": 1, "c": 0.1} + reference_dist = seeded_scipy_distribution_builder("genextreme") + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + "check_rv_size", + ] + + class TestGumbel(BaseTestDistribution): pymc_dist = pm.Gumbel pymc_dist_params = {"mu": 1.5, "beta": 3.0}