From 61c6fb7fa2093f86257f9e3e12bf96cea314bbad Mon Sep 17 00:00:00 2001 From: Jarle Brinchmann Date: Mon, 11 Dec 2023 00:08:01 +0000 Subject: [PATCH 1/7] Added GeneralizedNormal to the import list --- pymc_experimental/distributions/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pymc_experimental/distributions/__init__.py b/pymc_experimental/distributions/__init__.py index f06db969..e3ad9d8d 100644 --- a/pymc_experimental/distributions/__init__.py +++ b/pymc_experimental/distributions/__init__.py @@ -17,7 +17,7 @@ Experimental probability distributions for stochastic nodes in PyMC. """ -from pymc_experimental.distributions.continuous import Chi, GenExtreme, Maxwell +from pymc_experimental.distributions.continuous import Chi, GenExtreme, Maxwell, GeneralizedNormal from pymc_experimental.distributions.discrete import ( BetaNegativeBinomial, GeneralizedPoisson, @@ -37,4 +37,5 @@ "histogram_approximation", "Chi", "Maxwell", + "GeneralizedNormal", ] From 6c8a0294b8516358ebeb4ca4fa300f892c9c7343 Mon Sep 17 00:00:00 2001 From: Jarle Brinchmann Date: Mon, 11 Dec 2023 00:08:13 +0000 Subject: [PATCH 2/7] A first implementation of the generalised normal distribution was included. This follows broadly the Wikipedia page for notation - this differs slightly from the implementation in scipy.stats, but the quantitative results are identical as far as I can see. This only implements the symmetric distribution - in accorandance with the naming of normal distributions in pymc I have opted against including Symmetric in the name. It would also be moderately easy to implement the asymmetric distribution based on what is included here. --- pymc_experimental/distributions/continuous.py | 164 ++++++++++++++++++ 1 file changed, 164 insertions(+) diff --git a/pymc_experimental/distributions/continuous.py b/pymc_experimental/distributions/continuous.py index dcf9b775..c1fb30e1 100644 --- a/pymc_experimental/distributions/continuous.py +++ b/pymc_experimental/distributions/continuous.py @@ -351,3 +351,167 @@ def dist(cls, a, **kwargs): class_name="Maxwell", **kwargs, ) + + +class GeneralizedNormalRV(RandomVariable): + name: str = "Generalized Normal" + ndim_supp: int = 0 + ndims_params: List[int] = [0, 0, 0] + dtype: str = "floatX" + _print_name: Tuple[str, str] = ("Generalized Normal", "\\operatorname{GenNorm}") + + def __call__(self, loc=0.0, alpha=1.0, beta=2.0, **kwargs) -> TensorVariable: + return super().__call__(loc, alpha, beta, **kwargs) + + @classmethod + def rng_fn( + cls, + rng: np.random.RandomState, + loc: np.ndarray, + alpha: np.ndarray, + beta: np.ndarray, + size: Tuple[int, ...], + ) -> np.ndarray: + return stats.gennorm.rvs(beta, loc=loc, scale=alpha, size=size, random_state=rng) + +# Create the actual `RandomVariable` `Op`... +gennorm = GeneralizedNormalRV() + + +class GeneralizedNormal(Continuous): + r""" + Univariate Generalized Normal distribution + + The cdf of this distribution is + + .. math:: + + + G(x \mid \mu, \alpha, \beta) = \frac{1}{2} + \mathrm{sign}(x-\mu) \frac{1}{2\Gamma(1/\beta)} \gamma\left(1/\beta, \left|\frac{x-\mu}{\alpha}\right|^\beta\right) + + where :math:`\gamma()` is the unnormalized incomplete lower gamma function. + + The support of the function is the whole real line, while the parameters are defined as + + .. math:: + + \alpha, \beta > 0 + + This is the same parametrization used in Scipy and also follows + `Wikipedia `_ + + The :math:`\beta` parameter controls the kurtosis of the distribution, where the excess + kurtosis is given by + + .. math:: + + \mathrm{exKurt}(\beta) = \frac{\Gamma(5/\beta) Gamma(1/\beta)}{\gamma(3/\beta)^2} - 3 + + .. plot:: + + :context: close-figs + + import matplotlib.pyplot as plt + import numpy as np + import scipy.stats as stats + from cycler import cycler + + plt.style.use('arviz-darkgrid') + x = np.linspace(-3, 3, 200) + + betas = [0.5, 2, 8] + beta_cycle = cycler(color=['r', 'g', 'b']) + alphas = [1.0, 2.0] + alpha_cycle = cycler(ls=['-', '--']) + cycler = beta_cycle*alpha_cycle + icycler = iter(cycler) + + for beta in betas: + for alpha in alphas: + pdf = scipy.stats.gennorm.pdf(x, beta, loc=0.0, scale=alpha) + args = next(icycler) + plt.plot(x, pdf, label=r'$\alpha=$ {0} $\beta=$ {1}'.format(alpha, beta), **args) + plt.xlabel('x', fontsize=12) + plt.ylabel('f(x)', fontsize=12) + plt.legend(loc=1) + plt.show() + + ======== ========================================================================= + Support :math:`x \in (-\infty, \infty)` + Mean :math:`\mu` + Variance :math:`\frac{\alpha^2 \Gamma(3/\beta)}{\Gamma(1/\beta)}` + ======== ========================================================================= + + Parameters + ---------- + Parameters + ---------- + mu : float + Location parameter. + alpha : float + Scale parameter (alpha > 0). Note that the variance of the distribution is + not alpha squared. In particular, whene beta=2 (normal distribution), the variance + is :math:`alpha^2/2`. + beta : float + Shape parameter (beta > 0). This is directly related to the kurtosis which is a + monotonically decreasing function of beta + + """ + rv_op = gennorm + + @classmethod + def dist(cls, mu, alpha, beta, **kwargs) -> RandomVariable: + mu = pt.as_tensor_variable(floatX(mu)) + alpha = pt.as_tensor_variable(floatX(alpha)) + beta = pt.as_tensor_variable(floatX(beta)) + + return super().dist([mu, alpha, beta], **kwargs) + + + # moment here returns the mean + def moment(rv, size, mu, alpha, beta): + moment, _ = pt.broadcast_arrays(mu, alpha, beta) + if not rv_size_is_none(size): + moment = pt.full(size, moment) + return moment + + def logp(value, mu, alpha, beta): + + z = (value-mu)/alpha + lp = pt.log(0.5*beta) - pt.log(2.0*alpha) - pt.gammaln(1.0/beta) - pt.pow(pt.abs(z), beta) + + return check_parameters( + lp, + alpha > 0, + beta > 0, + msg="alpha > 0, beta > 0", + ) + + + def logcdf(value, mu, alpha, beta): + """ + Compute the log of the cumulative distribution function for a Generalized + Normal distribution + + Parameters + ---------- + value: numeric or np.ndarray or `TensorVariable` + Value(s) for which the log CDF is calculated + alpha: numeric + Scale parameter + beta: numeric + Shape parameter + + """ + + + z = pt.abs((value-mu)/alpha) + c = pt.sign(value-mu) + cdf = 0.5 + pt.sign(value-mu)*pt.gammaincc(1.0/beta, pt.pow(z, beta)) + + print("CDF={0}".format(cdf)) + + return pt.log(cdf) + + + From dd8940e38d01cbc49687036994efb43624bc534b Mon Sep 17 00:00:00 2001 From: Jarle Brinchmann Date: Mon, 11 Dec 2023 00:11:37 +0000 Subject: [PATCH 3/7] Added tests for GeneralizedNormal --- .../tests/distributions/test_continuous.py | 38 ++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/pymc_experimental/tests/distributions/test_continuous.py b/pymc_experimental/tests/distributions/test_continuous.py index 5df4aef1..efd16df4 100644 --- a/pymc_experimental/tests/distributions/test_continuous.py +++ b/pymc_experimental/tests/distributions/test_continuous.py @@ -33,7 +33,7 @@ ) # the distributions to be tested -from pymc_experimental.distributions import Chi, GenExtreme, Maxwell +from pymc_experimental.distributions import Chi, GenExtreme, Maxwell, GeneralizedNormal class TestGenExtremeClass: @@ -182,3 +182,39 @@ def test_logcdf(self): {"a": Rplus}, lambda value, a: sp.maxwell.logcdf(value, scale=a), ) + + +class TestGeneralizedNormal: + """ + Wrapper class so that tests of experimental additions can be dropped into + PyMC directly on adoption. + """ + + def test_logp(self): + check_logp( + pymc_dist=GeneralizedNormal, + domain=R, + paradomains={"mu": R, "alpha": Rplus, "beta": Rplus}, + scipy_logp=lambda value, mu, alpha, beta: sp.gennorm.logpdf(value, beta, loc=mu, scale=alpha), + ) + + def test_logcdf(self): + check_logcdf( + pymc_dist=GeneralizedNormal, + domain=R, + paradomains={"mu": R, "alpha": Rplus, "beta": Rplus}, + scipy_logcdf=lambda value, mu, alpha, beta: sp.gennorm.logcdf(value, beta, loc=mu, scale=alpha), + ) + + +class TestGeneralizedNormal(BaseTestDistributionRandom): + pymc_dist = GeneralizedNormal + pymc_dist_params = {"mu": 0, "alpha": 1, "beta": 2.0} + expected_rv_op_params = {"mu": 0, "alpha": 1, "beta": 2.0} + reference_dist_params = {"loc": 0, "scale": 1, "beta": 2.0} + reference_dist = seeded_scipy_distribution_builder("gennorm") + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + "check_rv_size", + ] From 0e768f1863ea0230b422d58adda6f11542728b42 Mon Sep 17 00:00:00 2001 From: Jarle Brinchmann Date: Mon, 11 Dec 2023 00:17:22 +0000 Subject: [PATCH 4/7] Added some comments and changed gammaincc to gammainc in the cdf for GeneralizedNormal --- pymc_experimental/distributions/continuous.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pymc_experimental/distributions/continuous.py b/pymc_experimental/distributions/continuous.py index c1fb30e1..080557e1 100644 --- a/pymc_experimental/distributions/continuous.py +++ b/pymc_experimental/distributions/continuous.py @@ -507,9 +507,12 @@ def logcdf(value, mu, alpha, beta): z = pt.abs((value-mu)/alpha) c = pt.sign(value-mu) - cdf = 0.5 + pt.sign(value-mu)*pt.gammaincc(1.0/beta, pt.pow(z, beta)) - print("CDF={0}".format(cdf)) + # This follows the Wikipedia entry for the function - scipy has + # opted for a slightly different version using gammaincc instead. + # Note that on the Wikipedia page the unnormalised \gamma function + # is used, while gammainc is a normalised function + cdf = 0.5 + pt.sign(value-mu)*pt.gammainc(1.0/beta, pt.pow(z, beta)) return pt.log(cdf) From 0f9a467485502c6a571fdb169dab89894cbabc89 Mon Sep 17 00:00:00 2001 From: Jarle Brinchmann Date: Mon, 11 Dec 2023 09:09:25 +0000 Subject: [PATCH 5/7] Fixed two small bugs in GeneralizedNormal logp and logcdf I had accidentally added an extra division by two in logp and a similar in logcdf. The results from the codes have been double-checked with scipy.stats.gennorm --- pymc_experimental/distributions/continuous.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pymc_experimental/distributions/continuous.py b/pymc_experimental/distributions/continuous.py index 080557e1..349890dc 100644 --- a/pymc_experimental/distributions/continuous.py +++ b/pymc_experimental/distributions/continuous.py @@ -478,7 +478,7 @@ def moment(rv, size, mu, alpha, beta): def logp(value, mu, alpha, beta): z = (value-mu)/alpha - lp = pt.log(0.5*beta) - pt.log(2.0*alpha) - pt.gammaln(1.0/beta) - pt.pow(pt.abs(z), beta) + lp = pt.log(0.5*beta) - pt.log(alpha) - pt.gammaln(1.0/beta) - pt.pow(pt.abs(z), beta) return check_parameters( lp, @@ -512,8 +512,8 @@ def logcdf(value, mu, alpha, beta): # opted for a slightly different version using gammaincc instead. # Note that on the Wikipedia page the unnormalised \gamma function # is used, while gammainc is a normalised function - cdf = 0.5 + pt.sign(value-mu)*pt.gammainc(1.0/beta, pt.pow(z, beta)) - + cdf = 0.5 + 0.5*pt.sign(value-mu)*pt.gammainc(1.0/beta, pt.pow(z, beta)) + return pt.log(cdf) From 2d756ac637e0faa52dea81a9f1b085ef11467260 Mon Sep 17 00:00:00 2001 From: Jarle Brinchmann Date: Mon, 11 Dec 2023 09:11:19 +0000 Subject: [PATCH 6/7] Added moment test for GeneralizedGaussian --- pymc_experimental/tests/distributions/test_continuous.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pymc_experimental/tests/distributions/test_continuous.py b/pymc_experimental/tests/distributions/test_continuous.py index efd16df4..5f2dbc72 100644 --- a/pymc_experimental/tests/distributions/test_continuous.py +++ b/pymc_experimental/tests/distributions/test_continuous.py @@ -207,6 +207,13 @@ def test_logcdf(self): ) + def test_gennorm_moment(self, mu, alpha, beta, size, expected): + with pm.Model() as model: + GeneralizedNormal("x", mu=mu, alpha=alpha, beta=beta, size=size) + assert_moment_is_expected(model, expected) + + + class TestGeneralizedNormal(BaseTestDistributionRandom): pymc_dist = GeneralizedNormal pymc_dist_params = {"mu": 0, "alpha": 1, "beta": 2.0} From 5c3a1d54efc5a06bb121bfe1ea1700ff55077372 Mon Sep 17 00:00:00 2001 From: Jarle Brinchmann Date: Mon, 11 Dec 2023 09:14:32 +0000 Subject: [PATCH 7/7] Added minor fix in momen function for GeneralizedNormal --- pymc_experimental/distributions/continuous.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc_experimental/distributions/continuous.py b/pymc_experimental/distributions/continuous.py index 349890dc..70eefc71 100644 --- a/pymc_experimental/distributions/continuous.py +++ b/pymc_experimental/distributions/continuous.py @@ -470,7 +470,7 @@ def dist(cls, mu, alpha, beta, **kwargs) -> RandomVariable: # moment here returns the mean def moment(rv, size, mu, alpha, beta): - moment, _ = pt.broadcast_arrays(mu, alpha, beta) + moment, *_ = pt.broadcast_arrays(mu, alpha, beta) if not rv_size_is_none(size): moment = pt.full(size, moment) return moment