From a904fb2eb005f704c6b39a40abb26f3796136c5b Mon Sep 17 00:00:00 2001 From: Farhan Reynaldo Date: Mon, 17 May 2021 09:54:22 +0700 Subject: [PATCH 1/6] refactor kumaraswamy --- pymc3/distributions/continuous.py | 62 +++++++++--------------- pymc3/tests/test_distributions.py | 1 - pymc3/tests/test_distributions_random.py | 28 ++++++++--- 3 files changed, 45 insertions(+), 46 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 76c1ad794f3..e885684a6f4 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -1271,6 +1271,22 @@ def logcdf(value, alpha, beta): ) +class KumaraswamyRV(RandomVariable): + name = "kumaraswamy" + ndim_supp = 0 + ndims_params = [0, 0] + dtype = "int64" + _print_name = ("Kumaraswamy", "\\operatorname{Kumaraswamy}") + + @classmethod + def rng_fn(cls, rng, a, b, size): + u = rng.uniform(size=size) + return (1 - (1 - u) ** (1 / b)) ** (1 / a) + + +kumaraswamy = KumaraswamyRV() + + class Kumaraswamy(UnitContinuous): r""" Kumaraswamy log-likelihood. @@ -1313,48 +1329,19 @@ class Kumaraswamy(UnitContinuous): b: float b > 0. """ + rv_op = kumaraswamy - def __init__(self, a, b, *args, **kwargs): - super().__init__(*args, **kwargs) - - self.a = a = at.as_tensor_variable(floatX(a)) - self.b = b = at.as_tensor_variable(floatX(b)) - - ln_mean = at.log(b) + at.gammaln(1 + 1 / a) + at.gammaln(b) - at.gammaln(1 + 1 / a + b) - self.mean = at.exp(ln_mean) - ln_2nd_raw_moment = ( - at.log(b) + at.gammaln(1 + 2 / a) + at.gammaln(b) - at.gammaln(1 + 2 / a + b) - ) - self.variance = at.exp(ln_2nd_raw_moment) - self.mean ** 2 + @classmethod + def dist(cls, a, b, *args, **kwargs): + a = at.as_tensor_variable(floatX(a)) + b = at.as_tensor_variable(floatX(b)) assert_negative_support(a, "a", "Kumaraswamy") assert_negative_support(b, "b", "Kumaraswamy") - def _random(self, a, b, size=None): - u = np.random.uniform(size=size) - return (1 - (1 - u) ** (1 / b)) ** (1 / a) - - def random(self, point=None, size=None): - """ - Draw random values from Kumaraswamy distribution. + return super().dist([a, b], *args, **kwargs) - Parameters - ---------- - point: dict, optional - Dict of variable values on which random values are to be - conditioned (uses default point if not specified). - size: int, optional - Desired size of random sample (returns one sample if not - specified). - - Returns - ------- - array - """ - # a, b = draw_values([self.a, self.b], point=point, size=size) - # return generate_samples(self._random, a, b, dist_shape=self.shape, size=size) - - def logp(self, value): + def logp(value, a, b): """ Calculate log-probability of Kumaraswamy distribution at specified value. @@ -1368,9 +1355,6 @@ def logp(self, value): ------- TensorVariable """ - a = self.a - b = self.b - logp = at.log(a) + at.log(b) + (a - 1) * at.log(value) + (b - 1) * at.log(1 - value ** a) return bound(logp, value >= 0, value <= 1, a > 0, b > 0) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index bad3670df0e..0a020106c5b 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -1110,7 +1110,6 @@ def test_beta(self): decimal=select_by_precision(float64=5, float32=3), ) - @pytest.mark.xfail(reason="Distribution not refactored yet") def test_kumaraswamy(self): # Scipy does not have a built-in Kumaraswamy pdf def scipy_log_pdf(value, a, b): diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index 0bf820df8b3..ed8c2e5b2e9 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -277,12 +277,6 @@ class TestWald(BaseTestCases.BaseTestCase): params = {"mu": 1.0, "lam": 1.0, "alpha": 0.0} -@pytest.mark.xfail(reason="This distribution has not been refactored for v4") -class TestKumaraswamy(BaseTestCases.BaseTestCase): - distribution = pm.Kumaraswamy - params = {"a": 1.0, "b": 1.0} - - @pytest.mark.xfail(reason="This distribution has not been refactored for v4") class TestAsymmetricLaplace(BaseTestCases.BaseTestCase): distribution = pm.AsymmetricLaplace @@ -498,6 +492,28 @@ class TestMoyal(BaseTestDistribution): ] +class TestKumaraswamy(BaseTestDistribution): + def kumaraswamy_rng_fn(self, a, b, size, uniform_rng_fct): + return (1 - (1 - uniform_rng_fct(size=size)) ** (1 / b)) ** (1 / a) + + def seeded_kumaraswamy_rng_fn(self): + uniform_rng_fct = functools.partial( + getattr(np.random.RandomState, "uniform"), self.get_random_state() + ) + return functools.partial(self.kumaraswamy_rng_fn, uniform_rng_fct=uniform_rng_fct) + + pymc_dist = pm.Kumaraswamy + pymc_dist_params = {"a": 1.0, "b": 1.0} + expected_rv_op_params = {"a": 1.0, "b": 1.0} + reference_dist_params = {"a": 1.0, "b": 1.0} + reference_dist = seeded_kumaraswamy_rng_fn + tests_to_run = [ + "check_pymc_params_match_rv_op", + "check_pymc_draws_match_reference", + "check_rv_size", + ] + + class TestStudentTLam(BaseTestDistribution): pymc_dist = pm.StudentT lam, sigma = get_tau_sigma(tau=2.0) From 0b29957a8e37ab27f4881b9e39f6cc1e886561fa Mon Sep 17 00:00:00 2001 From: Farhan Reynaldo Date: Mon, 17 May 2021 11:00:53 +0700 Subject: [PATCH 2/6] change dtype to floatX --- pymc3/distributions/continuous.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index e885684a6f4..12445988ce3 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -1275,7 +1275,7 @@ class KumaraswamyRV(RandomVariable): name = "kumaraswamy" ndim_supp = 0 ndims_params = [0, 0] - dtype = "int64" + dtype = "floatX" _print_name = ("Kumaraswamy", "\\operatorname{Kumaraswamy}") @classmethod From 84a5ff72a3a382021e469add245ada17b9820df8 Mon Sep 17 00:00:00 2001 From: Farhan Reynaldo Date: Mon, 17 May 2021 12:35:06 +0700 Subject: [PATCH 3/6] create logcdf function --- pymc3/distributions/continuous.py | 18 ++++++++++++++++++ pymc3/tests/test_distributions.py | 4 ++++ 2 files changed, 22 insertions(+) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 12445988ce3..dc0cb4d3922 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -1359,6 +1359,24 @@ def logp(value, a, b): return bound(logp, value >= 0, value <= 1, a > 0, b > 0) + def logcdf(value, a, b): + r""" + Compute the log of cumulative distribution function for the Kumaraswamy distribution + at the specified value. + + Parameters + ---------- + value: numeric or np.ndarray or aesara.tensor + Value(s) for which log CDF is calculated. If the log CDF for + multiple values are desired the values must be provided in a numpy + array or Aesara tensor. + + Returns + ------- + TensorVariable + """ + return bound(at.log1p(-((1 - value ** a) ** b)), value >= 0, value <= 1) + class Exponential(PositiveContinuous): r""" diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 0a020106c5b..f49f491b6a7 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -1117,7 +1117,11 @@ def scipy_log_pdf(value, a, b): np.log(a) + np.log(b) + (a - 1) * np.log(value) + (b - 1) * np.log(1 - value ** a) ) + def scipy_log_cdf(value, a, b): + return np.log1p(-((1 - value ** a) ** b)) + self.check_logp(Kumaraswamy, Unit, {"a": Rplus, "b": Rplus}, scipy_log_pdf) + self.check_logcdf(Kumaraswamy, Unit, {"a": Rplus, "b": Rplus}, scipy_log_cdf) def test_exponential(self): self.check_logp( From dd5e6e6507b5e3a5b2366c4d0bd2687571158076 Mon Sep 17 00:00:00 2001 From: Farhan Reynaldo Date: Mon, 17 May 2021 14:20:29 +0700 Subject: [PATCH 4/6] constraint on Kumaraswamy logcdf --- pymc3/distributions/continuous.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index dc0cb4d3922..528b9a4da1d 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -1375,7 +1375,8 @@ def logcdf(value, a, b): ------- TensorVariable """ - return bound(at.log1p(-((1 - value ** a) ** b)), value >= 0, value <= 1) + logcdf = at.log1p(-((1 - value ** a) ** b)) + return bound(at.switch(value < 1, logcdf, 0), value >= 0, a > 0, b > 0) class Exponential(PositiveContinuous): From 4978b71fe62f01b0e3f734218e687847d4eff0ec Mon Sep 17 00:00:00 2001 From: Farhan Reynaldo Date: Mon, 17 May 2021 14:21:14 +0700 Subject: [PATCH 5/6] update release notes on Kumaraswamy logcdf method --- RELEASE-NOTES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 493673d3387..cc4522f3839 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -9,6 +9,7 @@ ### New Features - The `CAR` distribution has been added to allow for use of conditional autoregressions which often are used in spatial and network models. +- Add `logcdf` method to Kumaraswamy distribution (see [#4706](https://github.com/pymc-devs/pymc3/pull/4706)). - ... ### Maintenance From 368ebd8554373cc1e3a7d2dcfa2ce9d4018e7620 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Mon, 17 May 2021 11:13:02 +0200 Subject: [PATCH 6/6] Add more numerically stable cdf expression --- pymc3/distributions/continuous.py | 2 +- pymc3/tests/test_distributions.py | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 528b9a4da1d..f22f511249f 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -1375,7 +1375,7 @@ def logcdf(value, a, b): ------- TensorVariable """ - logcdf = at.log1p(-((1 - value ** a) ** b)) + logcdf = log1mexp(-(b * at.log1p(-(value ** a)))) return bound(at.switch(value < 1, logcdf, 0), value >= 0, a > 0, b > 0) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index f49f491b6a7..e842d3f057d 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -1111,17 +1111,27 @@ def test_beta(self): ) def test_kumaraswamy(self): - # Scipy does not have a built-in Kumaraswamy pdf + # Scipy does not have a built-in Kumaraswamy def scipy_log_pdf(value, a, b): return ( np.log(a) + np.log(b) + (a - 1) * np.log(value) + (b - 1) * np.log(1 - value ** a) ) def scipy_log_cdf(value, a, b): - return np.log1p(-((1 - value ** a) ** b)) + return pm.math.log1mexp_numpy(-(b * np.log1p(-(value ** a)))) - self.check_logp(Kumaraswamy, Unit, {"a": Rplus, "b": Rplus}, scipy_log_pdf) - self.check_logcdf(Kumaraswamy, Unit, {"a": Rplus, "b": Rplus}, scipy_log_cdf) + self.check_logp( + Kumaraswamy, + Unit, + {"a": Rplus, "b": Rplus}, + scipy_log_pdf, + ) + self.check_logcdf( + Kumaraswamy, + Unit, + {"a": Rplus, "b": Rplus}, + scipy_log_cdf, + ) def test_exponential(self): self.check_logp(