From eebed5ed4613edcac472f37d6f626d6985fead8b Mon Sep 17 00:00:00 2001 From: Ricardo Date: Wed, 12 May 2021 18:15:01 +0200 Subject: [PATCH 1/6] Replace custom incomplete_beta with aesara.tensor.betainc Closes #4420 --- RELEASE-NOTES.md | 4 +- pymc3/distributions/continuous.py | 26 ++--- pymc3/distributions/discrete.py | 49 +++------ pymc3/distributions/dist_math.py | 176 ++---------------------------- requirements.txt | 2 +- 5 files changed, 37 insertions(+), 220 deletions(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index cd37f6e834f..937fafd913f 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -8,7 +8,7 @@ - The `Distribution` keyword argument `testval` has been deprecated in favor of `initval`. - `pm.sample` now returns results as `InferenceData` instead of `MultiTrace` by default (see [#4744](https://github.com/pymc-devs/pymc3/pull/4744)). - `pm.sample_prior_predictive` no longer returns transformed variable values by default. Pass them by name in `var_names` if you want to obtain these draws (see [4769](https://github.com/pymc-devs/pymc3/pull/4769)). -... +- ... ### New Features - The `CAR` distribution has been added to allow for use of conditional autoregressions which often are used in spatial and network models. @@ -20,12 +20,14 @@ - Add `logcdf` method to Kumaraswamy distribution (see [#4706](https://github.com/pymc-devs/pymc3/pull/4706)). - The `OrderedMultinomial` distribution has been added for use on ordinal data which are _aggregated_ by trial, like multinomial observations, whereas `OrderedLogistic` only accepts ordinal data in a _disaggregated_ format, like categorical observations (see [#4773](https://github.com/pymc-devs/pymc3/pull/4773)). +- ... ### Maintenance - Remove float128 dtype support (see [#4514](https://github.com/pymc-devs/pymc3/pull/4514)). - Logp method of `Uniform` and `DiscreteUniform` no longer depends on `pymc3.distributions.dist_math.bound` for proper evaluation (see [#4541](https://github.com/pymc-devs/pymc3/pull/4541)). - `Model.RV_dims` and `Model.coords` are now read-only properties. To modify the `coords` dictionary use `Model.add_coord`. Also `dims` or coordinate values that are `None` will be auto-completed (see [#4625](https://github.com/pymc-devs/pymc3/pull/4625)). - The length of `dims` in the model is now tracked symbolically through `Model.dim_lengths` (see [#4625](https://github.com/pymc-devs/pymc3/pull/4625)). +- The `incomplete_beta` function in `pymc3.distributions.dist_math` was replaced by `aesara.tensor.betainc` (see [4736](https://github.com/pymc-devs/pymc3/pull/4736)). - ... ## PyMC3 3.11.2 (14 March 2021) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 86f51e75d4d..aeaaae413af 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -59,7 +59,6 @@ bound, clipped_beta_rvs, i0e, - incomplete_beta, log_i0, log_normal, logpow, @@ -1246,23 +1245,19 @@ def logcdf(value, alpha, beta): Parameters ---------- - value: numeric - Value(s) for which log CDF is calculated. + value: numeric or np.ndarray or aesara.tensor + Value(s) for which log CDF is calculated. If the log CDF for multiple + values are desired the values must be provided in a numpy array or Aesara tensor. Returns ------- TensorVariable """ - # incomplete_beta function can only handle scalar values (see #4342) - if np.ndim(value): - raise TypeError( - f"Beta.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." - ) return bound( at.switch( at.lt(value, 1), - at.log(incomplete_beta(alpha, beta, value)), + at.log(at.betainc(alpha, beta, value)), 0, ), 0 <= value, @@ -1915,19 +1910,14 @@ def logcdf(value, nu, mu, sigma): Parameters ---------- - value: numeric - Value(s) for which log CDF is calculated. + value: numeric or np.ndarray or aesara.tensor + Value(s) for which log CDF is calculated. If the log CDF for multiple + values are desired the values must be provided in a numpy array or Aesara tensor. Returns ------- TensorVariable """ - # incomplete_beta function can only handle scalar values (see #4342) - if np.ndim(value): - raise TypeError( - f"StudentT.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." - ) - lam, sigma = get_tau_sigma(sigma=sigma) t = (value - mu) / sigma @@ -1935,7 +1925,7 @@ def logcdf(value, nu, mu, sigma): x = (t + sqrt_t2_nu) / (2.0 * sqrt_t2_nu) return bound( - at.log(incomplete_beta(nu / 2.0, nu / 2.0, x)), + at.log(at.betainc(nu / 2.0, nu / 2.0, x)), 0 < nu, 0 < sigma, 0 < lam, diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 8efeb60bc93..9ffe3b7cce8 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -35,7 +35,6 @@ binomln, bound, factln, - incomplete_beta, log_diff_normal_cdf, logpow, normal_lccdf, @@ -145,25 +144,20 @@ def logcdf(value, n, p): Parameters ---------- - value: numeric - Value for which log CDF is calculated. + value: numeric or np.ndarray or aesara.tensor + Value(s) for which log CDF is calculated. If the log CDF for multiple + values are desired the values must be provided in a numpy array or Aesara tensor. Returns ------- TensorVariable """ - # incomplete_beta function can only handle scalar values (see #4342) - if np.ndim(value): - raise TypeError( - f"Binomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." - ) - value = at.floor(value) return bound( at.switch( at.lt(value, n), - at.log(incomplete_beta(n - value, value + 1, 1 - p)), + at.log(at.betainc(n - value, value + 1, 1 - p)), 0, ), 0 <= value, @@ -732,21 +726,16 @@ def logcdf(value, n, p): Parameters ---------- - value: numeric - Value for which log CDF is calculated. + value: numeric or np.ndarray or aesara.tensor + Value(s) for which log CDF is calculated. If the log CDF for multiple + values are desired the values must be provided in a numpy array or Aesara tensor. Returns ------- TensorVariable """ - # incomplete_beta function can only handle scalar values (see #4342) - if np.ndim(value): - raise TypeError( - f"NegativeBinomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." - ) - return bound( - at.log(incomplete_beta(n, at.floor(value) + 1, p)), + at.log(at.betainc(n, at.floor(value) + 1, p)), 0 <= value, 0 < n, 0 <= p, @@ -1461,20 +1450,15 @@ def logcdf(value, psi, n, p): Parameters ---------- - value: numeric - Value for which log CDF is calculated. + value: numeric or np.ndarray or aesara.tensor + Value(s) for which log CDF is calculated. If the log CDF for multiple + values are desired the values must be provided in a numpy array or Aesara tensor. Returns ------- TensorVariable """ - # logcdf can only handle scalar values due to limitation in Binomial.logcdf - if np.ndim(value): - raise TypeError( - f"ZeroInflatedBinomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." - ) - return bound( logaddexp(at.log1p(-psi), at.log(psi) + _logcdf(binomial, value, {}, n, p)), 0 <= value, @@ -1616,19 +1600,14 @@ def logcdf(value, psi, n, p): Parameters ---------- - value: numeric - Value for which log CDF is calculated. + value: numeric or np.ndarray or aesara.tensor + Value(s) for which log CDF is calculated. If the log CDF for multiple + values are desired the values must be provided in a numpy array or Aesara tensor. Returns ------- TensorVariable """ - # logcdf can only handle scalar values due to limitation in NegativeBinomial.logcdf - if np.ndim(value): - raise TypeError( - f"ZeroInflatedNegativeBinomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." - ) - return bound( logaddexp(at.log1p(-psi), at.log(psi) + _logcdf(nbinom, value, {}, n, p)), 0 <= value, diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py index e8ea88f32a3..b4a34a05b09 100644 --- a/pymc3/distributions/dist_math.py +++ b/pymc3/distributions/dist_math.py @@ -17,18 +17,18 @@ @author: johnsalvatier """ +import warnings + import aesara import aesara.tensor as at import numpy as np import scipy.linalg import scipy.stats -from aesara import scan from aesara.compile.builders import OpFromGraph from aesara.graph.basic import Apply from aesara.graph.op import Op from aesara.scalar import UnaryScalarOp, upgrade_to_float_no_complex -from aesara.scan import until from aesara.tensor import gammaln from aesara.tensor.elemwise import Elemwise from aesara.tensor.slinalg import Cholesky @@ -433,169 +433,6 @@ def zvalue(value, sigma, mu): return (value - mu) / sigma -def incomplete_beta_cfe(a, b, x, small): - """Incomplete beta continued fraction expansions - based on Cephes library by Steve Moshier (incbet.c). - small: Choose element-wise which continued fraction expansion to use. - """ - BIG = at.constant(4.503599627370496e15, dtype="float64") - BIGINV = at.constant(2.22044604925031308085e-16, dtype="float64") - THRESH = at.constant(3.0 * np.MachAr().eps, dtype="float64") - - zero = at.constant(0.0, dtype="float64") - one = at.constant(1.0, dtype="float64") - two = at.constant(2.0, dtype="float64") - - r = one - k1 = a - k3 = a - k4 = a + one - k5 = one - k8 = a + two - - k2 = at.switch(small, a + b, b - one) - k6 = at.switch(small, b - one, a + b) - k7 = at.switch(small, k4, a + one) - k26update = at.switch(small, one, -one) - x = at.switch(small, x, x / (one - x)) - - pkm2 = zero - qkm2 = one - pkm1 = one - qkm1 = one - r = one - - def _step(i, pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r, a, b, x, small): - xk = -(x * k1 * k2) / (k3 * k4) - pk = pkm1 + pkm2 * xk - qk = qkm1 + qkm2 * xk - pkm2 = pkm1 - pkm1 = pk - qkm2 = qkm1 - qkm1 = qk - - xk = (x * k5 * k6) / (k7 * k8) - pk = pkm1 + pkm2 * xk - qk = qkm1 + qkm2 * xk - pkm2 = pkm1 - pkm1 = pk - qkm2 = qkm1 - qkm1 = qk - - old_r = r - r = at.switch(at.eq(qk, zero), r, pk / qk) - - k1 += one - k2 += k26update - k3 += two - k4 += two - k5 += one - k6 -= k26update - k7 += two - k8 += two - - big_cond = at.gt(at.abs_(qk) + at.abs_(pk), BIG) - biginv_cond = at.or_(at.lt(at.abs_(qk), BIGINV), at.lt(at.abs_(pk), BIGINV)) - - pkm2 = at.switch(big_cond, pkm2 * BIGINV, pkm2) - pkm1 = at.switch(big_cond, pkm1 * BIGINV, pkm1) - qkm2 = at.switch(big_cond, qkm2 * BIGINV, qkm2) - qkm1 = at.switch(big_cond, qkm1 * BIGINV, qkm1) - - pkm2 = at.switch(biginv_cond, pkm2 * BIG, pkm2) - pkm1 = at.switch(biginv_cond, pkm1 * BIG, pkm1) - qkm2 = at.switch(biginv_cond, qkm2 * BIG, qkm2) - qkm1 = at.switch(biginv_cond, qkm1 * BIG, qkm1) - - return ( - (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), - until(at.abs_(old_r - r) < (THRESH * at.abs_(r))), - ) - - (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), _ = scan( - _step, - sequences=[at.arange(0, 300)], - outputs_info=[ - e - for e in at.cast((pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), "float64") - ], - non_sequences=[a, b, x, small], - ) - - return r[-1] - - -def incomplete_beta_ps(a, b, value): - """Power series for incomplete beta - Use when b*x is small and value not too close to 1. - Based on Cephes library by Steve Moshier (incbet.c) - """ - one = at.constant(1, dtype="float64") - ai = one / a - u = (one - b) * value - t1 = u / (a + one) - t = u - threshold = np.MachAr().eps * ai - s = at.constant(0, dtype="float64") - - def _step(i, t, s, a, b, value): - t *= (i - b) * value / i - step = t / (a + i) - s += step - return ((t, s), until(at.abs_(step) < threshold)) - - (t, s), _ = scan( - _step, - sequences=[at.arange(2, 302)], - outputs_info=[e for e in at.cast((t, s), "float64")], - non_sequences=[a, b, value], - ) - - s = s[-1] + t1 + ai - - t = gammaln(a + b) - gammaln(a) - gammaln(b) + a * at.log(value) + at.log(s) - return at.exp(t) - - -def incomplete_beta(a, b, value): - """Incomplete beta implementation - Power series and continued fraction expansions chosen for best numerical - convergence across the board based on inputs. - """ - machep = at.constant(np.MachAr().eps, dtype="float64") - one = at.constant(1, dtype="float64") - w = one - value - - ps = incomplete_beta_ps(a, b, value) - - flip = at.gt(value, (a / (a + b))) - aa, bb = a, b - a = at.switch(flip, bb, aa) - b = at.switch(flip, aa, bb) - xc = at.switch(flip, value, w) - x = at.switch(flip, w, value) - - tps = incomplete_beta_ps(a, b, x) - tps = at.switch(at.le(tps, machep), one - machep, one - tps) - - # Choose which continued fraction expansion for best convergence. - small = at.lt(x * (a + b - 2.0) - (a - one), 0.0) - cfe = incomplete_beta_cfe(a, b, x, small) - w = at.switch(small, cfe, cfe / xc) - - # Direct incomplete beta accounting for flipped a, b. - t = at.exp( - a * at.log(x) + b * at.log(xc) + gammaln(a + b) - gammaln(a) - gammaln(b) + at.log(w / a) - ) - - t = at.switch(flip, at.switch(at.le(t, machep), one - machep, one - t), t) - return at.switch( - at.and_(flip, at.and_(at.le((b * x), one), at.le(x, 0.95))), - tps, - at.switch(at.and_(at.le(b * value, one), at.le(value, 0.95)), ps, t), - ) - - def clipped_beta_rvs(a, b, size=None, random_state=None, dtype="float64"): """Draw beta distributed random samples in the open :math:`(0, 1)` interval. @@ -672,3 +509,12 @@ def log_i0(x): + 11025.0 / (98304.0 * x ** 4.0) ), ) + + +def incomplete_beta(a, b, value): + warnings.warn( + "incomplete_beta has been deprecated. Use aesara.tensor.betainc instead.", + DeprecationWarning, + stacklevel=2, + ) + return at.betainc(a, b, value) diff --git a/requirements.txt b/requirements.txt index 70db77f661b..9a4f8b3a671 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -aesara>=2.0.9 +aesara>=2.1.0 arviz>=0.11.2 cachetools>=4.2.1 dill From 8eae85fc90a3a29befb0fa9aa6231ecd73697db9 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 3 Jun 2021 19:28:54 +0200 Subject: [PATCH 2/6] Speedup `check_logcdf` and `check_selfconsistency_discrete_logcdf` tests Also reverts reduced test n_samples due to speed issues --- pymc3/tests/test_distributions.py | 76 +++++++++++++++++-------------- 1 file changed, 42 insertions(+), 34 deletions(-) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 5648ae48720..53622c71409 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -36,7 +36,7 @@ import pymc3 as pm -from pymc3.aesaraf import change_rv_size, floatX, intX +from pymc3.aesaraf import floatX, intX from pymc3.distributions import ( AR1, CAR, @@ -102,9 +102,10 @@ continuous, logcdf, logp, + logpt, logpt_sum, ) -from pymc3.math import kronecker, logsumexp +from pymc3.math import kronecker from pymc3.model import Deterministic, Model, Point from pymc3.tests.helpers import select_by_precision from pymc3.vartypes import continuous_types @@ -751,6 +752,10 @@ def check_logcdf( if not skip_paramdomain_inside_edge_test: domains = paramdomains.copy() domains["value"] = domain + + model, param_vars = build_model(pymc3_dist, domain, paramdomains) + pymc3_logcdf = model.fastfn(logpt(model["value"], cdf=True)) + if decimal is None: decimal = select_by_precision(float64=6, float32=3) @@ -758,18 +763,22 @@ def check_logcdf( params = dict(pt) if skip_params_fn(params): continue - scipy_cdf = scipy_logcdf(**params) + + scipy_eval = scipy_logcdf(**params) + value = params.pop("value") - with Model() as m: - dist = pymc3_dist("y", **params) + # Update shared parameter variables in pymc3_logcdf function + for param_name, param_value in params.items(): + param_vars[param_name].set_value(param_value) + pymc3_eval = pymc3_logcdf({"value": value}) + params["value"] = value # for displaying in err_msg - with aesara.config.change_flags(on_opt_error="raise", mode=Mode("py")): - assert_almost_equal( - logcdf(dist, value).eval(), - scipy_cdf, - decimal=decimal, - err_msg=str(params), - ) + assert_almost_equal( + pymc3_eval, + scipy_eval, + decimal=decimal, + err_msg=str(params), + ) valid_value = domain.vals[0] valid_params = {param: paramdomain.vals[0] for param, paramdomain in paramdomains.items()} @@ -849,24 +858,33 @@ def check_selfconsistency_discrete_logcdf( """ Check that logcdf of discrete distributions matches sum of logps up to value """ + # This test only works for scalar random variables + assert distribution.rv_op.ndim_supp == 0 + domains = paramdomains.copy() domains["value"] = domain if decimal is None: decimal = select_by_precision(float64=6, float32=3) + + model, param_vars = build_model(distribution, domain, paramdomains) + dist_logcdf = model.fastfn(logpt(model["value"], cdf=True)) + dist_logp = model.fastfn(logpt(model["value"])) + for pt in product(domains, n_samples=n_samples): params = dict(pt) if skip_params_fn(params): continue value = params.pop("value") values = np.arange(domain.lower, value + 1) - dist = distribution.dist(**params) - # This only works for scalar random variables - assert dist.owner.op.ndim_supp == 0 - values_dist = change_rv_size(dist, values.shape) + + # Update shared parameter variables in logp/logcdf function + for param_name, param_value in params.items(): + param_vars[param_name].set_value(param_value) + with aesara.config.change_flags(mode=Mode("py")): assert_almost_equal( - logcdf(dist, value).eval(), - logsumexp(logp(values_dist, values), keepdims=False).eval(), + dist_logcdf({"value": value}), + scipy.special.logsumexp([dist_logp({"value": value}) for value in values]), decimal=decimal, err_msg=str(pt), ) @@ -1140,13 +1158,17 @@ def test_beta(self): {"alpha": Rplus, "beta": Rplus}, lambda value, alpha, beta: sp.beta.logpdf(value, alpha, beta), ) - self.check_logp(Beta, Unit, {"mu": Unit, "sigma": Rplus}, beta_mu_sigma) + self.check_logp( + Beta, + Unit, + {"mu": Unit, "sigma": Rplus}, + beta_mu_sigma, + ) self.check_logcdf( Beta, Unit, {"alpha": Rplus, "beta": Rplus}, lambda value, alpha, beta: sp.beta.logcdf(value, alpha, beta), - n_samples=10, decimal=select_by_precision(float64=5, float32=3), ) @@ -1269,20 +1291,17 @@ def scipy_mu_alpha_logcdf(value, mu, alpha): Nat, {"mu": Rplus, "alpha": Rplus}, scipy_mu_alpha_logcdf, - n_samples=5, ) self.check_logcdf( NegativeBinomial, Nat, {"p": Unit, "n": Rplus}, lambda value, p, n: sp.nbinom.logcdf(value, n, p), - n_samples=5, ) self.check_selfconsistency_discrete_logcdf( NegativeBinomial, Nat, {"mu": Rplus, "alpha": Rplus}, - n_samples=10, ) @pytest.mark.parametrize( @@ -1340,7 +1359,6 @@ def test_lognormal(self): Rplus, {"mu": R, "sigma": Rplusbig}, lambda value, mu, sigma: floatX(sp.lognorm.logpdf(value, sigma, 0, np.exp(mu))), - n_samples=5, # Just testing alternative parametrization ) self.check_logcdf( Lognormal, @@ -1353,7 +1371,6 @@ def test_lognormal(self): Rplus, {"mu": R, "sigma": Rplusbig}, lambda value, mu, sigma: sp.lognorm.logcdf(value, sigma, 0, np.exp(mu)), - n_samples=5, # Just testing alternative parametrization ) def test_t(self): @@ -1368,14 +1385,12 @@ def test_t(self): R, {"nu": Rplus, "mu": R, "sigma": Rplus}, lambda value, nu, mu, sigma: sp.t.logpdf(value, nu, mu, sigma), - n_samples=5, # Just testing alternative parametrization ) self.check_logcdf( StudentT, R, {"nu": Rplus, "mu": R, "lam": Rplus}, lambda value, nu, mu, lam: sp.t.logcdf(value, nu, mu, lam ** -0.5), - n_samples=10, # relies on slow incomplete beta ) # TODO: reenable when PR #4736 is merged """ @@ -1384,7 +1399,6 @@ def test_t(self): R, {"nu": Rplus, "mu": R, "sigma": Rplus}, lambda value, nu, mu, sigma: sp.t.logcdf(value, nu, mu, sigma), - n_samples=5, # Just testing alternative parametrization ) """ @@ -1561,13 +1575,11 @@ def test_binomial(self): Nat, {"n": NatSmall, "p": Unit}, lambda value, n, p: sp.binom.logcdf(value, n, p), - n_samples=10, ) self.check_selfconsistency_discrete_logcdf( Binomial, Nat, {"n": NatSmall, "p": Unit}, - n_samples=10, ) @pytest.mark.xfail(reason="checkd tests has not been refactored") @@ -1769,14 +1781,12 @@ def logcdf_fn(value, psi, mu, alpha): Nat, {"psi": Unit, "mu": Rplusbig, "alpha": Rplusbig}, logcdf_fn, - n_samples=10, ) self.check_selfconsistency_discrete_logcdf( ZeroInflatedNegativeBinomial, Nat, {"psi": Unit, "mu": Rplusbig, "alpha": Rplusbig}, - n_samples=10, ) @pytest.mark.xfail(reason="Test not refactored yet") @@ -1809,14 +1819,12 @@ def logcdf_fn(value, psi, n, p): Nat, {"psi": Unit, "n": NatSmall, "p": Unit}, logcdf_fn, - n_samples=10, ) self.check_selfconsistency_discrete_logcdf( ZeroInflatedBinomial, Nat, {"n": NatSmall, "p": Unit, "psi": Unit}, - n_samples=10, ) @pytest.mark.parametrize("n", [1, 2, 3]) From 752c184e6802eee64b676fe8f48c2de7d26af4b9 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Thu, 3 Jun 2021 20:18:45 +0200 Subject: [PATCH 3/6] Float32 skipif on Beta and StudentT logcdf tests --- pymc3/tests/test_distributions.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 53622c71409..02fa6e32133 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -1151,7 +1151,7 @@ def test_wald_logp_custom_points(self, value, mu, lam, phi, alpha, logp): decimals = select_by_precision(float64=6, float32=1) assert_almost_equal(model.fastlogp(pt), logp, decimal=decimals, err_msg=str(pt)) - def test_beta(self): + def test_beta_logp(self): self.check_logp( Beta, Unit, @@ -1164,12 +1164,17 @@ def test_beta(self): {"mu": Unit, "sigma": Rplus}, beta_mu_sigma, ) + + @pytest.mark.skipif( + condition=(aesara.config.floatX == "float32"), + reason="Fails on float32 due to numerical issues", + ) + def test_beta_logcdf(self): self.check_logcdf( Beta, Unit, {"alpha": Rplus, "beta": Rplus}, lambda value, alpha, beta: sp.beta.logcdf(value, alpha, beta), - decimal=select_by_precision(float64=5, float32=3), ) def test_kumaraswamy(self): @@ -1373,7 +1378,7 @@ def test_lognormal(self): lambda value, mu, sigma: sp.lognorm.logcdf(value, sigma, 0, np.exp(mu)), ) - def test_t(self): + def test_studentt_logp(self): self.check_logp( StudentT, R, @@ -1386,21 +1391,24 @@ def test_t(self): {"nu": Rplus, "mu": R, "sigma": Rplus}, lambda value, nu, mu, sigma: sp.t.logpdf(value, nu, mu, sigma), ) + + @pytest.mark.skipif( + condition=(aesara.config.floatX == "float32"), + reason="Fails on float32 due to numerical issues", + ) + def test_studentt_logcdf(self): self.check_logcdf( StudentT, R, {"nu": Rplus, "mu": R, "lam": Rplus}, lambda value, nu, mu, lam: sp.t.logcdf(value, nu, mu, lam ** -0.5), ) - # TODO: reenable when PR #4736 is merged - """ self.check_logcdf( StudentT, R, {"nu": Rplus, "mu": R, "sigma": Rplus}, lambda value, nu, mu, sigma: sp.t.logcdf(value, nu, mu, sigma), ) - """ def test_cauchy(self): self.check_logp( From 1db0c19eed30a3c8337b1362909a44cc6d4d2233 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 4 Jun 2021 15:34:04 +0200 Subject: [PATCH 4/6] Remove workaround for HyperGeometric logcdf failure related to aesara/issues/461 --- pymc3/tests/test_distributions.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 02fa6e32133..5af6d3da8df 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -596,7 +596,6 @@ def check_logp( n_samples=100, extra_args=None, scipy_args=None, - skip_params_fn=lambda x: False, ): """ Generic test for PyMC3 logp methods @@ -628,9 +627,6 @@ def check_logp( the pymc3 distribution logp is calculated scipy_args : Dictionary with extra arguments needed to call scipy logp method Usually the same as extra_args - skip_params_fn: Callable - A function that takes a ``dict`` of the test points and returns a - boolean indicating whether or not to perform the test. """ if decimal is None: decimal = select_by_precision(float64=6, float32=3) @@ -652,8 +648,6 @@ def logp_reference(args): domains["value"] = domain for pt in product(domains, n_samples=n_samples): pt = dict(pt) - if skip_params_fn(pt): - continue pt_d = self._model_input_dict(model, param_vars, pt) pt_logp = Point(pt_d, model=model) pt_ref = Point(pt, filter_model_vars=False, model=model) @@ -698,7 +692,6 @@ def check_logcdf( n_samples=100, skip_paramdomain_inside_edge_test=False, skip_paramdomain_outside_edge_test=False, - skip_params_fn=lambda x: False, ): """ Generic test for PyMC3 logcdf methods @@ -739,9 +732,6 @@ def check_logcdf( skip_paramdomain_outside_edge_test : Bool Whether to run test 2., which checks that pymc3 distribution logcdf returns -inf for invalid parameter values outside the supported domain edge - skip_params_fn: Callable - A function that takes a ``dict`` of the test points and returns a - boolean indicating whether or not to perform the test. Returns ------- @@ -761,9 +751,6 @@ def check_logcdf( for pt in product(domains, n_samples=n_samples): params = dict(pt) - if skip_params_fn(params): - continue - scipy_eval = scipy_logcdf(**params) value = params.pop("value") @@ -853,7 +840,6 @@ def check_selfconsistency_discrete_logcdf( paramdomains, decimal=None, n_samples=100, - skip_params_fn=lambda x: False, ): """ Check that logcdf of discrete distributions matches sum of logps up to value @@ -872,8 +858,6 @@ def check_selfconsistency_discrete_logcdf( for pt in product(domains, n_samples=n_samples): params = dict(pt) - if skip_params_fn(params): - continue value = params.pop("value") values = np.arange(domain.lower, value + 1) @@ -1256,20 +1240,17 @@ def modified_scipy_hypergeom_logcdf(value, N, k, n): Nat, {"N": NatSmall, "k": NatSmall, "n": NatSmall}, modified_scipy_hypergeom_logpmf, - skip_params_fn=lambda x: x["N"] < x["n"] or x["N"] < x["k"], ) self.check_logcdf( HyperGeometric, Nat, {"N": NatSmall, "k": NatSmall, "n": NatSmall}, modified_scipy_hypergeom_logcdf, - skip_params_fn=lambda x: x["N"] < x["n"] or x["N"] < x["k"], ) self.check_selfconsistency_discrete_logcdf( HyperGeometric, Nat, {"N": NatSmall, "k": NatSmall, "n": NatSmall}, - skip_params_fn=lambda x: x["N"] < x["n"] or x["N"] < x["k"], ) def test_negative_binomial(self): From e54729fed90f1b15aea69fb96c16ef6f2b44f9df Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 4 Jun 2021 16:08:45 +0200 Subject: [PATCH 5/6] Remove `gammainc(c)` safeguards in `logcdf` methods of `Gamma` and `InverseGamma` Closes #4467 --- pymc3/distributions/continuous.py | 13 ++----------- pymc3/tests/test_distributions.py | 10 ---------- 2 files changed, 2 insertions(+), 21 deletions(-) diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index aeaaae413af..3f4daa35943 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -2370,13 +2370,8 @@ def logcdf(value, alpha, inv_beta): """ beta = at.inv(inv_beta) - # Avoid C-assertion when the gammainc function is called with invalid values (#4340) - safe_alpha = at.switch(at.lt(alpha, 0), 0, alpha) - safe_beta = at.switch(at.lt(beta, 0), 0, beta) - safe_value = at.switch(at.lt(value, 0), 0, value) - return bound( - at.log(at.gammainc(safe_alpha, safe_beta * safe_value)), + at.log(at.gammainc(alpha, beta * value)), 0 <= value, 0 < alpha, 0 < beta, @@ -2518,13 +2513,9 @@ def logcdf(value, alpha, beta): ------- TensorVariable """ - # Avoid C-assertion when the gammaincc function is called with invalid values (#4340) - safe_alpha = at.switch(at.lt(alpha, 0), 0, alpha) - safe_beta = at.switch(at.lt(beta, 0), 0, beta) - safe_value = at.switch(at.lt(value, 0), 0, value) return bound( - at.log(at.gammaincc(safe_alpha, safe_beta / safe_value)), + at.log(at.gammaincc(alpha, beta / value)), 0 <= value, 0 < alpha, 0 < beta, diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 5af6d3da8df..f7d402ba5c2 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -1442,15 +1442,11 @@ def test_fun(value, mu, sigma): reason="Fails on float32 due to numerical issues", ) def test_gamma_logcdf(self): - # pymc-devs/aesara#224: skip_paramdomain_outside_edge_test has to be set - # True to avoid triggering a C-level assertion in the Aesara GammaQ function - # in gamma.c file. Can be set back to False (default) once that issue is solved self.check_logcdf( Gamma, Rplus, {"alpha": Rplusbig, "beta": Rplusbig}, lambda value, alpha, beta: sp.gamma.logcdf(value, alpha, scale=1.0 / beta), - skip_paramdomain_outside_edge_test=True, ) def test_inverse_gamma_logp(self): @@ -1460,23 +1456,17 @@ def test_inverse_gamma_logp(self): {"alpha": Rplus, "beta": Rplus}, lambda value, alpha, beta: sp.invgamma.logpdf(value, alpha, scale=beta), ) - # pymc-devs/aesara#224: skip_paramdomain_outside_edge_test has to be set - # True to avoid triggering a C-level assertion in the Aesara GammaQ function @pytest.mark.skipif( condition=(aesara.config.floatX == "float32"), reason="Fails on float32 due to numerical issues", ) def test_inverse_gamma_logcdf(self): - # pymc-devs/aesara#224: skip_paramdomain_outside_edge_test has to be set - # True to avoid triggering a C-level assertion in the Aesara GammaQ function - # in gamma.c file. Can be set back to False (default) once that issue is solved self.check_logcdf( InverseGamma, Rplus, {"alpha": Rplus, "beta": Rplus}, lambda value, alpha, beta: sp.invgamma.logcdf(value, alpha, scale=beta), - skip_paramdomain_outside_edge_test=True, ) @pytest.mark.skipif( From 20e5f89bb43c3dfdae2965780efed87301424c08 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Fri, 4 Jun 2021 22:11:55 +0200 Subject: [PATCH 6/6] Update scipy dependency to >= 1.4.1 --- RELEASE-NOTES.md | 1 + pymc3/tests/test_distributions.py | 16 +--------------- pymc3/tests/test_distributions_random.py | 8 -------- requirements.txt | 2 +- 4 files changed, 3 insertions(+), 24 deletions(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 937fafd913f..424d077b88b 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -3,6 +3,7 @@ ## PyMC3 vNext (4.0.0) ### Breaking Changes - ⚠ Theano-PyMC has been replaced with Aesara, so all external references to `theano`, `tt`, and `pymc3.theanof` need to be replaced with `aesara`, `at`, and `pymc3.aesaraf` (see [4471](https://github.com/pymc-devs/pymc3/pull/4471)). +- ⚠ PyMC3 now requires Scipy version `>= 1.4.1` (see [4736](https://github.com/pymc-devs/pymc3/pull/4736)). - ArviZ `plots` and `stats` *wrappers* were removed. The functions are now just available by their original names (see [#4549](https://github.com/pymc-devs/pymc3/pull/4471) and `3.11.2` release notes). - The GLM submodule has been removed, please use [Bambi](https://bambinos.github.io/bambi/) instead. - The `Distribution` keyword argument `testval` has been deprecated in favor of `initval`. diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index f7d402ba5c2..5ee6bb813cb 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -29,8 +29,6 @@ from aesara.tensor.var import TensorVariable from numpy import array, inf, log from numpy.testing import assert_allclose, assert_almost_equal, assert_equal -from packaging.version import parse -from scipy import __version__ as scipy_version from scipy import integrate from scipy.special import erf, logit @@ -110,8 +108,6 @@ from pymc3.tests.helpers import select_by_precision from pymc3.vartypes import continuous_types -SCIPY_VERSION = parse(scipy_version) - def get_lkj_cases(): """ @@ -1570,29 +1566,19 @@ def test_beta_binomial_distribution(self): {"alpha": Rplus, "beta": Rplus, "n": NatSmall}, ) - @pytest.mark.skipif( - condition=(SCIPY_VERSION < parse("1.4.0")), reason="betabinom is new in Scipy 1.4.0" - ) - def test_beta_binomial_logp(self): + def test_beta_binomial(self): self.check_logp( BetaBinomial, Nat, {"alpha": Rplus, "beta": Rplus, "n": NatSmall}, lambda value, alpha, beta, n: sp.betabinom.logpmf(value, a=alpha, b=beta, n=n), ) - - @pytest.mark.skipif( - condition=(SCIPY_VERSION < parse("1.4.0")), reason="betabinom is new in Scipy 1.4.0" - ) - def test_beta_binomial_logcdf(self): self.check_logcdf( BetaBinomial, Nat, {"alpha": Rplus, "beta": Rplus, "n": NatSmall}, lambda value, alpha, beta, n: sp.betabinom.logcdf(value, a=alpha, b=beta, n=n), ) - - def test_beta_binomial_selfconsistency(self): self.check_selfconsistency_discrete_logcdf( BetaBinomial, Nat, diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index ea796f863e8..cd5f7fd611c 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -24,8 +24,6 @@ import scipy.stats as st from numpy.testing import assert_almost_equal, assert_array_almost_equal -from packaging.version import parse -from scipy import __version__ as scipy_version from scipy.special import expit import pymc3 as pm @@ -47,8 +45,6 @@ product, ) -SCIPY_VERSION = parse(scipy_version) - def pymc3_random( dist, @@ -1317,10 +1313,6 @@ def seeded_weibul_rng_fn(self): ] -@pytest.mark.skipif( - condition=(SCIPY_VERSION < parse("1.4.0")), - reason="betabinom is new in Scipy 1.4.0", -) class TestBetaBinomial(BaseTestDistribution): pymc_dist = pm.BetaBinomial pymc_dist_params = {"alpha": 2.0, "beta": 1.0, "n": 5} diff --git a/requirements.txt b/requirements.txt index 9a4f8b3a671..2fe08bb2f16 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,5 +6,5 @@ fastprogress>=0.2.0 numpy>=1.15.0 pandas>=0.24.0 patsy>=0.5.1 -scipy>=1.2.0 +scipy>=1.4.1 typing-extensions>=3.7.4