diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index cd37f6e834f..00d2e7760bb 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -8,7 +8,7 @@ - The `Distribution` keyword argument `testval` has been deprecated in favor of `initval`. - `pm.sample` now returns results as `InferenceData` instead of `MultiTrace` by default (see [#4744](https://github.com/pymc-devs/pymc3/pull/4744)). - `pm.sample_prior_predictive` no longer returns transformed variable values by default. Pass them by name in `var_names` if you want to obtain these draws (see [4769](https://github.com/pymc-devs/pymc3/pull/4769)). -... +- ... ### New Features - The `CAR` distribution has been added to allow for use of conditional autoregressions which often are used in spatial and network models. @@ -20,12 +20,14 @@ - Add `logcdf` method to Kumaraswamy distribution (see [#4706](https://github.com/pymc-devs/pymc3/pull/4706)). - The `OrderedMultinomial` distribution has been added for use on ordinal data which are _aggregated_ by trial, like multinomial observations, whereas `OrderedLogistic` only accepts ordinal data in a _disaggregated_ format, like categorical observations (see [#4773](https://github.com/pymc-devs/pymc3/pull/4773)). +- ... ### Maintenance - Remove float128 dtype support (see [#4514](https://github.com/pymc-devs/pymc3/pull/4514)). - Logp method of `Uniform` and `DiscreteUniform` no longer depends on `pymc3.distributions.dist_math.bound` for proper evaluation (see [#4541](https://github.com/pymc-devs/pymc3/pull/4541)). - `Model.RV_dims` and `Model.coords` are now read-only properties. To modify the `coords` dictionary use `Model.add_coord`. Also `dims` or coordinate values that are `None` will be auto-completed (see [#4625](https://github.com/pymc-devs/pymc3/pull/4625)). - The length of `dims` in the model is now tracked symbolically through `Model.dim_lengths` (see [#4625](https://github.com/pymc-devs/pymc3/pull/4625)). +- The `incomplete_beta` function in `pymc3.distributions.dist_math` was replaced by `aesara.tensor.betainc` (see [4857](https://github.com/pymc-devs/pymc3/pull/4857)). - ... ## PyMC3 3.11.2 (14 March 2021) diff --git a/conda-envs/environment-dev-py37.yml b/conda-envs/environment-dev-py37.yml index 7e1ac0ae795..6ac749fa6a5 100644 --- a/conda-envs/environment-dev-py37.yml +++ b/conda-envs/environment-dev-py37.yml @@ -3,7 +3,7 @@ channels: - conda-forge - defaults dependencies: -- aesara>=2.0.9 +- aesara>=2.1.0 - arviz>=0.11.2 - cachetools>=4.2.1 - dill diff --git a/conda-envs/environment-dev-py38.yml b/conda-envs/environment-dev-py38.yml index c267d8b8541..21d4f1f7503 100644 --- a/conda-envs/environment-dev-py38.yml +++ b/conda-envs/environment-dev-py38.yml @@ -3,7 +3,7 @@ channels: - conda-forge - defaults dependencies: -- aesara>=2.0.9 +- aesara>=2.1.0 - arviz>=0.11.2 - cachetools>=4.2.1 - dill diff --git a/conda-envs/environment-dev-py39.yml b/conda-envs/environment-dev-py39.yml index 98b7788edc1..1f5abdd4d81 100644 --- a/conda-envs/environment-dev-py39.yml +++ b/conda-envs/environment-dev-py39.yml @@ -3,7 +3,7 @@ channels: - conda-forge - defaults dependencies: -- aesara>=2.0.9 +- aesara>=2.1.0 - arviz>=0.11.2 - cachetools>=4.2.1 - dill diff --git a/conda-envs/windows-environment-dev-py38.yml b/conda-envs/windows-environment-dev-py38.yml index 9f085f7d475..c3893f172b8 100644 --- a/conda-envs/windows-environment-dev-py38.yml +++ b/conda-envs/windows-environment-dev-py38.yml @@ -4,7 +4,7 @@ channels: - defaults dependencies: # base dependencies (see install guide for Windows) -- aesara>=2.0.9 +- aesara>=2.1.0 - arviz>=0.11.2 - cachetools>=4.2.1 - dill diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 86f51e75d4d..aeaaae413af 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -59,7 +59,6 @@ bound, clipped_beta_rvs, i0e, - incomplete_beta, log_i0, log_normal, logpow, @@ -1246,23 +1245,19 @@ def logcdf(value, alpha, beta): Parameters ---------- - value: numeric - Value(s) for which log CDF is calculated. + value: numeric or np.ndarray or aesara.tensor + Value(s) for which log CDF is calculated. If the log CDF for multiple + values are desired the values must be provided in a numpy array or Aesara tensor. Returns ------- TensorVariable """ - # incomplete_beta function can only handle scalar values (see #4342) - if np.ndim(value): - raise TypeError( - f"Beta.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." - ) return bound( at.switch( at.lt(value, 1), - at.log(incomplete_beta(alpha, beta, value)), + at.log(at.betainc(alpha, beta, value)), 0, ), 0 <= value, @@ -1915,19 +1910,14 @@ def logcdf(value, nu, mu, sigma): Parameters ---------- - value: numeric - Value(s) for which log CDF is calculated. + value: numeric or np.ndarray or aesara.tensor + Value(s) for which log CDF is calculated. If the log CDF for multiple + values are desired the values must be provided in a numpy array or Aesara tensor. Returns ------- TensorVariable """ - # incomplete_beta function can only handle scalar values (see #4342) - if np.ndim(value): - raise TypeError( - f"StudentT.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." - ) - lam, sigma = get_tau_sigma(sigma=sigma) t = (value - mu) / sigma @@ -1935,7 +1925,7 @@ def logcdf(value, nu, mu, sigma): x = (t + sqrt_t2_nu) / (2.0 * sqrt_t2_nu) return bound( - at.log(incomplete_beta(nu / 2.0, nu / 2.0, x)), + at.log(at.betainc(nu / 2.0, nu / 2.0, x)), 0 < nu, 0 < sigma, 0 < lam, diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index 8efeb60bc93..9ffe3b7cce8 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -35,7 +35,6 @@ binomln, bound, factln, - incomplete_beta, log_diff_normal_cdf, logpow, normal_lccdf, @@ -145,25 +144,20 @@ def logcdf(value, n, p): Parameters ---------- - value: numeric - Value for which log CDF is calculated. + value: numeric or np.ndarray or aesara.tensor + Value(s) for which log CDF is calculated. If the log CDF for multiple + values are desired the values must be provided in a numpy array or Aesara tensor. Returns ------- TensorVariable """ - # incomplete_beta function can only handle scalar values (see #4342) - if np.ndim(value): - raise TypeError( - f"Binomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." - ) - value = at.floor(value) return bound( at.switch( at.lt(value, n), - at.log(incomplete_beta(n - value, value + 1, 1 - p)), + at.log(at.betainc(n - value, value + 1, 1 - p)), 0, ), 0 <= value, @@ -732,21 +726,16 @@ def logcdf(value, n, p): Parameters ---------- - value: numeric - Value for which log CDF is calculated. + value: numeric or np.ndarray or aesara.tensor + Value(s) for which log CDF is calculated. If the log CDF for multiple + values are desired the values must be provided in a numpy array or Aesara tensor. Returns ------- TensorVariable """ - # incomplete_beta function can only handle scalar values (see #4342) - if np.ndim(value): - raise TypeError( - f"NegativeBinomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." - ) - return bound( - at.log(incomplete_beta(n, at.floor(value) + 1, p)), + at.log(at.betainc(n, at.floor(value) + 1, p)), 0 <= value, 0 < n, 0 <= p, @@ -1461,20 +1450,15 @@ def logcdf(value, psi, n, p): Parameters ---------- - value: numeric - Value for which log CDF is calculated. + value: numeric or np.ndarray or aesara.tensor + Value(s) for which log CDF is calculated. If the log CDF for multiple + values are desired the values must be provided in a numpy array or Aesara tensor. Returns ------- TensorVariable """ - # logcdf can only handle scalar values due to limitation in Binomial.logcdf - if np.ndim(value): - raise TypeError( - f"ZeroInflatedBinomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." - ) - return bound( logaddexp(at.log1p(-psi), at.log(psi) + _logcdf(binomial, value, {}, n, p)), 0 <= value, @@ -1616,19 +1600,14 @@ def logcdf(value, psi, n, p): Parameters ---------- - value: numeric - Value for which log CDF is calculated. + value: numeric or np.ndarray or aesara.tensor + Value(s) for which log CDF is calculated. If the log CDF for multiple + values are desired the values must be provided in a numpy array or Aesara tensor. Returns ------- TensorVariable """ - # logcdf can only handle scalar values due to limitation in NegativeBinomial.logcdf - if np.ndim(value): - raise TypeError( - f"ZeroInflatedNegativeBinomial.logcdf expects a scalar value but received a {np.ndim(value)}-dimensional object." - ) - return bound( logaddexp(at.log1p(-psi), at.log(psi) + _logcdf(nbinom, value, {}, n, p)), 0 <= value, diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py index e8ea88f32a3..b4a34a05b09 100644 --- a/pymc3/distributions/dist_math.py +++ b/pymc3/distributions/dist_math.py @@ -17,18 +17,18 @@ @author: johnsalvatier """ +import warnings + import aesara import aesara.tensor as at import numpy as np import scipy.linalg import scipy.stats -from aesara import scan from aesara.compile.builders import OpFromGraph from aesara.graph.basic import Apply from aesara.graph.op import Op from aesara.scalar import UnaryScalarOp, upgrade_to_float_no_complex -from aesara.scan import until from aesara.tensor import gammaln from aesara.tensor.elemwise import Elemwise from aesara.tensor.slinalg import Cholesky @@ -433,169 +433,6 @@ def zvalue(value, sigma, mu): return (value - mu) / sigma -def incomplete_beta_cfe(a, b, x, small): - """Incomplete beta continued fraction expansions - based on Cephes library by Steve Moshier (incbet.c). - small: Choose element-wise which continued fraction expansion to use. - """ - BIG = at.constant(4.503599627370496e15, dtype="float64") - BIGINV = at.constant(2.22044604925031308085e-16, dtype="float64") - THRESH = at.constant(3.0 * np.MachAr().eps, dtype="float64") - - zero = at.constant(0.0, dtype="float64") - one = at.constant(1.0, dtype="float64") - two = at.constant(2.0, dtype="float64") - - r = one - k1 = a - k3 = a - k4 = a + one - k5 = one - k8 = a + two - - k2 = at.switch(small, a + b, b - one) - k6 = at.switch(small, b - one, a + b) - k7 = at.switch(small, k4, a + one) - k26update = at.switch(small, one, -one) - x = at.switch(small, x, x / (one - x)) - - pkm2 = zero - qkm2 = one - pkm1 = one - qkm1 = one - r = one - - def _step(i, pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r, a, b, x, small): - xk = -(x * k1 * k2) / (k3 * k4) - pk = pkm1 + pkm2 * xk - qk = qkm1 + qkm2 * xk - pkm2 = pkm1 - pkm1 = pk - qkm2 = qkm1 - qkm1 = qk - - xk = (x * k5 * k6) / (k7 * k8) - pk = pkm1 + pkm2 * xk - qk = qkm1 + qkm2 * xk - pkm2 = pkm1 - pkm1 = pk - qkm2 = qkm1 - qkm1 = qk - - old_r = r - r = at.switch(at.eq(qk, zero), r, pk / qk) - - k1 += one - k2 += k26update - k3 += two - k4 += two - k5 += one - k6 -= k26update - k7 += two - k8 += two - - big_cond = at.gt(at.abs_(qk) + at.abs_(pk), BIG) - biginv_cond = at.or_(at.lt(at.abs_(qk), BIGINV), at.lt(at.abs_(pk), BIGINV)) - - pkm2 = at.switch(big_cond, pkm2 * BIGINV, pkm2) - pkm1 = at.switch(big_cond, pkm1 * BIGINV, pkm1) - qkm2 = at.switch(big_cond, qkm2 * BIGINV, qkm2) - qkm1 = at.switch(big_cond, qkm1 * BIGINV, qkm1) - - pkm2 = at.switch(biginv_cond, pkm2 * BIG, pkm2) - pkm1 = at.switch(biginv_cond, pkm1 * BIG, pkm1) - qkm2 = at.switch(biginv_cond, qkm2 * BIG, qkm2) - qkm1 = at.switch(biginv_cond, qkm1 * BIG, qkm1) - - return ( - (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), - until(at.abs_(old_r - r) < (THRESH * at.abs_(r))), - ) - - (pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), _ = scan( - _step, - sequences=[at.arange(0, 300)], - outputs_info=[ - e - for e in at.cast((pkm1, pkm2, qkm1, qkm2, k1, k2, k3, k4, k5, k6, k7, k8, r), "float64") - ], - non_sequences=[a, b, x, small], - ) - - return r[-1] - - -def incomplete_beta_ps(a, b, value): - """Power series for incomplete beta - Use when b*x is small and value not too close to 1. - Based on Cephes library by Steve Moshier (incbet.c) - """ - one = at.constant(1, dtype="float64") - ai = one / a - u = (one - b) * value - t1 = u / (a + one) - t = u - threshold = np.MachAr().eps * ai - s = at.constant(0, dtype="float64") - - def _step(i, t, s, a, b, value): - t *= (i - b) * value / i - step = t / (a + i) - s += step - return ((t, s), until(at.abs_(step) < threshold)) - - (t, s), _ = scan( - _step, - sequences=[at.arange(2, 302)], - outputs_info=[e for e in at.cast((t, s), "float64")], - non_sequences=[a, b, value], - ) - - s = s[-1] + t1 + ai - - t = gammaln(a + b) - gammaln(a) - gammaln(b) + a * at.log(value) + at.log(s) - return at.exp(t) - - -def incomplete_beta(a, b, value): - """Incomplete beta implementation - Power series and continued fraction expansions chosen for best numerical - convergence across the board based on inputs. - """ - machep = at.constant(np.MachAr().eps, dtype="float64") - one = at.constant(1, dtype="float64") - w = one - value - - ps = incomplete_beta_ps(a, b, value) - - flip = at.gt(value, (a / (a + b))) - aa, bb = a, b - a = at.switch(flip, bb, aa) - b = at.switch(flip, aa, bb) - xc = at.switch(flip, value, w) - x = at.switch(flip, w, value) - - tps = incomplete_beta_ps(a, b, x) - tps = at.switch(at.le(tps, machep), one - machep, one - tps) - - # Choose which continued fraction expansion for best convergence. - small = at.lt(x * (a + b - 2.0) - (a - one), 0.0) - cfe = incomplete_beta_cfe(a, b, x, small) - w = at.switch(small, cfe, cfe / xc) - - # Direct incomplete beta accounting for flipped a, b. - t = at.exp( - a * at.log(x) + b * at.log(xc) + gammaln(a + b) - gammaln(a) - gammaln(b) + at.log(w / a) - ) - - t = at.switch(flip, at.switch(at.le(t, machep), one - machep, one - t), t) - return at.switch( - at.and_(flip, at.and_(at.le((b * x), one), at.le(x, 0.95))), - tps, - at.switch(at.and_(at.le(b * value, one), at.le(value, 0.95)), ps, t), - ) - - def clipped_beta_rvs(a, b, size=None, random_state=None, dtype="float64"): """Draw beta distributed random samples in the open :math:`(0, 1)` interval. @@ -672,3 +509,12 @@ def log_i0(x): + 11025.0 / (98304.0 * x ** 4.0) ), ) + + +def incomplete_beta(a, b, value): + warnings.warn( + "incomplete_beta has been deprecated. Use aesara.tensor.betainc instead.", + DeprecationWarning, + stacklevel=2, + ) + return at.betainc(a, b, value) diff --git a/pymc3/tests/test_dist_math.py b/pymc3/tests/test_dist_math.py index c9389990edd..b175e1c05e1 100644 --- a/pymc3/tests/test_dist_math.py +++ b/pymc3/tests/test_dist_math.py @@ -34,6 +34,7 @@ clipped_beta_rvs, factln, i0e, + incomplete_beta, multigammaln, ) from pymc3.tests.checks import close_to @@ -262,3 +263,9 @@ def ref_multigammaln(a, b): for x in xvals: if np.all(x > 0.5 * (p - 1)): check_vals(multigammaln_, ref_multigammaln, x, p) + + +def test_incomplete_beta_deprecation(): + with pytest.warns(DeprecationWarning, match="incomplete_beta has been deprecated"): + res = incomplete_beta(3, 5, 0.5).eval() + assert np.isclose(res, at.betainc(3, 5, 0.5).eval()) diff --git a/requirements-dev.txt b/requirements-dev.txt index 22fd046502c..01f9ecc6577 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,7 +1,7 @@ # This file is auto-generated by scripts/generate_pip_deps_from_conda.py, do not modify. # See that file for comments about the need/usage of each dependency. -aesara>=2.0.9 +aesara>=2.1.0 arviz>=0.11.2 cachetools>=4.2.1 dill diff --git a/requirements.txt b/requirements.txt index 70db77f661b..9a4f8b3a671 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -aesara>=2.0.9 +aesara>=2.1.0 arviz>=0.11.2 cachetools>=4.2.1 dill