From 622058493897cc9f2c360a24d8c0de0ede9d3bef Mon Sep 17 00:00:00 2001 From: David Widmann Date: Fri, 1 Apr 2022 16:01:27 +0200 Subject: [PATCH 1/5] Revert "Use julia implementations for pdfs and some cdf-like functions (#113)" This reverts commit 28d732a17078c87e2fde43da97de8fae310600e0. --- Project.toml | 2 - src/distrs/beta.jl | 74 ++++------------------------------- src/distrs/binom.jl | 32 +++------------ src/distrs/chisq.jl | 29 +++++++++----- src/distrs/fdist.jl | 28 ++++++-------- src/distrs/gamma.jl | 94 ++++----------------------------------------- src/distrs/pois.jl | 20 +++------- src/distrs/tdist.jl | 3 +- test/rmath.jl | 86 +++++++++++++++-------------------------- 9 files changed, 90 insertions(+), 278 deletions(-) diff --git a/Project.toml b/Project.toml index 2f158ea..fcb2ae1 100644 --- a/Project.toml +++ b/Project.toml @@ -4,7 +4,6 @@ version = "0.9.16" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" -HypergeometricFunctions = "34004b35-14d8-5ef3-9330-4cdb6864b03a" InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" IrrationalConstants = "92d709cd-6900-40b7-9082-c6be49f344b6" LogExpFunctions = "2ab3a3ac-af41-5b50-aa03-7779005ae688" @@ -14,7 +13,6 @@ SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" [compat] ChainRulesCore = "1" -HypergeometricFunctions = "0.3" InverseFunctions = "0.1" IrrationalConstants = "0.1" LogExpFunctions = "0.3.2" diff --git a/src/distrs/beta.jl b/src/distrs/beta.jl index 1e5670e..6328c4e 100644 --- a/src/distrs/beta.jl +++ b/src/distrs/beta.jl @@ -1,17 +1,14 @@ # functions related to beta distributions -using HypergeometricFunctions: _₂F₁ - # R implementations +# For pdf and logpdf we use the Julia implementation using .RFunctions: - # betapdf, - # betalogpdf, - # betacdf, - # betaccdf, - # betalogcdf, - # betalogccdf, - # betainvcdf, - # betainvccdf, + betacdf, + betaccdf, + betalogcdf, + betalogccdf, + betainvcdf, + betainvccdf, betainvlogcdf, betainvlogccdf @@ -25,60 +22,3 @@ function betalogpdf(α::T, β::T, x::T) where {T<:Real} val = xlogy(α - 1, y) + xlog1py(β - 1, -y) - logbeta(α, β) return x < 0 || x > 1 ? oftype(val, -Inf) : val end - -function betacdf(α::Real, β::Real, x::Real) - # Handle a degenerate case - if iszero(α) && β > 0 - return float(last(promote(α, β, x, x >= 0))) - end - - return first(beta_inc(α, β, clamp(x, 0, 1))) -end - -function betaccdf(α::Real, β::Real, x::Real) - # Handle a degenerate case - if iszero(α) && β > 0 - return float(last(promote(α, β, x, x < 0))) - end - - last(beta_inc(α, β, clamp(x, 0, 1))) -end - -# The log version is currently based on non-log version. When the cdf is very small we shift -# to an implementation based on the hypergeometric function ₂F₁ to avoid underflow. -function betalogcdf(α::T, β::T, x::T) where {T<:Real} - # Handle a degenerate case - if iszero(α) && β > 0 - return log(last(promote(x, x >= 0))) - end - - _x = clamp(x, 0, 1) - p, q = beta_inc(α, β, _x) - if p < floatmin(p) - # see https://dlmf.nist.gov/8.17#E7 - return -log(α) + xlogy(α, _x) + log(_₂F₁(promote(α, 1 - β, α + 1, _x)...)) - logbeta(α, β) - elseif p <= 0.7 - return log(p) - else - return log1p(-q) - end -end -betalogcdf(α::Real, β::Real, x::Real) = betalogcdf(promote(α, β, x)...) - -function betalogccdf(α::Real, β::Real, x::Real) - # Handle a degenerate case - if iszero(α) && β > 0 - return log(last(promote(α, β, x, x < 0))) - end - - p, q = beta_inc(α, β, clamp(x, 0, 1)) - if q < 0.7 - return log(q) - else - return log1p(-p) - end -end - -betainvcdf(α::Real, β::Real, p::Real) = first(beta_inc_inv(α, β, p)) - -betainvccdf(α::Real, β::Real, p::Real) = last(beta_inc_inv(β, α, p)) diff --git a/src/distrs/binom.jl b/src/distrs/binom.jl index 5cb6c8a..73f8185 100644 --- a/src/distrs/binom.jl +++ b/src/distrs/binom.jl @@ -1,18 +1,18 @@ # functions related to binomial distribution # R implementations +# For pdf and logpdf we use the Julia implementation using .RFunctions: - # binompdf, - # binomlogpdf, - # binomcdf, - # binomccdf, - # binomlogcdf, - # binomlogccdf, + binomcdf, + binomccdf, + binomlogcdf, + binomlogccdf, binominvcdf, binominvccdf, binominvlogcdf, binominvlogccdf + # Julia implementations binompdf(n::Real, p::Real, k::Real) = exp(binomlogpdf(n, p, k)) @@ -22,23 +22,3 @@ function binomlogpdf(n::T, p::T, k::T) where {T<:Real} val = min(0, betalogpdf(m + 1, n - m + 1, p) - log(n + 1)) return 0 <= k <= n && isinteger(k) ? val : oftype(val, -Inf) end - -for l in ("", "log"), compl in (false, true) - fbinom = Symbol(string("binom", l, ifelse(compl, "c", "" ), "cdf")) - fbeta = Symbol(string("beta" , l, ifelse(compl, "", "c"), "cdf")) - @eval function ($fbinom)(n::Real, p::Real, k::Real) - if isnan(k) - return last(promote(n, p, k)) - end - res = ($fbeta)(max(0, floor(k) + 1), max(0, n - floor(k)), p) - - # When p == 1 the betaccdf doesn't return the correct result - # so these cases have to be special cased - if isone(p) - newres = oftype(res, $compl ? k < n : k >= n) - return $(l === "" ? :newres : :(log(newres))) - else - return res - end - end -end diff --git a/src/distrs/chisq.jl b/src/distrs/chisq.jl index d249b2a..467fa08 100644 --- a/src/distrs/chisq.jl +++ b/src/distrs/chisq.jl @@ -1,11 +1,22 @@ # functions related to chi-square distribution -# Just use the Gamma definitions -for f in ("pdf", "logpdf", "cdf", "ccdf", "logcdf", "logccdf", "invcdf", "invccdf", "invlogcdf", "invlogccdf") - _chisqf = Symbol("chisq"*f) - _gammaf = Symbol("gamma"*f) - @eval begin - $(_chisqf)(k::Real, x::Real) = $(_chisqf)(promote(k, x)...) - $(_chisqf)(k::T, x::T) where {T<:Real} = $(_gammaf)(k/2, 2, x) - end -end +# R implementations +# For pdf and logpdf we use the Julia implementation +using .RFunctions: + chisqcdf, + chisqccdf, + chisqlogcdf, + chisqlogccdf, + chisqinvcdf, + chisqinvccdf, + chisqinvlogcdf, + chisqinvlogccdf + +# Julia implementations +# promotion ensures that we do forward e.g. `chisqpdf(::Int, ::Float32)` to +# `gammapdf(::Float32, ::Int, ::Float32)` but not `gammapdf(::Float64, ::Int, ::Float32)` +chisqpdf(k::Real, x::Real) = chisqpdf(promote(k, x)...) +chisqpdf(k::T, x::T) where {T<:Real} = gammapdf(k / 2, 2, x) + +chisqlogpdf(k::Real, x::Real) = chisqlogpdf(promote(k, x)...) +chisqlogpdf(k::T, x::T) where {T<:Real} = gammalogpdf(k / 2, 2, x) diff --git a/src/distrs/fdist.jl b/src/distrs/fdist.jl index 17dfe17..1f11c16 100644 --- a/src/distrs/fdist.jl +++ b/src/distrs/fdist.jl @@ -1,5 +1,17 @@ # functions related to F distribution +# R implementations +# For pdf and logpdf we use the Julia implementation +using .RFunctions: + fdistcdf, + fdistccdf, + fdistlogcdf, + fdistlogccdf, + fdistinvcdf, + fdistinvccdf, + fdistinvlogcdf, + fdistinvlogccdf + # Julia implementations fdistpdf(ν1::Real, ν2::Real, x::Real) = exp(fdistlogpdf(ν1, ν2, x)) @@ -11,19 +23,3 @@ function fdistlogpdf(ν1::T, ν2::T, x::T) where {T<:Real} val = (xlogy(ν1, ν1ν2) + xlogy(ν1 - 2, y) - xlogy(ν1 + ν2, 1 + ν1ν2 * y)) / 2 - logbeta(ν1 / 2, ν2 / 2) return x < 0 ? oftype(val, -Inf) : val end - -for f in ("cdf", "ccdf", "logcdf", "logccdf") - ff = Symbol("fdist"*f) - bf = Symbol("beta"*f) - @eval $ff(ν1::T, ν2::T, x::T) where {T<:Real} = $bf(ν1/2, ν2/2, inv(1 + ν2/(ν1*max(0, x)))) - @eval $ff(ν1::Real, ν2::Real, x::Real) = $ff(promote(ν1, ν2, x)...) -end -for f in ("invcdf", "invccdf", "invlogcdf", "invlogccdf") - ff = Symbol("fdist"*f) - bf = Symbol("beta"*f) - @eval function $ff(ν1::T, ν2::T, y::T) where {T<:Real} - x = $bf(ν1/2, ν2/2, y) - return x/(1 - x)*ν2/ν1 - end - @eval $ff(ν1::Real, ν2::Real, y::Real) = $ff(promote(ν1, ν2, y)...) -end diff --git a/src/distrs/gamma.jl b/src/distrs/gamma.jl index cd1dc0c..15b3975 100644 --- a/src/distrs/gamma.jl +++ b/src/distrs/gamma.jl @@ -1,17 +1,14 @@ # functions related to gamma distribution -using HypergeometricFunctions: drummond1F1 - # R implementations +# For pdf and logpdf we use the Julia implementation using .RFunctions: - # gammapdf, - # gammalogpdf, - # gammacdf, - # gammaccdf, - # gammalogcdf, - # gammalogccdf, - # gammainvcdf, - # gammainvccdf, + gammacdf, + gammaccdf, + gammalogcdf, + gammalogccdf, + gammainvcdf, + gammainvccdf, gammainvlogcdf, gammainvlogccdf @@ -25,80 +22,3 @@ function gammalogpdf(k::T, θ::T, x::T) where {T<:Real} val = -loggamma(k) + xlogy(k - 1, xθ) - log(θ) - xθ return x < 0 ? oftype(val, -Inf) : val end - -function gammacdf(k::T, θ::T, x::T) where {T<:Real} - # Handle the degenerate case - if iszero(k) - return float(last(promote(x, x >= 0))) - end - return first(gamma_inc(k, max(0, x)/θ)) -end -gammacdf(k::Real, θ::Real, x::Real) = gammacdf(map(float, promote(k, θ, x))...) - -function gammaccdf(k::T, θ::T, x::T) where {T<:Real} - # Handle the degenerate case - if iszero(k) - return float(last(promote(x, x < 0))) - end - return last(gamma_inc(k, max(0, x)/θ)) -end -gammaccdf(k::Real, θ::Real, x::Real) = gammaccdf(map(float, promote(k, θ, x))...) - -gammalogcdf(k::Real, θ::Real, x::Real) = _gammalogcdf(map(float, promote(k, θ, x))...) - -# Implemented via the non-log version. For tiny values, we recompute the result with -# loggamma. In that situation, there is little risk of significant cancellation. -function _gammalogcdf(k::Float64, θ::Float64, x::Float64) - # Handle the degenerate case - if iszero(k) - return log(x >= 0) - end - - xdθ = max(0, x)/θ - l, u = gamma_inc(k, xdθ) - if l < floatmin(Float64) && isfinite(k) && isfinite(xdθ) - return -log(k) + k*log(xdθ) - xdθ + log(drummond1F1(1.0, 1 + k, xdθ)) - loggamma(k) - elseif l < 0.7 - return log(l) - else - return log1p(-u) - end -end -function _gammalogcdf(k::T, θ::T, x::T) where {T<:Union{Float16,Float32}} - return T(_gammalogcdf(Float64(k), Float64(θ), Float64(x))) -end - -gammalogccdf(k::Real, θ::Real, x::Real) = _gammalogccdf(map(float, promote(k, θ, x))...) - -# Implemented via the non-log version. For tiny values, we recompute the result with -# loggamma. In that situation, there is little risk of significant cancellation. -function _gammalogccdf(k::Float64, θ::Float64, x::Float64) - # Handle the degenerate case - if iszero(k) - return log(x < 0) - end - - xdθ = max(0, x)/θ - l, u = gamma_inc(k, xdθ) - if u < floatmin(Float64) - return loggamma(k, xdθ) - loggamma(k) - elseif u < 0.7 - return log(u) - else - return log1p(-l) - end -end - -function _gammalogccdf(k::T, θ::T, x::T) where {T<:Union{Float16,Float32}} - return T(_gammalogccdf(Float64(k), Float64(θ), Float64(x))) -end - -function gammainvcdf(k::Real, θ::Real, p::Real) - _k, _θ, _p = promote(k, θ, p) - return _θ*gamma_inc_inv(_k, _p, 1 - _p) -end - -function gammainvccdf(k::Real, θ::Real, p::Real) - _k, _θ, _p = promote(k, θ, p) - return _θ*gamma_inc_inv(_k, 1 - _p, _p) -end diff --git a/src/distrs/pois.jl b/src/distrs/pois.jl index 0928fae..ed6192b 100644 --- a/src/distrs/pois.jl +++ b/src/distrs/pois.jl @@ -1,13 +1,12 @@ # functions related to Poisson distribution # R implementations +# For pdf and logpdf we use the Julia implementation using .RFunctions: - # poispdf, - # poislogpdf, - # poiscdf, - # poisccdf, - # poislogcdf, - # poislogccdf, + poiscdf, + poisccdf, + poislogcdf, + poislogccdf, poisinvcdf, poisinvccdf, poisinvlogcdf, @@ -21,12 +20,3 @@ function poislogpdf(λ::T, x::T) where {T <: Real} val = xlogy(x, λ) - λ - loggamma(x + 1) return x >= 0 && isinteger(x) ? val : oftype(val, -Inf) end - -# Just use the Gamma definitions -poiscdf(λ::Real, x::Real) = gammaccdf(max(0, floor(x + 1)), 1, λ) - -poisccdf(λ::Real, x::Real) = gammacdf(max(0, floor(x + 1)), 1, λ) - -poislogcdf(λ::Real, x::Real) = gammalogccdf(max(0, floor(x + 1)), 1, λ) - -poislogccdf(λ::Real, x::Real) = gammalogcdf(max(0, floor(x + 1)), 1, λ) diff --git a/src/distrs/tdist.jl b/src/distrs/tdist.jl index e6a46a3..4544efa 100644 --- a/src/distrs/tdist.jl +++ b/src/distrs/tdist.jl @@ -1,9 +1,8 @@ # functions related to student's T distribution # R implementations +# For pdf and logpdf we use the Julia implementation using .RFunctions: - # tdistpdf, - # tdistlogpdf, tdistcdf, tdistccdf, tdistlogcdf, diff --git a/test/rmath.jl b/test/rmath.jl index 0f17011..2034b3a 100644 --- a/test/rmath.jl +++ b/test/rmath.jl @@ -79,66 +79,47 @@ function rmathcomp(basename, params, X::AbstractArray, rtol) end rmath_rand = has_rand ? get_rmathfun(rand) : nothing - if has_pdf - @testset "pdf with x=$x" for x in X + for x in X + if has_pdf check_rmath(pdf, stats_pdf, rmath_pdf, params, "x", x, true, rtol) - end - @testset "logpdf with x=$x" for x in X check_rmath(logpdf, stats_logpdf, rmath_logpdf, params, "x", x, false, rtol) end - end - @testset "cdf with x=$x" for x in X check_rmath(cdf, stats_cdf, rmath_cdf, params, "x", x, true, rtol) - end - @testset "ccdf with x=$x" for x in X check_rmath(ccdf, stats_ccdf, rmath_ccdf, params, "x", x, true, rtol) - end - @testset "logcdf with x=$x" for x in X check_rmath(logcdf, stats_logcdf, rmath_logcdf, params, "x", x, false, rtol) - end - @testset "logccdf with x=$x" for x in X check_rmath(logccdf, stats_logccdf, rmath_logccdf, params, "x", x, false, rtol) - end - p = rmath_cdf.(params..., X) - cp = rmath_ccdf.(params..., X) - lp = rmath_logcdf.(params..., X) - lcp = rmath_logccdf.(params..., X) + p = rmath_cdf(params..., x) + cp = rmath_ccdf(params..., x) + lp = rmath_logcdf(params..., x) + lcp = rmath_logccdf(params..., x) - @testset "invcdf with q=$_p" for _p in p check_rmath(invcdf, stats_invcdf, rmath_invcdf, - params, "q", _p, false, rtol) - end - @testset "invccdf with q=$_p" for _p in cp + params, "q", p, false, rtol) check_rmath(invccdf, stats_invccdf, rmath_invccdf, - params, "q", _p, false, rtol) - end - @testset "invlogcdf with log(q)=$_p" for _p in lp + params, "q", cp, false, rtol) check_rmath(invlogcdf, stats_invlogcdf, rmath_invlogcdf, - params, "lq", _p, false, rtol) - end - @testset "invlogccdf with log(q)=$_p" for _p in lcp + params, "lq", lp, false, rtol) check_rmath(invlogccdf, stats_invlogccdf, rmath_invlogccdf, - params, "lq", _p, false, rtol) - end + params, "lq", lcp, false, rtol) - # make sure that rand works - if has_rand - rmath_rand(params...) + # make sure that rand works + if has_rand + rmath_rand(params...) + end end end function rmathcomp_tests(basename, configs) - @testset "$basename" begin - @testset "params: $params" for (params, data) in configs - rmathcomp(basename, params, data) - end + println("\ttesting $basename ...") + for (params, data) in configs + rmathcomp(basename, params, data) end end @@ -158,24 +139,21 @@ end ((10, 2), [0, 1]), ((10, 2), 0//1:1//100:1//1), ]) - # It is not possible to maintain a rtol of 1e-14 for the cdf when there is such a large difference - # in the magnitude of the parameters. At least not with the current parameters. Furthermore, it - # seems that Rmath is actually less accurate than we are but since we are comparing against Rmath - # we have to use rtol=1e-12 although we are probably only off by around 1e-13. - @testset "param: (1000, 2)" begin - rmathcomp( - "beta", - (1000, 2), - # We have to drop the 0.48 value since the R quantile function fails while we succeed. - # It's tested separate below. - setdiff(collect(0.0:0.01:1.0), 0.48), - 1e-12) - # Test p=0.48 separately since R fails. (It's pretty slow, though, caused by the cdf being 9.0797754e-317) - @test betainvcdf(1000, 2, betacdf(1000, 2, 0.48)) ≈ 0.48 - end - @testset "$(f)(0, 0, $x) should error" for f in (betacdf, betaccdf, betalogcdf, betalogccdf), - x in (0.0, 0.5, 1.0) - @test_throws DomainError f(0.0, 0.0, x) + + # We test the following extreme parameters separately since + # a slightly larger tolerance is needed. + # + # For `betapdf(1000, 2, 0.58)`: + # StatsFuns: 1.9419987107407202e-231 + # Rmath: 1.941998710740941e-231 + # Mathematica: 1.941998710742487e-231 + # For `betapdf(1000, 2, 0.68)`: + # StatsFuns: 1.5205049885199752e-162 + # Rmath: 1.5205049885200616e-162 + # Mathematica: 1.520504988521358e-162 + @testset "Beta(1000, 2)" begin + rmathcomp("beta", (1000, 2), setdiff(0.0:0.01:1.0, (0.58, 0.68))) + rmathcomp("beta", (1000, 2), [0.58, 0.68], 1e-12) end rmathcomp_tests("binom", [ From 3f3df85e0ec38c48b4b2705e346fb9a09f7a3dfd Mon Sep 17 00:00:00 2001 From: David Widmann Date: Fri, 1 Apr 2022 16:26:17 +0200 Subject: [PATCH 2/5] Fix test errors on Julia 1.0 --- test/misc.jl | 7 ++++++- test/rmath.jl | 10 ++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/test/misc.jl b/test/misc.jl index 1d5d7f0..4095e76 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -42,7 +42,12 @@ end # B₁(a, b) = B(a, b) a = rand(eltya) b = rand(eltyb) - @test logmvbeta(1, a, b) ≈ logbeta(a, b) + # Older SpecialFunctions + Julia versions require slightly larger tolerance + if VERSION < v"1.3" && promote_type(eltya, eltyb) === Float64 + @test logmvbeta(1, a, b) ≈ logbeta(a, b) rtol = 10 * sqrt(eps(Float64)) + else + @test logmvbeta(1, a, b) ≈ logbeta(a, b) + end end end diff --git a/test/rmath.jl b/test/rmath.jl index 2034b3a..809896e 100644 --- a/test/rmath.jl +++ b/test/rmath.jl @@ -151,9 +151,15 @@ end # StatsFuns: 1.5205049885199752e-162 # Rmath: 1.5205049885200616e-162 # Mathematica: 1.520504988521358e-162 + # + # With older SpecialFunctions + Julia versions a slightly larger tolerance is needed @testset "Beta(1000, 2)" begin - rmathcomp("beta", (1000, 2), setdiff(0.0:0.01:1.0, (0.58, 0.68))) - rmathcomp("beta", (1000, 2), [0.58, 0.68], 1e-12) + if VERSION < v"1.3" + rmathcomp("beta", (1000, 2), 0.0:0.01:1.0, 1e-12) + else + rmathcomp("beta", (1000, 2), setdiff(0.0:0.01:1.0, (0.58, 0.68))) + rmathcomp("beta", (1000, 2), [0.58, 0.68], 1e-12) + end end rmathcomp_tests("binom", [ From 9b869a4d1a48bdf955a112122cd4746b69ec5a8f Mon Sep 17 00:00:00 2001 From: David Widmann Date: Fri, 1 Apr 2022 16:27:02 +0200 Subject: [PATCH 3/5] Run CI on Julia 1.0 --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2e67df3..23241c2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,6 +26,7 @@ jobs: fail-fast: false matrix: version: + - '1.0' - '1' os: - ubuntu-latest From cede7a9a8d3d43d769a08a342fb4af36c8b291d9 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Fri, 1 Apr 2022 16:27:37 +0200 Subject: [PATCH 4/5] Bump version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index fcb2ae1..5dd53e2 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "StatsFuns" uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c" -version = "0.9.16" +version = "0.9.17" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" From bf538f7ae79e6e3b45ae3508db6746f2a47af6a5 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Sun, 3 Apr 2022 22:41:30 +0200 Subject: [PATCH 5/5] Update Project.toml --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 5dd53e2..997e6f3 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "StatsFuns" uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c" -version = "0.9.17" +version = "0.9.18" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"