From f72687c58ab72d8b57342120e4c588c592dfb47e Mon Sep 17 00:00:00 2001 From: David Widmann Date: Thu, 17 Jun 2021 00:41:19 +0200 Subject: [PATCH 1/4] Fix `(log)(c)cdf` with `Inf`, `-Inf` and `NaN` --- src/mixtures/mixturemodel.jl | 13 +- src/truncate.jl | 107 +++------ src/univariate/continuous/betaprime.jl | 33 +-- src/univariate/continuous/chi.jl | 15 +- src/univariate/continuous/exponential.jl | 21 +- src/univariate/continuous/frechet.jl | 21 +- .../continuous/generalizedextremevalue.jl | 42 +--- .../continuous/generalizedpareto.jl | 28 +-- src/univariate/continuous/inversegamma.jl | 20 +- src/univariate/continuous/inversegaussian.jl | 82 +++---- src/univariate/continuous/pareto.jl | 16 +- src/univariate/continuous/rayleigh.jl | 7 +- src/univariate/continuous/symtriangular.jl | 42 ++-- src/univariate/continuous/triangular.jl | 26 ++- src/univariate/continuous/vonmises.jl | 6 +- src/univariate/continuous/weibull.jl | 24 +- src/univariate/discrete/betabinomial.jl | 10 + src/univariate/discrete/binomial.jl | 12 - src/univariate/discrete/categorical.jl | 17 +- src/univariate/discrete/dirac.jl | 6 +- .../discrete/discretenonparametric.jl | 66 +++--- src/univariate/discrete/discreteuniform.jl | 16 +- src/univariate/discrete/geometric.jl | 19 +- src/univariate/discrete/hypergeometric.jl | 12 - src/univariate/discrete/negativebinomial.jl | 14 +- .../discrete/noncentralhypergeometric.jl | 15 +- src/univariate/discrete/poisson.jl | 12 - src/univariate/discrete/poissonbinomial.jl | 10 + src/univariate/discrete/skellam.jl | 8 +- src/univariate/discrete/soliton.jl | 2 +- src/univariate/locationscale.jl | 12 +- src/univariates.jl | 209 +++++++++++++----- 32 files changed, 497 insertions(+), 446 deletions(-) diff --git a/src/mixtures/mixturemodel.jl b/src/mixtures/mixturemodel.jl index 93402c14d..b08cd78ce 100644 --- a/src/mixtures/mixturemodel.jl +++ b/src/mixtures/mixturemodel.jl @@ -277,15 +277,12 @@ function insupport(d::AbstractMixtureModel, x::AbstractVector) return any(insupport(component(d, i), x) for (i, pi) in enumerate(p) if !iszero(pi)) end -function _cdf(d::UnivariateMixture, x::Real) +function cdf(d::UnivariateMixture, x::Real) p = probs(d) r = sum(pi * cdf(component(d, i), x) for (i, pi) in enumerate(p) if !iszero(pi)) return r end -cdf(d::UnivariateMixture{Continuous}, x::Real) = _cdf(d, x) -cdf(d::UnivariateMixture{Discrete}, x::Integer) = _cdf(d, x) - function _mixpdf1(d::AbstractMixtureModel, x) p = probs(d) return sum(pi * pdf(component(d, i), x) for (i, pi) in enumerate(p) if !iszero(pi)) @@ -362,10 +359,8 @@ function _mixlogpdf!(r::AbstractArray, d::AbstractMixtureModel, x) return r end -pdf(d::UnivariateMixture{Continuous}, x::Real) = _mixpdf1(d, x) -pdf(d::UnivariateMixture{Discrete}, x::Int) = _mixpdf1(d, x) -logpdf(d::UnivariateMixture{Continuous}, x::Real) = _mixlogpdf1(d, x) -logpdf(d::UnivariateMixture{Discrete}, x::Int) = _mixlogpdf1(d, x) +pdf(d::UnivariateMixture, x::Real) = _mixpdf1(d, x) +logpdf(d::UnivariateMixture, x::Real) = _mixlogpdf1(d, x) _pdf!(r::AbstractArray, d::UnivariateMixture{Discrete}, x::UnitRange) = _mixpdf!(r, d, x) _pdf!(r::AbstractArray, d::UnivariateMixture, x::AbstractArray) = _mixpdf!(r, d, x) @@ -374,7 +369,7 @@ _logpdf!(r::AbstractArray, d::UnivariateMixture, x::AbstractArray) = _mixlogpdf! _pdf(d::MultivariateMixture, x::AbstractVector) = _mixpdf1(d, x) _logpdf(d::MultivariateMixture, x::AbstractVector) = _mixlogpdf1(d, x) _pdf!(r::AbstractArray, d::MultivariateMixture, x::AbstractMatrix) = _mixpdf!(r, d, x) -_lodpdf!(r::AbstractArray, d::MultivariateMixture, x::AbstractMatrix) = _mixlogpdf!(r, d, x) +_logpdf!(r::AbstractArray, d::MultivariateMixture, x::AbstractMatrix) = _mixlogpdf!(r, d, x) ## component-wise pdf and logpdf diff --git a/src/truncate.jl b/src/truncate.jl index c9157b66a..66f6bf763 100644 --- a/src/truncate.jl +++ b/src/truncate.jl @@ -91,94 +91,59 @@ end quantile(d::Truncated, p::Real) = quantile(d.untruncated, d.lcdf + p * d.tp) -function _pdf(d::Truncated, x::T) where {T<:Real} - if d.lower <= x <= d.upper - pdf(d.untruncated, x) / d.tp - else - zero(T) - end -end - -function pdf(d::Truncated{<:ContinuousUnivariateDistribution}, x::T) where {T<:Real} - _pdf(d, float(x)) -end - -function pdf(d::Truncated{D}, x::T) where {D<:DiscreteUnivariateDistribution, T<:Real} - isinteger(x) || return zero(float(T)) - _pdf(d, x) +function pdf(d::Truncated, x::Real) + result = pdf(d.untruncated, x) / d.tp + return d.lower <= x <= d.upper ? result : zero(result) end -function pdf(d::Truncated{D}, x::T) where {D<:DiscreteUnivariateDistribution, T<:Integer} - _pdf(d, float(x)) +function logpdf(d::Truncated, x::Real) + result = logpdf(d.untruncated, x) - d.logtp + return d.lower <= x <= d.upper ? result : oftype(result, -Inf) end -function _logpdf(d::Truncated, x::T) where {T<:Real} - if d.lower <= x <= d.upper - logpdf(d.untruncated, x) - d.logtp +function cdf(d::Truncated, x::Real) + result = (cdf(d.untruncated, x) - d.lcdf) / d.tp + return if x < d.lower + zero(result) + elseif x >= d.upper + one(result) else - TF = float(T) - -TF(Inf) + result end end -function logpdf(d::Truncated{D}, x::T) where {D<:DiscreteUnivariateDistribution, T<:Real} - TF = float(T) - isinteger(x) || return -TF(Inf) - return _logpdf(d, x) -end - -function logpdf(d::Truncated{D}, x::Integer) where {D<:DiscreteUnivariateDistribution} - _logpdf(d, x) -end - -function logpdf(d::Truncated{D, Continuous}, x::T) where {D<:ContinuousUnivariateDistribution, T<:Real} - _logpdf(d, x) -end - -# fallback to avoid method ambiguities -_cdf(d::Truncated, x::T) where {T<:Real} = - x <= d.lower ? zero(T) : - x >= d.upper ? one(T) : - (cdf(d.untruncated, x) - d.lcdf) / d.tp - -cdf(d::Truncated, x::Real) = _cdf(d, x) -cdf(d::Truncated, x::Integer) = _cdf(d, float(x)) # float conversion for stability - -function _logcdf(d::Truncated, x::T) where {T<:Real} - TF = float(T) - if x <= d.lower - -TF(Inf) +function logcdf(d::Truncated, x::Real) + result = logsubexp(logcdf(d.untruncated, x), log(d.lcdf)) - d.logtp + return if x < d.lower + oftype(result, -Inf) elseif x >= d.upper - zero(TF) + zero(result) else - log(cdf(d.untruncated, x) - d.lcdf) - d.logtp + result end end -logcdf(d::Truncated, x::Real) = _logcdf(d, x) -logcdf(d::Truncated, x::Integer) = _logcdf(d, x) - -_ccdf(d::Truncated, x::T) where {T<:Real} = - x <= d.lower ? one(T) : - x >= d.upper ? zero(T) : - (d.ucdf - cdf(d.untruncated, x)) / d.tp - -ccdf(d::Truncated, x::Real) = _ccdf(d, x) -ccdf(d::Truncated, x::Integer) = _ccdf(d, float(x)) - -function _logccdf(d::Truncated, x::T) where {T<:Real} - TF = float(T) - if x <= d.lower - zero(TF) - elseif x >= d.upper - -TF(Inf) +function ccdf(d::Truncated, x::Real) + result = (d.ucdf - cdf(d.untruncated, x)) / d.tp + return if x <= d.lower + one(result) + elseif x > d.upper + zero(result) else - log(d.ucdf - cdf(d.untruncated, x)) - d.logtp + result end end -logccdf(d::Truncated, x::Real) = _logccdf(d, x) -logccdf(d::Truncated, x::Integer) = _logccdf(d, x) +function logccdf(d::Truncated, x::Real) + result = logsubexp(logccdf(d.untruncated, x), log1p(-d.ucdf)) - d.logtp + return if x <= d.lower + zero(result) + elseif x > d.upper + oftype(result, -Inf) + else + result + end +end ## random number generation diff --git a/src/univariate/continuous/betaprime.jl b/src/univariate/continuous/betaprime.jl index 51918387a..e965cf674 100644 --- a/src/univariate/continuous/betaprime.jl +++ b/src/univariate/continuous/betaprime.jl @@ -85,25 +85,28 @@ end #### Evaluation -function logpdf(d::BetaPrime{T}, x::Real) where T<:Real - (α, β) = params(d) - if x < 0 - T(-Inf) - else - (α - 1) * log(x) - (α + β) * log1p(x) - logbeta(α, β) - end +function logpdf(d::BetaPrime, x::Real) + α, β = params(d) + _x = max(0, x) + z = xlogy(α - 1, _x) - (α + β) * log1p(_x) - logbeta(α, β) + return x < 0 ? oftype(z, -Inf) : z end -cdf(d::BetaPrime{T}, x::Real) where {T<:Real} = x <= 0 ? zero(T) : betacdf(d.α, d.β, x / (1 + x)) -ccdf(d::BetaPrime{T}, x::Real) where {T<:Real} = x <= 0 ? one(T) : betaccdf(d.α, d.β, x / (1 + x)) -logcdf(d::BetaPrime{T}, x::Real) where {T<:Real} = x <= 0 ? T(-Inf) : betalogcdf(d.α, d.β, x / (1 + x)) -logccdf(d::BetaPrime{T}, x::Real) where {T<:Real} = x <= 0 ? zero(T) : betalogccdf(d.α, d.β, x / (1 + x)) +function zval(::BetaPrime, x::Real) + y = max(x, 0) + z = y / (1 + y) + # map `Inf` to `Inf` (otherwise it returns `NaN`) + return isinf(x) && x > 0 ? oftype(z, Inf) : z +end +xval(::BetaPrime, z::Real) = z / (1 - z) -quantile(d::BetaPrime, p::Real) = (x = betainvcdf(d.α, d.β, p); x / (1 - x)) -cquantile(d::BetaPrime, p::Real) = (x = betainvccdf(d.α, d.β, p); x / (1 - x)) -invlogcdf(d::BetaPrime, p::Real) = (x = betainvlogcdf(d.α, d.β, p); x / (1 - x)) -invlogccdf(d::BetaPrime, p::Real) = (x = betainvlogccdf(d.α, d.β, p); x / (1 - x)) +for f in (:cdf, :ccdf, :logcdf, :logccdf) + @eval $f(d::BetaPrime, x::Real) = $f(Beta(d.α, d.β; check_args=false), zval(d, x)) +end +for f in (:quantile, :cquantile, :invlogcdf, :invlogccdf) + @eval $f(d::BetaPrime, p::Real) = xval(d, $f(Beta(d.α, d.β; check_args=false), p)) +end #### Sampling diff --git a/src/univariate/continuous/chi.jl b/src/univariate/continuous/chi.jl index ff55afab0..4fa328697 100644 --- a/src/univariate/continuous/chi.jl +++ b/src/univariate/continuous/chi.jl @@ -83,16 +83,13 @@ logpdf(d::Chi, x::Real) = (ν = d.ν; gradlogpdf(d::Chi{T}, x::Real) where {T<:Real} = x >= 0 ? (d.ν - 1) / x - x : zero(T) -cdf(d::Chi, x::Real) = chisqcdf(d.ν, x^2) -ccdf(d::Chi, x::Real) = chisqccdf(d.ν, x^2) -logcdf(d::Chi, x::Real) = chisqlogcdf(d.ν, x^2) -logccdf(d::Chi, x::Real) = chisqlogccdf(d.ν, x^2) - -quantile(d::Chi, p::Real) = sqrt(chisqinvcdf(d.ν, p)) -cquantile(d::Chi, p::Real) = sqrt(chisqinvccdf(d.ν, p)) -invlogcdf(d::Chi, p::Real) = sqrt(chisqinvlogcdf(d.ν, p)) -invlogccdf(d::Chi, p::Real) = sqrt(chisqinvlogccdf(d.ν, p)) +for f in (:cdf, :ccdf, :logcdf, :logccdf) + @eval $f(d::Chi, x::Real) = $f(Chisq(d.ν; check_args=false), max(x, 0)^2) +end +for f in (:quantile, :cquantile, :invlogcdf, :invlogccdf) + @eval $f(d::Chi, p::Real) = sqrt($f(Chisq(d.ν; check_args=false), p)) +end #### Sampling diff --git a/src/univariate/continuous/exponential.jl b/src/univariate/continuous/exponential.jl index 124103cea..6c00447db 100644 --- a/src/univariate/continuous/exponential.jl +++ b/src/univariate/continuous/exponential.jl @@ -62,19 +62,24 @@ entropy(d::Exponential{T}) where {T} = one(T) + log(d.θ) #### Evaluation -zval(d::Exponential, x::Real) = x / d.θ +zval(d::Exponential, x::Real) = max(x / d.θ, 0) xval(d::Exponential, z::Real) = z * d.θ -pdf(d::Exponential, x::Real) = (λ = rate(d); x < 0 ? zero(λ) : λ * exp(-λ * x)) -function logpdf(d::Exponential{T}, x::Real) where T<:Real +function pdf(d::Exponential, x::Real) λ = rate(d) - x < 0 ? -T(Inf) : log(λ) - λ * x + z = λ * exp(-λ * max(x, 0)) + return x < 0 ? zero(z) : z +end +function logpdf(d::Exponential, x::Real) + λ = rate(d) + z = log(λ) - λ * x + return x < 0 ? oftype(z, -Inf) : z end -cdf(d::Exponential{T}, x::Real) where {T<:Real} = x > 0 ? -expm1(-zval(d, x)) : zero(T) -ccdf(d::Exponential{T}, x::Real) where {T<:Real} = x > 0 ? exp(-zval(d, x)) : zero(T) -logcdf(d::Exponential{T}, x::Real) where {T<:Real} = x > 0 ? log1mexp(-zval(d, x)) : -T(Inf) -logccdf(d::Exponential{T}, x::Real) where {T<:Real} = x > 0 ? -zval(d, x) : zero(T) +cdf(d::Exponential, x::Real) = -expm1(-zval(d, x)) +ccdf(d::Exponential, x::Real) = exp(-zval(d, x)) +logcdf(d::Exponential, x::Real) = log1mexp(-zval(d, x)) +logccdf(d::Exponential, x::Real) = -zval(d, x) quantile(d::Exponential, p::Real) = -xval(d, log1p(-p)) cquantile(d::Exponential, p::Real) = -xval(d, log(p)) diff --git a/src/univariate/continuous/frechet.jl b/src/univariate/continuous/frechet.jl index b73c49812..f50092a8c 100644 --- a/src/univariate/continuous/frechet.jl +++ b/src/univariate/continuous/frechet.jl @@ -119,15 +119,18 @@ function logpdf(d::Frechet{T}, x::Real) where T<:Real end end -cdf(d::Frechet{T}, x::Real) where {T<:Real} = x > 0 ? exp(-((d.θ / x) ^ d.α)) : zero(T) -ccdf(d::Frechet{T}, x::Real) where {T<:Real} = x > 0 ? -expm1(-((d.θ / x) ^ d.α)) : one(T) -logcdf(d::Frechet{T}, x::Real) where {T<:Real} = x > 0 ? -(d.θ / x) ^ d.α : -T(Inf) -logccdf(d::Frechet{T}, x::Real) where {T<:Real} = x > 0 ? log1mexp(-((d.θ / x) ^ d.α)) : zero(T) - -quantile(d::Frechet, p::Real) = d.θ * (-log(p)) ^ (-1 / d.α) -cquantile(d::Frechet, p::Real) = d.θ * (-log1p(-p)) ^ (-1 / d.α) -invlogcdf(d::Frechet, lp::Real) = d.θ * (-lp)^(-1 / d.α) -invlogccdf(d::Frechet, lp::Real) = d.θ * (-log1mexp(lp))^(-1 / d.α) +zval(d::Frechet, x::Real) = (d.θ / max(x, 0))^d.α +xval(d::Frechet, z::Real) = d.θ * z^(- 1 / d.α) + +cdf(d::Frechet, x::Real) = exp(- zval(d, x)) +ccdf(d::Frechet, x::Real) = -expm1(- zval(d, x)) +logcdf(d::Frechet, x::Real) = - zval(d, x) +logccdf(d::Frechet, x::Real) = log1mexp(- zval(d, x)) + +quantile(d::Frechet, p::Real) = xval(d, -log(p)) +cquantile(d::Frechet, p::Real) = xval(d, -log1p(-p)) +invlogcdf(d::Frechet, lp::Real) = xval(d, -lp) +invlogccdf(d::Frechet, lp::Real) = xval(d, -log1mexp(lp)) function gradlogpdf(d::Frechet{T}, x::Real) where T<:Real (α, θ) = params(d) diff --git a/src/univariate/continuous/generalizedextremevalue.jl b/src/univariate/continuous/generalizedextremevalue.jl index 8583f5587..9dbd1567b 100644 --- a/src/univariate/continuous/generalizedextremevalue.jl +++ b/src/univariate/continuous/generalizedextremevalue.jl @@ -219,43 +219,21 @@ function pdf(d::GeneralizedExtremeValue{T}, x::Real) where T<:Real end end -function logcdf(d::GeneralizedExtremeValue{T}, x::Real) where T<:Real - if insupport(d, x) - (μ, σ, ξ) = params(d) - - z = (x - μ) / σ # Normalise x. - if abs(ξ) < eps(one(ξ)) # ξ == 0 - return -exp(- z) - else - return - (1 + z * ξ) ^ ( -1/ξ) - end - elseif x <= minimum(d) - return -T(Inf) - else - return zero(T) - end -end - -function cdf(d::GeneralizedExtremeValue{T}, x::Real) where T<:Real - if insupport(d, x) - (μ, σ, ξ) = params(d) - - z = (x - μ) / σ # Normalise x. - if abs(ξ) < eps(one(ξ)) # ξ == 0 - t = exp(-z) - else - t = (1 + z * ξ) ^ (-1/ξ) - end - return exp(- t) - elseif x <= minimum(d) - return zero(T) +function logcdf(d::GeneralizedExtremeValue, x::Real) + μ, σ, ξ = params(d) + z = (x - μ) / σ + return if abs(ξ) < eps(one(ξ)) # ξ == 0 + -exp(- z) else - return one(T) + # y(x) = y(bound) = 0 if x is not in the support with lower/upper bound + y = max(1 + z * ξ, 0) + - y^(-1/ξ) end end +cdf(d::GeneralizedExtremeValue, x::Real) = exp(logcdf(d, x)) -logccdf(d::GeneralizedExtremeValue, x::Real) = log1p(- cdf(d, x)) ccdf(d::GeneralizedExtremeValue, x::Real) = - expm1(logcdf(d, x)) +logccdf(d::GeneralizedExtremeValue, x::Real) = log1mexp(logcdf(d, x)) #### Sampling diff --git a/src/univariate/continuous/generalizedpareto.jl b/src/univariate/continuous/generalizedpareto.jl index a63d6226c..12a6aaa6e 100644 --- a/src/univariate/continuous/generalizedpareto.jl +++ b/src/univariate/continuous/generalizedpareto.jl @@ -132,26 +132,22 @@ function logpdf(d::GeneralizedPareto{T}, x::Real) where T<:Real return p end -function logccdf(d::GeneralizedPareto{T}, x::Real) where T<:Real - (μ, σ, ξ) = params(d) - - # The logccdf is log(0) outside the support range. - p = -T(Inf) - - if x >= μ - z = (x - μ) / σ - if abs(ξ) < eps() - p = -z - elseif ξ > 0 || (ξ < 0 && x < maximum(d)) - p = (-1 / ξ) * log1p(z * ξ) - end +function logccdf(d::GeneralizedPareto, x::Real) + μ, σ, ξ = params(d) + z = max((x - μ) / σ, 0) # z(x) = z(μ) = 0 if x < μ (lower bound) + return if abs(ξ) < eps(one(ξ)) # ξ == 0 + -z + elseif ξ < 0 + # y(x) = y(μ - σ / ξ) = -1 if x > μ - σ / ξ (upper bound) + -log1p(max(z * ξ, -1)) / ξ + else + -log1p(z * ξ) / ξ end - - return p end - ccdf(d::GeneralizedPareto, x::Real) = exp(logccdf(d, x)) + cdf(d::GeneralizedPareto, x::Real) = -expm1(logccdf(d, x)) +logcdf(d::GeneralizedPareto, x::Real) = log1mexp(logccdf(d, x)) function quantile(d::GeneralizedPareto{T}, p::Real) where T<:Real (μ, σ, ξ) = params(d) diff --git a/src/univariate/continuous/inversegamma.jl b/src/univariate/continuous/inversegamma.jl index e68e33004..75e18bd50 100644 --- a/src/univariate/continuous/inversegamma.jl +++ b/src/univariate/continuous/inversegamma.jl @@ -91,15 +91,17 @@ function logpdf(d::InverseGamma, x::Real) α * log(θ) - loggamma(α) - (α + 1) * log(x) - θ / x end -cdf(d::InverseGamma, x::Real) = ccdf(d.invd, 1 / x) -ccdf(d::InverseGamma, x::Real) = cdf(d.invd, 1 / x) -logcdf(d::InverseGamma, x::Real) = logccdf(d.invd, 1 / x) -logccdf(d::InverseGamma, x::Real) = logcdf(d.invd, 1 / x) - -quantile(d::InverseGamma, p::Real) = 1 / cquantile(d.invd, p) -cquantile(d::InverseGamma, p::Real) = 1 / quantile(d.invd, p) -invlogcdf(d::InverseGamma, p::Real) = 1 / invlogccdf(d.invd, p) -invlogccdf(d::InverseGamma, p::Real) = 1 / invlogcdf(d.invd, p) +zval(::InverseGamma, x::Real) = inv(max(x, 0)) + +cdf(d::InverseGamma, x::Real) = ccdf(d.invd, zval(d, x)) +ccdf(d::InverseGamma, x::Real) = cdf(d.invd, zval(d, x)) +logcdf(d::InverseGamma, x::Real) = logccdf(d.invd, zval(d, x)) +logccdf(d::InverseGamma, x::Real) = logcdf(d.invd, zval(d, x)) + +quantile(d::InverseGamma, p::Real) = inv(cquantile(d.invd, p)) +cquantile(d::InverseGamma, p::Real) = inv(quantile(d.invd, p)) +invlogcdf(d::InverseGamma, p::Real) = inv(invlogccdf(d.invd, p)) +invlogccdf(d::InverseGamma, p::Real) = inv(invlogcdf(d.invd, p)) function mgf(d::InverseGamma{T}, t::Real) where T<:Real (a, b) = params(d) diff --git a/src/univariate/continuous/inversegaussian.jl b/src/univariate/continuous/inversegaussian.jl index 498b454f9..89a19b74e 100644 --- a/src/univariate/continuous/inversegaussian.jl +++ b/src/univariate/continuous/inversegaussian.jl @@ -92,52 +92,54 @@ function logpdf(d::InverseGaussian{T}, x::Real) where T<:Real end end -function cdf(d::InverseGaussian{T}, x::Real) where T<:Real - if x > 0 - μ, λ = params(d) - u = sqrt(λ / x) - v = x / μ - return normcdf(u * (v - 1)) + exp(2λ / μ) * normcdf(-u * (v + 1)) - else - return zero(T) - end +function cdf(d::InverseGaussian, x::Real) + μ, λ = params(d) + y = max(x, 0) + u = sqrt(λ / y) + v = y / μ + z = normcdf(u * (v - 1)) + exp(2λ / μ) * normcdf(-u * (v + 1)) + + # otherwise `NaN` is returned for `+Inf` + return isinf(x) && x > 0 ? one(z) : z end -function ccdf(d::InverseGaussian{T}, x::Real) where T<:Real - if x > 0 - μ, λ = params(d) - u = sqrt(λ / x) - v = x / μ - normccdf(u * (v - 1)) - exp(2λ / μ) * normcdf(-u * (v + 1)) - else - return one(T) - end +function ccdf(d::InverseGaussian, x::Real) + μ, λ = params(d) + y = max(x, 0) + u = sqrt(λ / y) + v = y / μ + z = normccdf(u * (v - 1)) - exp(2λ / μ) * normcdf(-u * (v + 1)) + + # otherwise `NaN` is returned for `+Inf` + return isinf(x) && x > 0 ? zero(z) : z end -function logcdf(d::InverseGaussian{T}, x::Real) where T<:Real - if x > 0 - μ, λ = params(d) - u = sqrt(λ / x) - v = x / μ - a = normlogcdf(u * (v -1)) - b = 2λ / μ + normlogcdf(-u * (v + 1)) - a + log1pexp(b - a) - else - return -T(Inf) - end +function logcdf(d::InverseGaussian, x::Real) + μ, λ = params(d) + y = max(x, 0) + u = sqrt(λ / y) + v = y / μ + + a = normlogcdf(u * (v - 1)) + b = 2λ / μ + normlogcdf(-u * (v + 1)) + z = logaddexp(a, b) + + # otherwise `NaN` is returned for `+Inf` + return isinf(x) && x > 0 ? zero(z) : z end -function logccdf(d::InverseGaussian{T}, x::Real) where T<:Real - if x > 0 - μ, λ = params(d) - u = sqrt(λ / x) - v = x / μ - a = normlogccdf(u * (v - 1)) - b = 2λ / μ + normlogcdf(-u * (v + 1)) - a + log1mexp(b - a) - else - return zero(T) - end +function logccdf(d::InverseGaussian, x::Real) + μ, λ = params(d) + y = max(x, 0) + u = sqrt(λ / y) + v = y / μ + + a = normlogccdf(u * (v - 1)) + b = 2λ / μ + normlogcdf(-u * (v + 1)) + z = logsubexp(a, b) + + # otherwise `NaN` is returned for `+Inf` + return isinf(x) && x > 0 ? oftype(z, -Inf) : z end @quantile_newton InverseGaussian diff --git a/src/univariate/continuous/pareto.jl b/src/univariate/continuous/pareto.jl index 77d158b56..00901b876 100644 --- a/src/univariate/continuous/pareto.jl +++ b/src/univariate/continuous/pareto.jl @@ -91,19 +91,17 @@ function logpdf(d::Pareto{T}, x::Real) where T<:Real x >= θ ? log(α) + α * log(θ) - (α + 1) * log(x) : -T(Inf) end -function ccdf(d::Pareto{T}, x::Real) where T<:Real - (α, θ) = params(d) - x >= θ ? (θ / x)^α : one(T) +function ccdf(d::Pareto, x::Real) + α, θ = params(d) + return (θ / max(x, θ))^α end - cdf(d::Pareto, x::Real) = 1 - ccdf(d, x) -function logccdf(d::Pareto{T}, x::Real) where T<:Real - (α, θ) = params(d) - x >= θ ? α * log(θ / x) : zero(T) +function logccdf(d::Pareto, x::Real) + α, θ = params(d) + return xlogy(α, θ / max(x, θ)) end - -logcdf(d::Pareto, x::Real) = log1p(-ccdf(d, x)) +logcdf(d::Pareto, x::Real) = log1mexp(logccdf(d, x)) cquantile(d::Pareto, p::Real) = d.θ / p^(1 / d.α) quantile(d::Pareto, p::Real) = cquantile(d, 1 - p) diff --git a/src/univariate/continuous/rayleigh.jl b/src/univariate/continuous/rayleigh.jl index 374d65ac2..0b93dcdc5 100644 --- a/src/univariate/continuous/rayleigh.jl +++ b/src/univariate/continuous/rayleigh.jl @@ -77,10 +77,13 @@ function logpdf(d::Rayleigh{T}, x::Real) where T<:Real x > 0 ? log(x / σ2) - (x^2) / (2σ2) : -T(Inf) end -logccdf(d::Rayleigh{T}, x::Real) where {T<:Real} = x > 0 ? - (x^2) / (2d.σ^2) : zero(T) +function logccdf(d::Rayleigh, x::Real) + z = - x^2 / (2 * d.σ^2) + return x > 0 ? z : isnan(x) ? oftype(z, NaN) : zero(x) +end ccdf(d::Rayleigh, x::Real) = exp(logccdf(d, x)) -cdf(d::Rayleigh, x::Real) = 1 - ccdf(d, x) +cdf(d::Rayleigh, x::Real) = -expm1(logccdf(d, x)) logcdf(d::Rayleigh, x::Real) = log1mexp(logccdf(d, x)) quantile(d::Rayleigh, p::Real) = sqrt(-2d.σ^2 * log1p(-p)) diff --git a/src/univariate/continuous/symtriangular.jl b/src/univariate/continuous/symtriangular.jl index dd12f06e5..2f15f99f7 100644 --- a/src/univariate/continuous/symtriangular.jl +++ b/src/univariate/continuous/symtriangular.jl @@ -68,42 +68,30 @@ entropy(d::SymTriangularDist) = 1//2 + log(d.σ) #### Evaluation -zval(d::SymTriangularDist, x::Real) = (x - d.μ) / d.σ +zval(d::SymTriangularDist, x::Real) = min(abs(x - d.μ) / d.σ, 1) xval(d::SymTriangularDist, z::Real) = d.μ + z * d.σ +pdf(d::SymTriangularDist, x::Real) = (1 - zval(d, x)) / scale(d) +logpdf(d::SymTriangularDist, x::Real) = log(pdf(d, x)) -pdf(d::SymTriangularDist{T}, x::Real) where {T<:Real} = insupport(d, x) ? (1 - abs(zval(d, x))) / scale(d) : zero(T) - -function logpdf(d::SymTriangularDist{T}, x::Real) where T<:Real - insupport(d, x) ? log((1 - abs(zval(d, x))) / scale(d)) : -convert(T, T(Inf)) +function cdf(d::SymTriangularDist, x::Real) + r = (1 - zval(d, x))^2/2 + return x < d.μ ? r : 1 - r end -function cdf(d::SymTriangularDist{T}, x::Real) where T<:Real - (μ, σ) = params(d) - x <= μ - σ ? zero(T) : - x <= μ ? (1 + zval(d, x))^2/2 : - x < μ + σ ? 1 - (1 - zval(d, x))^2/2 : one(T) +function ccdf(d::SymTriangularDist, x::Real) + r = (1 - zval(d, x))^2/2 + return x < d.μ ? 1 - r : r end -function ccdf(d::SymTriangularDist{T}, x::Real) where T<:Real - (μ, σ) = params(d) - x <= μ - σ ? one(T) : - x <= μ ? 1 - (1 + zval(d, x))^2/2 : - x < μ + σ ? (1 - zval(d, x))^2/2 : zero(T) +function logcdf(d::SymTriangularDist, x::Real) + log_r = 2 * log1p(- zval(d, x)) + loghalf + return x < d.μ ? log_r : log1mexp(log_r) end -function logcdf(d::SymTriangularDist{T}, x::Real) where T<:Real - (μ, σ) = params(d) - x <= μ - σ ? -T(Inf) : - x <= μ ? loghalf + 2*log1p(zval(d, x)) : - x < μ + σ ? log1p(-1/2 * (1 - zval(d, x))^2) : zero(T) -end - -function logccdf(d::SymTriangularDist{T}, x::Real) where T<:Real - (μ, σ) = params(d) - x <= μ - σ ? zero(T) : - x <= μ ? log1p(-1/2 * (1 + zval(d, x))^2) : - x < μ + σ ? loghalf + 2*log1p(-zval(d, x)) : -T(Inf) +function logccdf(d::SymTriangularDist, x::Real) + log_r = 2 * log1p(- zval(d, x)) + loghalf + return x < d.μ ? log1mexp(log_r) : log_r end quantile(d::SymTriangularDist, p::Real) = p < 1/2 ? xval(d, sqrt(2p) - 1) : diff --git a/src/univariate/continuous/triangular.jl b/src/univariate/continuous/triangular.jl index 257a20695..15b430c2a 100644 --- a/src/univariate/continuous/triangular.jl +++ b/src/univariate/continuous/triangular.jl @@ -90,21 +90,23 @@ entropy(d::TriangularDist{T}) where {T<:Real} = one(T)/2 + log((d.b - d.a) / 2) #### Evaluation -function pdf(d::TriangularDist{T}, x::Real) where T<:Real - (a, b, c) = params(d) - x <= a ? zero(T) : - x < c ? 2 * (x - a) / ((b - a) * (c - a)) : - x == c ? 2 / (b - a) : - x <= b ? 2 * (b - x) / ((b - a) * (b - c)) : zero(T) +function pdf(d::TriangularDist, x::Real) + a, b, c = params(d) + return if x < c + 2 * max(x - a, 0) / ((b - a) * (c - a)) + else + 2 * max(b - x, 0) / ((b - a) * (b - c)) + end end logpdf(d::TriangularDist, x::Real) = log(pdf(d, x)) -function cdf(d::TriangularDist{T}, x::Real) where T<:Real - (a, b, c) = params(d) - x <= a ? zero(T) : - x < c ? (x - a)^2 / ((b - a) * (c - a)) : - x == c ? (c - a) / (b - a) : - x <= b ? 1 - (b - x)^2 / ((b - a) * (b - c)) : one(T) +function cdf(d::TriangularDist, x::Real) + a, b, c = params(d) + return if x < c + max(x - a, 0)^2 / ((b - a) * (c - a)) + else + 1 - max(b - x, 0)^2 / ((b - a) * (b - c)) + end end function quantile(d::TriangularDist, p::Real) diff --git a/src/univariate/continuous/vonmises.jl b/src/univariate/continuous/vonmises.jl index 605aefc47..eb55c5b64 100644 --- a/src/univariate/continuous/vonmises.jl +++ b/src/univariate/continuous/vonmises.jl @@ -70,7 +70,11 @@ pdf(d::VonMises{T}, x::Real) where T<:Real = logpdf(d::VonMises{T}, x::Real) where T<:Real = minimum(d) ≤ x ≤ maximum(d) ? d.κ * (cos(x - d.μ) - 1) - log(d.I0κx) - log2π : -T(Inf) -cdf(d::VonMises, x::Real) = _vmcdf(d.κ, d.I0κx, x - d.μ, 1e-15) +function cdf(d::VonMises, x::Real) + # handle `±Inf` for which `sin` can't be evaluated + z = clamp(x, extrema(d)...) + return _vmcdf(d.κ, d.I0κx, z - d.μ, 1e-15) +end function _vmcdf(κ::Real, I0κx::Real, x::Real, tol::Real) tol *= exp(-κ) diff --git a/src/univariate/continuous/weibull.jl b/src/univariate/continuous/weibull.jl index 40f50533a..e36631c31 100644 --- a/src/univariate/continuous/weibull.jl +++ b/src/univariate/continuous/weibull.jl @@ -113,18 +113,18 @@ function logpdf(d::Weibull{T}, x::Real) where T<:Real end end -zv(d::Weibull, x::Real) = (x / d.θ) ^ d.α -xv(d::Weibull, z::Real) = d.θ * z ^ (1 / d.α) - -cdf(d::Weibull{T}, x::Real) where {T<:Real} = x > 0 ? -expm1(-zv(d, x)) : zero(T) -ccdf(d::Weibull{T}, x::Real) where {T<:Real} = x > 0 ? exp(-zv(d, x)) : one(T) -logcdf(d::Weibull{T}, x::Real) where {T<:Real} = x > 0 ? log1mexp(-zv(d, x)) : -T(Inf) -logccdf(d::Weibull{T}, x::Real) where {T<:Real} = x > 0 ? -zv(d, x) : zero(T) - -quantile(d::Weibull, p::Real) = xv(d, -log1p(-p)) -cquantile(d::Weibull, p::Real) = xv(d, -log(p)) -invlogcdf(d::Weibull, lp::Real) = xv(d, -log1mexp(lp)) -invlogccdf(d::Weibull, lp::Real) = xv(d, -lp) +zval(d::Weibull, x::Real) = (max(x, 0) / d.θ) ^ d.α +xval(d::Weibull, z::Real) = d.θ * z ^ (1 / d.α) + +cdf(d::Weibull, x::Real) = -expm1(- zval(d, x)) +ccdf(d::Weibull, x::Real) = exp(- zval(d, x)) +logcdf(d::Weibull, x::Real) = log1mexp(- zval(d, x)) +logccdf(d::Weibull, x::Real) = - zval(d, x) + +quantile(d::Weibull, p::Real) = xval(d, -log1p(-p)) +cquantile(d::Weibull, p::Real) = xval(d, -log(p)) +invlogcdf(d::Weibull, lp::Real) = xval(d, -log1mexp(lp)) +invlogccdf(d::Weibull, lp::Real) = xval(d, -lp) function gradlogpdf(d::Weibull{T}, x::Real) where T<:Real if insupport(Weibull, x) diff --git a/src/univariate/discrete/betabinomial.jl b/src/univariate/discrete/betabinomial.jl index f03ccf4f1..f4177632e 100644 --- a/src/univariate/discrete/betabinomial.jl +++ b/src/univariate/discrete/betabinomial.jl @@ -92,6 +92,16 @@ function logpdf(d::BetaBinomial, k::Real) return _insupport ? result : oftype(result, -Inf) end +# sum values of the probability mass function +cdf(d::BetaBinomial, k::Int) = integerunitrange_cdf(d, k) + +for f in (:cdf, :logcdf, :logccdf) + @eval begin + $f(d::BetaBinomial, k::Real) = $(Symbol(f, :_int))(d, k) + $f(d::BetaBinomial, k::Int) = $(Symbol(:integerunitrange_, f))(d, k) + end +end + entropy(d::BetaBinomial) = entropy(Categorical(pdf.(Ref(d),support(d)))) median(d::BetaBinomial) = median(Categorical(pdf.(Ref(d),support(d)))) - 1 mode(d::BetaBinomial) = argmax(pdf.(Ref(d),support(d))) - 1 diff --git a/src/univariate/discrete/binomial.jl b/src/univariate/discrete/binomial.jl index bc495bcb7..afdf89472 100644 --- a/src/univariate/discrete/binomial.jl +++ b/src/univariate/discrete/binomial.jl @@ -110,18 +110,6 @@ end @_delegate_statsfuns Binomial binom n p -function pdf(d::Binomial, x::Real) - _insupport = insupport(d, x) - s = pdf(d, _insupport ? round(Int, x) : 0) - return _insupport ? s : zero(s) -end - -function logpdf(d::Binomial, x::Real) - _insupport = insupport(d, x) - s = logpdf(d, _insupport ? round(Int, x) : 0) - return _insupport ? s : oftype(s, -Inf) -end - function rand(rng::AbstractRNG, d::Binomial) p, n = d.p, d.n if p <= 0.5 diff --git a/src/univariate/discrete/categorical.jl b/src/univariate/discrete/categorical.jl index 36e806c76..851881172 100644 --- a/src/univariate/discrete/categorical.jl +++ b/src/univariate/discrete/categorical.jl @@ -68,17 +68,12 @@ end ### Evaluation -function cdf(d::Categorical{T}, x::Int) where T<:Real - k = ncategories(d) - p = probs(d) - x < 1 && return zero(T) - x >= k && return one(T) - c = p[1] - for i = 2:x - @inbounds c += p[i] - end - return c -end +# the fallbacks are overridden by `DiscreteNonParameteric` +cdf(d::Categorical, x::Real) = cdf_int(d, x) +ccdf(d::Categorical, x::Real) = ccdf_int(d, x) + +cdf(d::Categorical, x::Int) = integerunitrange_cdf(d, x) +ccdf(d::Categorical, x::Int) = integerunitrange_ccdf(d, x) function pdf(d::Categorical, x::Real) ps = probs(d) diff --git a/src/univariate/discrete/dirac.jl b/src/univariate/discrete/dirac.jl index 108a5609b..7be41459f 100644 --- a/src/univariate/discrete/dirac.jl +++ b/src/univariate/discrete/dirac.jl @@ -42,8 +42,10 @@ entropy(d::Dirac{T}) where {T} = zero(T) pdf(d::Dirac, x::Real) = insupport(d, x) ? 1.0 : 0.0 logpdf(d::Dirac, x::Real) = insupport(d, x) ? 0.0 : -Inf -cdf(d::Dirac, x::Real) = x < d.value ? 0.0 : 1.0 -cdf(d::Dirac, x::Integer) = x < d.value ? 0.0 : 1.0 +cdf(d::Dirac, x::Real) = x < d.value ? 0.0 : isnan(x) ? NaN : 1.0 +logcdf(d::Dirac, x::Real) = x < d.value ? -Inf : isnan(x) ? NaN : 0.0 +ccdf(d::Dirac, x::Real) = x < d.value ? 1.0 : isnan(x) ? NaN : 0.0 +logccdf(d::Dirac, x::Real) = x < d.value ? 0.0 : isnan(x) ? NaN : -Inf quantile(d::Dirac{T}, p::Real) where {T} = 0 <= p <= 1 ? d.value : T(NaN) diff --git a/src/univariate/discrete/discretenonparametric.jl b/src/univariate/discrete/discretenonparametric.jl index 940166cd1..590fa54af 100644 --- a/src/univariate/discrete/discretenonparametric.jl +++ b/src/univariate/discrete/discretenonparametric.jl @@ -105,43 +105,57 @@ function pdf(d::DiscreteNonParametric, x::Real) end logpdf(d::DiscreteNonParametric, x::Real) = log(pdf(d, x)) -# Helper functions for cdf and ccdf required to fix ambiguous method -# error involving [c]cdf(::DisceteUnivariateDistribution, ::Int) -function _cdf(d::DiscreteNonParametric{T,P}, x::T) where {T,P} - x > maximum(d) && return 1.0 - s = zero(P) +function cdf(d::DiscreteNonParametric, x::Real) ps = probs(d) + P = float(eltype(ps)) + + # trivial cases + x < minimum(d) && return zero(P) + x >= maximum(d) && return one(P) + isnan(x) && return P(NaN) + + n = length(ps) stop_idx = searchsortedlast(support(d), x) - for i in 1:stop_idx - s += ps[i] + s = zero(P) + if stop_idx < div(n, 2) + @inbounds for i in 1:stop_idx + s += ps[i] + end + else + @inbounds for i in (stop_idx + 1):n + s += ps[i] + end + s = 1 - s end + return s end -cdf(d::DiscreteNonParametric{T}, x::Integer) where T = _cdf(d, convert(T, x)) -cdf(d::DiscreteNonParametric{T}, x::Real) where T = _cdf(d, convert(T, x)) -function _ccdf(d::DiscreteNonParametric{T,P}, x::T) where {T,P} - x < minimum(d) && return 1.0 - s = zero(P) +function ccdf(d::DiscreteNonParametric, x::Real) ps = probs(d) + P = float(eltype(ps)) + + # trivial cases + x < minimum(d) && return one(P) + x >= maximum(d) && return zero(P) + isnan(x) && return P(NaN) + + n = length(ps) stop_idx = searchsortedlast(support(d), x) - for i in (stop_idx+1):length(ps) - s += ps[i] + s = zero(P) + if stop_idx < div(n, 2) + @inbounds for i in 1:stop_idx + s += ps[i] + end + s = 1 - s + else + @inbounds for i in (stop_idx + 1):n + s += ps[i] + end end + return s end -ccdf(d::DiscreteNonParametric{T}, x::Integer) where T = _ccdf(d, convert(T, x)) -ccdf(d::DiscreteNonParametric{T}, x::Real) where T = _ccdf(d, convert(T, x)) - -# fix incorrect defaults -for f in (:cdf, :ccdf) - _f = Symbol(:_, f) - logf = Symbol(:log, f) - @eval begin - $logf(d::DiscreteNonParametric{T}, x::Integer) where T = log($_f(d, convert(T, x))) - $logf(d::DiscreteNonParametric{T}, x::Real) where T = log($_f(d, convert(T, x))) - end -end function quantile(d::DiscreteNonParametric, q::Real) 0 <= q <= 1 || throw(DomainError()) diff --git a/src/univariate/discrete/discreteuniform.jl b/src/univariate/discrete/discreteuniform.jl index cc04abc1b..bab5393ec 100644 --- a/src/univariate/discrete/discreteuniform.jl +++ b/src/univariate/discrete/discreteuniform.jl @@ -70,13 +70,21 @@ modes(d::DiscreteUniform) = [d.a:d.b] ### Evaluation -cdf(d::DiscreteUniform, x::Int) = (x < d.a ? 0.0 : - x > d.b ? 1.0 : - (floor(Int,x) - d.a + 1.0) * d.pv) - pdf(d::DiscreteUniform, x::Real) = insupport(d, x) ? d.pv : zero(d.pv) logpdf(d::DiscreteUniform, x::Real) = log(pdf(d, x)) +function cdf(d::DiscreteUniform, x::Int) + a = d.a + result = (x - a + 1) * d.pv + return if x < a + zero(result) + elseif x >= d.b + one(result) + else + result + end +end + quantile(d::DiscreteUniform, p::Float64) = d.a + floor(Int,p * span(d)) function mgf(d::DiscreteUniform, t::T) where {T <: Real} diff --git a/src/univariate/discrete/geometric.jl b/src/univariate/discrete/geometric.jl index fece8275a..64cdb4ba3 100644 --- a/src/univariate/discrete/geometric.jl +++ b/src/univariate/discrete/geometric.jl @@ -73,25 +73,24 @@ function logpdf(d::Geometric, x::Real) insupport(d, x) ? log(d.p) + log1p(-d.p) * x : log(zero(d.p)) end -function cdf(d::Geometric{T}, x::Int) where T<:Real - x < 0 && return zero(T) +function cdf(d::Geometric, x::Int) p = succprob(d) - n = x + 1 + n = max(x + 1, 0) p < 1/2 ? -expm1(log1p(-p)*n) : 1 - (1 - p)^n end -function ccdf(d::Geometric{T}, x::Int) where T<:Real - x < 0 && return one(T) +ccdf(d::Geometric, x::Real) = ccdf_int(d, x) +function ccdf(d::Geometric, x::Int) p = succprob(d) - n = x + 1 + n = max(x + 1, 0) p < 1/2 ? exp(log1p(-p)*n) : (1 - p)^n end -function logcdf(d::Geometric{T}, x::Int) where T<:Real - x < 0 ? -T(Inf) : log1mexp(log1p(-d.p) * (x + 1)) -end +logcdf(d::Geometric, x::Real) = logcdf_int(d, x) +logcdf(d::Geometric, x::Int) = log1mexp(log1p(-d.p) * max(x + 1, 0)) -logccdf(d::Geometric, x::Int) = x < 0 ? zero(d.p) : log1p(-d.p) * (x + 1) +logccdf(d::Geometric, x::Real) = logccdf_int(d, x) +logccdf(d::Geometric, x::Int) = log1p(-d.p) * max(x + 1, 0) quantile(d::Geometric, p::Real) = invlogccdf(d, log1p(-p)) diff --git a/src/univariate/discrete/hypergeometric.jl b/src/univariate/discrete/hypergeometric.jl index 39f1a41bf..fe4b6ada0 100644 --- a/src/univariate/discrete/hypergeometric.jl +++ b/src/univariate/discrete/hypergeometric.jl @@ -79,18 +79,6 @@ entropy(d::Hypergeometric) = entropy(pdf.(Ref(d), support(d))) @_delegate_statsfuns Hypergeometric hyper ns nf n -function pdf(d::Hypergeometric, x::Real) - _insupport = insupport(d, x) - s = pdf(d, _insupport ? round(Int, x) : 0) - return _insupport ? s : zero(s) -end - -function logpdf(d::Hypergeometric, x::Real) - _insupport = insupport(d, x) - s = logpdf(d, _insupport ? round(Int, x) : 0) - return _insupport ? s : oftype(s, -Inf) -end - ## sampling # TODO: remove RFunctions dependency. Implement: diff --git a/src/univariate/discrete/negativebinomial.jl b/src/univariate/discrete/negativebinomial.jl index 7809f3691..2b0be1f58 100644 --- a/src/univariate/discrete/negativebinomial.jl +++ b/src/univariate/discrete/negativebinomial.jl @@ -100,13 +100,13 @@ function logpdf(d::NegativeBinomial, k::Real) end # cdf and quantile functions are more involved so we still rely on Rmath -cdf( d::NegativeBinomial, x::Int) = nbinomcdf( d.r, d.p, x) -ccdf( d::NegativeBinomial, x::Int) = nbinomccdf( d.r, d.p, x) -logcdf( d::NegativeBinomial, x::Int) = nbinomlogcdf( d.r, d.p, x) -logccdf( d::NegativeBinomial, x::Int) = nbinomlogccdf( d.r, d.p, x) -quantile( d::NegativeBinomial, q::Real) = convert(Int, nbinominvcdf( d.r, d.p, q)) -cquantile( d::NegativeBinomial, q::Real) = convert(Int, nbinominvccdf( d.r, d.p, q)) -invlogcdf( d::NegativeBinomial, lq::Real) = convert(Int, nbinominvlogcdf( d.r, d.p, lq)) +cdf(d::NegativeBinomial, x::Real) = nbinomcdf(d.r, d.p, x) +ccdf(d::NegativeBinomial, x::Real) = nbinomccdf(d.r, d.p, x) +logcdf(d::NegativeBinomial, x::Real) = nbinomlogcdf(d.r, d.p, x) +logccdf(d::NegativeBinomial, x::Real) = nbinomlogccdf(d.r, d.p, x) +quantile(d::NegativeBinomial, q::Real) = convert(Int, nbinominvcdf(d.r, d.p, q)) +cquantile(d::NegativeBinomial, q::Real) = convert(Int, nbinominvccdf(d.r, d.p, q)) +invlogcdf(d::NegativeBinomial, lq::Real) = convert(Int, nbinominvlogcdf(d.r, d.p, lq)) invlogccdf(d::NegativeBinomial, lq::Real) = convert(Int, nbinominvlogccdf(d.r, d.p, lq)) ## sampling diff --git a/src/univariate/discrete/noncentralhypergeometric.jl b/src/univariate/discrete/noncentralhypergeometric.jl index 9e25939da..04abab549 100644 --- a/src/univariate/discrete/noncentralhypergeometric.jl +++ b/src/univariate/discrete/noncentralhypergeometric.jl @@ -119,7 +119,7 @@ function pdf(d::FisherNoncentralHypergeometric, k::Integer) return fₖ/s end -logpdf(d::FisherNoncentralHypergeometric, k::Integer) = log(pdf(d, k)) +logpdf(d::FisherNoncentralHypergeometric, k::Real) = log(pdf(d, k)) function cdf(d::FisherNoncentralHypergeometric, k::Integer) ω, _ = promote(d.ω, float(k)) @@ -167,8 +167,6 @@ function cdf(d::FisherNoncentralHypergeometric, k::Integer) return Fₖ/s end -logcdf(d::FisherNoncentralHypergeometric, k::Integer) = log(cdf(d, k)) - function _expectation(f, d::FisherNoncentralHypergeometric) ω = float(d.ω) l = max(0, d.n - d.nf) @@ -259,3 +257,14 @@ function logpdf(d::WalleniusNoncentralHypergeometric, k::Real) return log(zero(k)) end end + +cdf(d::WalleniusNoncentralHypergeometric, k::Integer) = integerunitrange_cdf(d, k) + +for f in (:ccdf, :logcdf, :logccdf) + @eval begin + $f(d::WalleniusNoncentralHypergeometric, k::Real) = $(Symbol(f, :_int))(d, k) + function $f(d::WalleniusNoncentralHypergeometric, k::Integer) + return $(Symbol(:integerunitrange_, f))(d, k) + end + end +end diff --git a/src/univariate/discrete/poisson.jl b/src/univariate/discrete/poisson.jl index d971aee4e..edce7dc0d 100644 --- a/src/univariate/discrete/poisson.jl +++ b/src/univariate/discrete/poisson.jl @@ -89,18 +89,6 @@ end @_delegate_statsfuns Poisson pois λ -function pdf(d::Poisson, x::Real) - _insupport = insupport(d, x) - s = pdf(d, _insupport ? round(Int, x) : 0) - return _insupport ? s : zero(s) -end - -function logpdf(d::Poisson, x::Real) - _insupport = insupport(d, x) - s = logpdf(d, _insupport ? round(Int, x) : 0) - return _insupport ? s : oftype(s, -Inf) -end - function mgf(d::Poisson, t::Real) λ = rate(d) return exp(λ * (exp(t) - 1)) diff --git a/src/univariate/discrete/poissonbinomial.jl b/src/univariate/discrete/poissonbinomial.jl index 841238de5..a8ed3c0fe 100644 --- a/src/univariate/discrete/poissonbinomial.jl +++ b/src/univariate/discrete/poissonbinomial.jl @@ -126,6 +126,16 @@ end pdf(d::PoissonBinomial, k::Real) = insupport(d, k) ? d.pmf[Int(k+1)] : zero(eltype(d.pmf)) logpdf(d::PoissonBinomial, k::Real) = log(pdf(d, k)) +cdf(d::PoissonBinomial, k::Int) = integerunitrange_cdf(d, k) + +# leads to numerically more accurate results +for f in (:ccdf, :logcdf, :logccdf) + @eval begin + $f(d::PoissonBinomial, k::Real) = $(Symbol(f, :_int))(d, k) + $f(d::PoissonBinomial, k::Int) = $(Symbol(:integerunitrange_, f))(d, k) + end +end + # Computes the pdf of a poisson-binomial random variable using # simple, fast recursive formula # diff --git a/src/univariate/discrete/skellam.jl b/src/univariate/discrete/skellam.jl index 72fb085cc..c352f3835 100644 --- a/src/univariate/discrete/skellam.jl +++ b/src/univariate/discrete/skellam.jl @@ -100,11 +100,13 @@ Computing cdf of the Skellam distribution. """ function cdf(d::Skellam, t::Integer) μ1, μ2 = params(d) - (t < 0) ? nchisqcdf(-2*t, 2*μ1, 2*μ2) : 1.0 - nchisqcdf(2*(t+1), 2*μ2, 2*μ1) + return if t < 0 + nchisqcdf(-2*t, 2*μ1, 2*μ2) + else + 1 - nchisqcdf(2*(t+1), 2*μ2, 2*μ1) + end end -cdf(d::Skellam, t::Real) = cdf(d, floor(Int, t)) - #### Sampling rand(rng::AbstractRNG, d::Skellam) = rand(rng, Poisson(d.μ1)) - rand(rng, Poisson(d.μ2)) diff --git a/src/univariate/discrete/soliton.jl b/src/univariate/discrete/soliton.jl index 35b87516c..f13d9e3df 100644 --- a/src/univariate/discrete/soliton.jl +++ b/src/univariate/discrete/soliton.jl @@ -97,7 +97,7 @@ function pdf(Ω::Soliton, i::Real) end logpdf(Ω::Soliton, i::Real) = log(pdf(Ω, i)) -function Distributions.cdf(Ω::Soliton, i::Integer) +function cdf(Ω::Soliton, i::Integer) i < Ω.degrees[1] && return 0.0 i > Ω.degrees[end] && return 1.0 j = searchsortedfirst(Ω.degrees, i) diff --git a/src/univariate/locationscale.jl b/src/univariate/locationscale.jl index 24eae55e8..ba61c8d90 100644 --- a/src/univariate/locationscale.jl +++ b/src/univariate/locationscale.jl @@ -103,21 +103,14 @@ mgf(d::LocationScale,t::Real) = exp(d.μ*t) * mgf(d.ρ,d.σ*t) #### Evaluation & Sampling -pdf(d::ContinuousLocationScale,x::Real) = pdf(d.ρ,(x-d.μ)/d.σ) / d.σ +pdf(d::ContinuousLocationScale, x::Real) = pdf(d.ρ,(x-d.μ)/d.σ) / d.σ pdf(d::DiscreteLocationScale, x::Real) = pdf(d.ρ,(x-d.μ)/d.σ) logpdf(d::ContinuousLocationScale,x::Real) = logpdf(d.ρ,(x-d.μ)/d.σ) - log(d.σ) logpdf(d::DiscreteLocationScale, x::Real) = logpdf(d.ρ,(x-d.μ)/d.σ) -# additional definitions are required to fix ambiguity errors and incorrect defaults for f in (:cdf, :ccdf, :logcdf, :logccdf) - _f = Symbol(:_, f) - @eval begin - $f(d::LocationScale, x::Real) = $_f(d, x) - $f(d::DiscreteLocationScale, x::Real) = $_f(d, x) - $f(d::DiscreteLocationScale, x::Integer) = $_f(d, x) - $_f(d::LocationScale, x::Real) = $f(d.ρ, (x - d.μ) / d.σ) - end + @eval $f(d::LocationScale, x::Real) = $f(d.ρ, (x - d.μ) / d.σ) end quantile(d::LocationScale,q::Real) = d.μ + d.σ * quantile(d.ρ,q) @@ -134,4 +127,3 @@ Base.:*(x::Real, d::UnivariateDistribution) = LocationScale(zero(x), x, d) Base.:*(d::UnivariateDistribution, x::Real) = x * d Base.:-(d::UnivariateDistribution, x::Real) = d + -x Base.:/(d::UnivariateDistribution, x::Real) = inv(x) * d - diff --git a/src/univariates.jl b/src/univariates.jl index 833cb9e88..2b52c3007 100644 --- a/src/univariates.jl +++ b/src/univariates.jl @@ -20,6 +20,12 @@ isupperbounded(d::Union{D,Type{D}}) where {D<:UnivariateDistribution} = maximum( hasfinitesupport(d::Union{D,Type{D}}) where {D<:DiscreteUnivariateDistribution} = isbounded(d) hasfinitesupport(d::Union{D,Type{D}}) where {D<:ContinuousUnivariateDistribution} = false +struct HasNonIntegerSupport end +struct HasIntegerSupport end +struct HasIntegerUnitrangeSupport end + +supporttype(::UnivariateDistribution) = HasNonIntegerSupport() + """ params(d::UnivariateDistribution) @@ -338,43 +344,19 @@ Evaluate the cumulative probability at `x`. See also [`ccdf`](@ref), [`logcdf`](@ref), and [`logccdf`](@ref). """ cdf(d::UnivariateDistribution, x::Real) -cdf(d::DiscreteUnivariateDistribution, x::Integer) = cdf(d, x, FiniteSupport{hasfinitesupport(d)}) - -# Discrete univariate with infinite support -function cdf(d::DiscreteUnivariateDistribution, x::Integer, ::Type{FiniteSupport{false}}) - c = 0.0 - for y = minimum(d):x - c += pdf(d, y) - end - return c -end - -# Discrete univariate with finite support -function cdf(d::DiscreteUnivariateDistribution, x::Integer, ::Type{FiniteSupport{true}}) - # calculate from left if x < (min + max)/2 - # (same as infinite support version) - x <= div(minimum(d) + maximum(d),2) && return cdf(d, x, FiniteSupport{false}) - - # otherwise, calculate from the right - c = 1.0 - for y = x+1:maximum(d) - c -= pdf(d, y) - end - return c -end - -cdf(d::DiscreteUnivariateDistribution, x::Real) = cdf(d, floor(Int,x)) -cdf(d::ContinuousUnivariateDistribution, x::Real) = throw(MethodError(cdf, (d, x))) +# fallback for discrete distribution: +# base computation on `cdf(d, floor(Int, x))` and handle `NaN` and `±Inf` +# this is only correct for distributions with integer-valued support but will error if +# `cdf(d, ::Int)` is not defined (so it should not return incorrect values silently) +cdf(d::DiscreteUnivariateDistribution, x::Real) = cdf_int(d, x) """ ccdf(d::UnivariateDistribution, x::Real) The complementary cumulative function evaluated at `x`, i.e. `1 - cdf(d, x)`. """ -ccdf(d::UnivariateDistribution, x::Real) = 1.0 - cdf(d, x) -ccdf(d::DiscreteUnivariateDistribution, x::Integer) = 1.0 - cdf(d, x) -ccdf(d::DiscreteUnivariateDistribution, x::Real) = ccdf(d, floor(Int,x)) +ccdf(d::UnivariateDistribution, x::Real) = 1 - cdf(d, x) """ logcdf(d::UnivariateDistribution, x::Real) @@ -382,8 +364,6 @@ ccdf(d::DiscreteUnivariateDistribution, x::Real) = ccdf(d, floor(Int,x)) The logarithm of the cumulative function value(s) evaluated at `x`, i.e. `log(cdf(x))`. """ logcdf(d::UnivariateDistribution, x::Real) = log(cdf(d, x)) -logcdf(d::DiscreteUnivariateDistribution, x::Integer) = log(cdf(d, x)) -logcdf(d::DiscreteUnivariateDistribution, x::Real) = logcdf(d, floor(Int,x)) """ logdiffcdf(d::UnivariateDistribution, x::Real, y::Real) @@ -405,8 +385,6 @@ end The logarithm of the complementary cumulative function values evaluated at x, i.e. `log(ccdf(x))`. """ logccdf(d::UnivariateDistribution, x::Real) = log(ccdf(d, x)) -logccdf(d::DiscreteUnivariateDistribution, x::Integer) = log(ccdf(d, x)) -logccdf(d::DiscreteUnivariateDistribution, x::Real) = logccdf(d, floor(Int,x)) """ quantile(d::UnivariateDistribution, q::Real) @@ -522,12 +500,137 @@ Here `x` can be a single scalar sample or an array of samples. loglikelihood(d::UnivariateDistribution, X::AbstractArray) = sum(x -> logpdf(d, x), X) loglikelihood(d::UnivariateDistribution, x::Real) = logpdf(d, x) +### special definitions for distributions with integer-valued support + +function cdf_int(d::DiscreteUnivariateDistribution, x::Real) + # handle `NaN` and `±Inf` which can't be truncated to `Int` + isfinite_x = isfinite(x) + _x = isfinite_x ? x : zero(x) + c = float(cdf(d, floor(Int, _x))) + return if isfinite_x + c + elseif isnan(x) + oftype(c, NaN) + elseif x < 0 + zero(c) + else + one(c) + end +end + +function ccdf_int(d::DiscreteUnivariateDistribution, x::Real) + # handle `NaN` and `±Inf` which can't be truncated to `Int` + isfinite_x = isfinite(x) + _x = isfinite_x ? x : zero(x) + c = float(ccdf(d, floor(Int, _x))) + return if isfinite_x + c + elseif isnan(x) + oftype(c, NaN) + elseif x < 0 + one(c) + else + zero(c) + end +end + +function logcdf_int(d::DiscreteUnivariateDistribution, x::Real) + # handle `NaN` and `±Inf` which can't be truncated to `Int` + isfinite_x = isfinite(x) + _x = isfinite_x ? x : zero(x) + c = float(logcdf(d, floor(Int, _x))) + return if isfinite_x + c + elseif isnan(x) + oftype(c, NaN) + elseif x < 0 + oftype(c, -Inf) + else + zero(c) + end +end + +function logccdf_int(d::DiscreteUnivariateDistribution, x::Real) + # handle `NaN` and `±Inf` which can't be truncated to `Int` + isfinite_x = isfinite(x) + _x = isfinite_x ? x : zero(x) + c = float(logccdf(d, floor(Int, _x))) + return if isfinite_x + c + elseif isnan(x) + oftype(c, NaN) + elseif x < 0 + zero(c) + else + oftype(c, -Inf) + end +end + +# implementation of the cdf for distributions whose support is a unitrange of integers +# note: incorrect for discrete distributions whose support includes non-integer numbers +function integerunitrange_cdf(d::DiscreteUnivariateDistribution, x::Integer) + minimum_d, maximum_d = extrema(d) + isfinite(minimum_d) || isfinite(maximum_d) || error("support is unbounded") + + result = if isfinite(minimum_d) && !(isfinite(maximum_d) && x >= div(minimum_d + maximum_d, 2)) + c = sum(Base.Fix1(pdf, d), minimum_d:(max(x, minimum_d))) + x < minimum_d ? zero(c) : c + else + c = 1 - sum(Base.Fix1(pdf, d), (min(x + 1, maximum_d)):maximum_d) + x >= maximum_d ? one(c) : c + end + + return result +end + +function integerunitrange_ccdf(d::DiscreteUnivariateDistribution, x::Integer) + minimum_d, maximum_d = extrema(d) + isfinite(minimum_d) || isfinite(maximum_d) || error("support is unbounded") + + result = if isfinite(minimum_d) && !(isfinite(maximum_d) && x >= div(minimum_d + maximum_d, 2)) + c = 1 - sum(Base.Fix1(pdf, d), minimum_d:(max(x, minimum_d))) + x < minimum_d ? one(c) : c + else + c = sum(Base.Fix1(pdf, d), (min(x + 1, maximum_d)):maximum_d) + x >= maximum_d ? zero(c) : c + end + + return result +end + +function integerunitrange_logcdf(d::DiscreteUnivariateDistribution, x::Integer) + minimum_d, maximum_d = extrema(d) + isfinite(minimum_d) || isfinite(maximum_d) || error("support is unbounded") + + result = if isfinite(minimum_d) && !(isfinite(maximum_d) && x >= div(minimum_d + maximum_d, 2)) + c = logsumexp(logpdf(d, y) for y in minimum_d:(max(x, minimum_d))) + x < minimum_d ? oftype(c, -Inf) : c + else + c = log1mexp(logsumexp(logpdf(d, y) for y in (min(x + 1, maximum_d)):maximum_d)) + x >= maximum_d ? zero(c) : c + end + + return result +end + +function integerunitrange_logccdf(d::DiscreteUnivariateDistribution, x::Integer) + minimum_d, maximum_d = extrema(d) + isfinite(minimum_d) || isfinite(maximum_d) || error("support is unbounded") + + result = if isfinite(minimum_d) && !(isfinite(maximum_d) && x >= div(minimum_d + maximum_d, 2)) + c = log1mexp(logsumexp(logpdf(d, y) for y in minimum_d:(max(x, minimum_d)))) + x < minimum_d ? zero(c) : c + else + c = logsumexp(logpdf(d, y) for y in (min(x + 1, maximum_d)):maximum_d) + x >= maximum_d ? oftype(c, -Inf) : c + end + + return result +end + ### macros to use StatsFuns for method implementation macro _delegate_statsfuns(D, fpre, psyms...) - dt = eval(D) - T = dt <: DiscreteUnivariateDistribution ? :Int : :Real - # function names from StatsFuns fpdf = Symbol(fpre, "pdf") flogpdf = Symbol(fpre, "logpdf") @@ -543,22 +646,24 @@ macro _delegate_statsfuns(D, fpre, psyms...) # parameter fields pargs = [Expr(:(.), :d, Expr(:quote, s)) for s in psyms] - esc(quote - pdf(d::$D, x::$T) = $(fpdf)($(pargs...), x) - logpdf(d::$D, x::$T) = $(flogpdf)($(pargs...), x) - - cdf(d::$D, x::$T) = $(fcdf)($(pargs...), x) - ccdf(d::$D, x::$T) = $(fccdf)($(pargs...), x) - logcdf(d::$D, x::$T) = $(flogcdf)($(pargs...), x) - logccdf(d::$D, x::$T) = $(flogccdf)($(pargs...), x) - - quantile(d::$D, q::Real) = convert($T, $(finvcdf)($(pargs...), q)) - cquantile(d::$D, q::Real) = convert($T, $(finvccdf)($(pargs...), q)) - invlogcdf(d::$D, lq::Real) = convert($T, $(finvlogcdf)($(pargs...), lq)) - invlogccdf(d::$D, lq::Real) = convert($T, $(finvlogccdf)($(pargs...), lq)) - end) -end + # output type of `quantile` etc. + T = :($D isa DiscreteUnivariateDistribution ? Int : Real) + + return quote + $Distributions.pdf(d::$D, x::Real) = $(fpdf)($(pargs...), x) + $Distributions.logpdf(d::$D, x::Real) = $(flogpdf)($(pargs...), x) + $Distributions.cdf(d::$D, x::Real) = $(fcdf)($(pargs...), x) + $Distributions.logcdf(d::$D, x::Real) = $(flogcdf)($(pargs...), x) + $Distributions.ccdf(d::$D, x::Real) = $(fccdf)($(pargs...), x) + $Distributions.logccdf(d::$D, x::Real) = $(flogccdf)($(pargs...), x) + + $Distributions.quantile(d::$D, q::Real) = convert($T, $(finvcdf)($(pargs...), q)) + $Distributions.cquantile(d::$D, q::Real) = convert($T, $(finvccdf)($(pargs...), q)) + $Distributions.invlogcdf(d::$D, lq::Real) = convert($T, $(finvlogcdf)($(pargs...), lq)) + $Distributions.invlogccdf(d::$D, lq::Real) = convert($T, $(finvlogccdf)($(pargs...), lq)) + end +end ##### specific distributions ##### From e313c9d326dd22cbafd0d165251edd3e29a842e3 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Thu, 17 Jun 2021 00:42:30 +0200 Subject: [PATCH 2/4] Add tests --- test/categorical.jl | 27 ++++++++++++++++++++------- test/dirac.jl | 21 +++++++++++++++++++++ test/discretenonparametric.jl | 26 ++++++++++++++++++++++++++ test/poissonbinomial.jl | 18 ++++++++++++++++-- test/testutils.jl | 23 +++++++++++++++++++++++ test/univariate_bounds.jl | 30 ++++++++++++++++++++++++++++-- 6 files changed, 134 insertions(+), 11 deletions(-) diff --git a/test/categorical.jl b/test/categorical.jl index e9d9c41ba..972b1c450 100644 --- a/test/categorical.jl +++ b/test/categorical.jl @@ -25,21 +25,34 @@ for p in Any[ c = 0.0 for i = 1:k c += p[i] - @test pdf(d, i) == p[i] + @test @inferred(pdf(d, i)) == p[i] + @test @inferred(pdf(d, float(i))) == p[i] @test @inferred(logpdf(d, i)) === log(p[i]) @test @inferred(logpdf(d, float(i))) === log(p[i]) - @test cdf(d, i) ≈ c - @test ccdf(d, i) ≈ 1.0 - c + @test @inferred(cdf(d, i)) ≈ c + @test @inferred(cdf(d, i + 0.5)) ≈ c + @test @inferred(ccdf(d, i)) ≈ 1 - c + @test @inferred(ccdf(d, i + 0.5)) ≈ 1 - c + @test @inferred(logcdf(d, i)) ≈ log(c) + @test @inferred(logcdf(d, i + 0.5)) ≈ log(c) + @test @inferred(logccdf(d, i)) ≈ log1p(-c) + @test @inferred(logccdf(d, i + 0.5)) ≈ log1p(-c) end @test pdf(d, 0) == 0 @test pdf(d, k+1) == 0 @test logpdf(d, 0) == -Inf @test logpdf(d, k+1) == -Inf - @test cdf(d, 0) == 0.0 - @test cdf(d, k+1) == 1.0 - @test ccdf(d, 0) == 1.0 - @test ccdf(d, k+1) == 0.0 + @test iszero(cdf(d, -Inf)) + @test iszero(cdf(d, 0)) + @test isone(cdf(d, k+1)) + @test isone(cdf(d, Inf)) + @test isnan(cdf(d, NaN)) + @test isone(ccdf(d, -Inf)) + @test isone(ccdf(d, 0)) + @test iszero(ccdf(d, k+1)) + @test iszero(ccdf(d, Inf)) + @test isnan(ccdf(d, NaN)) @test pdf.(d, support(d)) == p @test pdf.(d, 1:k) == p diff --git a/test/dirac.jl b/test/dirac.jl index 7e2053435..d3b0c7c6b 100644 --- a/test/dirac.jl +++ b/test/dirac.jl @@ -20,9 +20,30 @@ using Test @test iszero(logpdf(d, val)) @test logpdf(d, nextfloat(float(val))) == -Inf + @test iszero(cdf(d, -Inf)) @test iszero(cdf(d, prevfloat(float(val)))) @test isone(cdf(d, val)) @test isone(cdf(d, nextfloat(float(val)))) + @test isone(cdf(d, Inf)) + @test logcdf(d, -Inf) == -Inf + @test logcdf(d, prevfloat(float(val))) == -Inf + @test iszero(logcdf(d, val)) + @test iszero(logcdf(d, nextfloat(float(val)))) + @test iszero(logcdf(d, Inf)) + @test isone(ccdf(d, -Inf)) + @test isone(ccdf(d, prevfloat(float(val)))) + @test iszero(ccdf(d, val)) + @test iszero(ccdf(d, nextfloat(float(val)))) + @test iszero(ccdf(d, Inf)) + @test iszero(logccdf(d, -Inf)) + @test iszero(logccdf(d, prevfloat(float(val)))) + @test logccdf(d, val) == -Inf + @test logccdf(d, nextfloat(float(val))) == -Inf + @test logccdf(d, Inf) == -Inf + + for f in (cdf, ccdf, logcdf, logccdf) + @test isnan(f(d, NaN)) + end @test quantile(d, 0) == val @test quantile(d, 0.5) == val diff --git a/test/discretenonparametric.jl b/test/discretenonparametric.jl index 828863603..504fa84fa 100644 --- a/test/discretenonparametric.jl +++ b/test/discretenonparametric.jl @@ -39,19 +39,45 @@ test_params(d) @test pdf(d, 100.) == 0. @test pdf(d, 120.) == .1 +@test iszero(cdf(d, -Inf)) @test cdf(d, -100.) == 0. @test cdf(d, -100) == 0. @test cdf(d, 0.) ≈ .2 @test cdf(d, 100.) ≈ .9 @test cdf(d, 100) ≈ .9 @test cdf(d, 150.) == 1. +@test isone(cdf(d, Inf)) +@test isnan(cdf(d, NaN)) +@test isone(ccdf(d, -Inf)) @test ccdf(d, -100.) == 1. @test ccdf(d, -100) == 1. @test ccdf(d, 0.) ≈ .8 @test ccdf(d, 100.) ≈ .1 @test ccdf(d, 100) ≈ .1 @test ccdf(d, 150.) == 0 +@test iszero(ccdf(d, Inf)) +@test isnan(ccdf(d, NaN)) + +@test logcdf(d, -Inf) == -Inf +@test logcdf(d, -100.) == -Inf +@test logcdf(d, -100) == -Inf +@test logcdf(d, 0.) ≈ log(0.2) +@test logcdf(d, 100.) ≈ log(0.9) +@test logcdf(d, 100) ≈ log(0.9) +@test iszero(logcdf(d, 150.)) +@test iszero(logcdf(d, Inf)) +@test isnan(logcdf(d, NaN)) + +@test iszero(logccdf(d, -Inf)) +@test iszero(logccdf(d, -100.)) +@test iszero(logccdf(d, -100)) +@test logccdf(d, 0.) ≈ log(0.8) +@test logccdf(d, 100.) ≈ log(0.1) +@test logccdf(d, 100) ≈ log(0.1) +@test logccdf(d, 150.) == -Inf +@test logccdf(d, Inf) == -Inf +@test isnan(logccdf(d, NaN)) @test quantile(d, 0) == -60 @test quantile(d, .1) == -60 diff --git a/test/poissonbinomial.jl b/test/poissonbinomial.jl index b1adb3eb3..dec55cf46 100644 --- a/test/poissonbinomial.jl +++ b/test/poissonbinomial.jl @@ -62,14 +62,28 @@ for (p, n) in [(0.8, 6), (0.5, 10), (0.04, 20)] @test @inferred(quantile(d, i)) ≈ quantile(dref, i) end for i=0:n - @test @inferred(cdf(d, i)) ≈ cdf(dref, i) atol=1e-15 - @test @inferred(cdf(d, i//1)) ≈ cdf(dref, i) atol=1e-15 @test @inferred(pdf(d, i)) ≈ pdf(dref, i) atol=1e-15 @test @inferred(pdf(d, i//1)) ≈ pdf(dref, i) atol=1e-15 @test @inferred(logpdf(d, i)) ≈ logpdf(dref, i) @test @inferred(logpdf(d, i//1)) ≈ logpdf(dref, i) + for f in (cdf, ccdf, logcdf, logccdf) + @test @inferred(f(d, i)) ≈ f(dref, i) rtol=1e-6 + @test @inferred(f(d, i//1)) ≈ f(dref, i) rtol=1e-6 + @test @inferred(f(d, i + 0.5)) ≈ f(dref, i) rtol=1e-6 + end end + @test iszero(@inferred(cdf(d, -Inf))) + @test isone(@inferred(cdf(d, Inf))) + @test @inferred(logcdf(d, -Inf)) == -Inf + @test iszero(@inferred(logcdf(d, Inf))) + @test isone(@inferred(ccdf(d, -Inf))) + @test iszero(@inferred(ccdf(d, Inf))) + @test iszero(@inferred(logccdf(d, -Inf))) + @test @inferred(logccdf(d, Inf)) == -Inf + for f in (cdf, ccdf, logcdf, logccdf) + @test isnan(f(d, NaN)) + end end # Test against a sum of three Binomial distributions diff --git a/test/testutils.jl b/test/testutils.jl index 35483aa0c..fea1fc55f 100644 --- a/test/testutils.jl +++ b/test/testutils.jl @@ -35,6 +35,7 @@ function test_distr(distr::DiscreteUnivariateDistribution, n::Int; test_support(distr, vs) test_evaluation(distr, vs, testquan) test_range_evaluation(distr) + test_nonfinite(distr) test_stats(distr, vs) test_samples(distr, n) @@ -52,6 +53,7 @@ function test_distr(distr::ContinuousUnivariateDistribution, n::Int; test_support(distr, vs) test_evaluation(distr, vs, testquan) + test_nonfinite(distr) if isa(distr, StudentizedRange) n = 2000 # must use fewer values due to performance @@ -462,6 +464,27 @@ function test_evaluation(d::ContinuousUnivariateDistribution, vs::AbstractVector @test logccdf.(Ref(d), vs) ≈ lcc end +function test_nonfinite(distr::UnivariateDistribution) + # non-finite bounds + @test iszero(@inferred(cdf(distr, -Inf))) + @test isone(@inferred(cdf(distr, Inf))) + @test isone(@inferred(ccdf(distr, -Inf))) + @test iszero(@inferred(ccdf(distr, Inf))) + @test @inferred(logcdf(distr, -Inf)) == -Inf + @test iszero(@inferred(logcdf(distr, Inf))) + @test iszero(@inferred(logccdf(distr, -Inf))) + @test @inferred(logccdf(distr, Inf)) == -Inf + + # NaN + for f in (cdf, ccdf, logcdf, logccdf) + if distr isa NoncentralT + # broken in StatsFuns/R + @test_broken isnan(f(distr, NaN)) + else + @test isnan(f(distr, NaN)) + end + end +end #### Testing statistics methods diff --git a/test/univariate_bounds.jl b/test/univariate_bounds.jl index bc3769700..89008a272 100644 --- a/test/univariate_bounds.jl +++ b/test/univariate_bounds.jl @@ -66,23 +66,49 @@ filter!(x -> isbounded(x()), dists) ub = nextfloat(ub) @test iszero(cdf(d, lb)) @test isone(cdf(d, ub)) + @test iszero(cdf(d, -Inf)) + @test isone(cdf(d, Inf)) + @test isnan(cdf(d, NaN)) lb_lcdf = logcdf(d,lb) @test isinf(lb_lcdf) & (lb_lcdf < 0) @test iszero(logcdf(d, ub)) + @test logcdf(d, -Inf) == -Inf + @test iszero(logcdf(d, Inf)) + @test isnan(logcdf(d, NaN)) @test isone(ccdf(d, lb)) @test iszero(ccdf(d, ub)) + @test isone(ccdf(d, -Inf)) + @test iszero(ccdf(d, Inf)) + @test isnan(ccdf(d, NaN)) ub_lccdf = logccdf(d,ub) @test isinf(ub_lccdf) & (ub_lccdf < 0) @test iszero(logccdf(d, lb)) + @test iszero(logccdf(d, -Inf)) + @test logccdf(d, Inf) == -Inf + @test isnan(logccdf(d, NaN)) @test iszero(pdf(d, lb)) - @test iszero(pdf(d, ub)) + if dist === Binomial + # https://github.com/JuliaStats/StatsFuns.jl/issues/115 + @test_broken iszero(pdf(d, ub)) + @test iszero(pdf(d, ub + 1e-6)) + else + @test iszero(pdf(d, ub)) + end lb_lpdf = logpdf(d, lb) @test isinf(lb_lpdf) & (lb_lpdf < 0) ub_lpdf = logpdf(d, ub) - @test isinf(ub_lpdf) & (ub_lpdf < 0) + if dist === Binomial + # https://github.com/JuliaStats/StatsFuns.jl/issues/115 + @test_broken isinf(ub_lpdf) & (ub_lpdf < 0) + @test logpdf(d, ub + 1e-6) == -Inf + else + @test isinf(ub_lpdf) & (ub_lpdf < 0) + end + @test logpdf(d, -Inf) == -Inf + @test logpdf(d, Inf) == -Inf end From 322c3f57d1e805eb13c68d19fc399bc540ed6fa4 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Thu, 17 Jun 2021 01:38:38 +0200 Subject: [PATCH 3/4] Fix missing method --- src/univariate/continuous/weibull.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/univariate/continuous/weibull.jl b/src/univariate/continuous/weibull.jl index e36631c31..f1fb19ff5 100644 --- a/src/univariate/continuous/weibull.jl +++ b/src/univariate/continuous/weibull.jl @@ -138,4 +138,4 @@ end #### Sampling -rand(rng::AbstractRNG, d::Weibull) = xv(d, randexp(rng)) +rand(rng::AbstractRNG, d::Weibull) = xval(d, randexp(rng)) From fd5489ee28fc4965b92127e4ab621b84c4010f09 Mon Sep 17 00:00:00 2001 From: David Widmann Date: Thu, 1 Jul 2021 23:23:33 +0200 Subject: [PATCH 4/4] Remove traits draft --- src/univariates.jl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/univariates.jl b/src/univariates.jl index 2b52c3007..a01f98b2e 100644 --- a/src/univariates.jl +++ b/src/univariates.jl @@ -20,12 +20,6 @@ isupperbounded(d::Union{D,Type{D}}) where {D<:UnivariateDistribution} = maximum( hasfinitesupport(d::Union{D,Type{D}}) where {D<:DiscreteUnivariateDistribution} = isbounded(d) hasfinitesupport(d::Union{D,Type{D}}) where {D<:ContinuousUnivariateDistribution} = false -struct HasNonIntegerSupport end -struct HasIntegerSupport end -struct HasIntegerUnitrangeSupport end - -supporttype(::UnivariateDistribution) = HasNonIntegerSupport() - """ params(d::UnivariateDistribution)