diff --git a/src/Distributions.jl b/src/Distributions.jl index 3cf0a2812..dd07fbfc0 100644 --- a/src/Distributions.jl +++ b/src/Distributions.jl @@ -98,6 +98,7 @@ export FullNormalCanon, Gamma, DiscreteNonParametric, + GeneralDiscreteNonParametric, GeneralizedPareto, GeneralizedExtremeValue, Geometric, @@ -128,6 +129,7 @@ export MixtureModel, Multinomial, MultivariateNormal, + MvDiscreteNonParametric, MvLogNormal, MvNormal, MvNormalCanon, diff --git a/src/multivariate/generaldiscretenonparametric.jl b/src/multivariate/generaldiscretenonparametric.jl new file mode 100644 index 000000000..3486df6ce --- /dev/null +++ b/src/multivariate/generaldiscretenonparametric.jl @@ -0,0 +1,79 @@ +struct GeneralDiscreteNonParametric{VF,T,P <: Real,Ts <: AbstractVector{T},Ps <: AbstractVector{P},} <: Distribution{VF,Discrete} + support::Ts + p::Ps + + function GeneralDiscreteNonParametric{VF,T,P,Ts,Ps}( + support::Ts, + p::Ps; + check_args=true, + ) where {VF,T,P <: Real,Ts <: AbstractVector{T},Ps <: AbstractVector{P}} + if check_args + length(support) == length(p) || + error("length of `support` and `p` must be equal") + isprobvec(p) || error("`p` must be a probability vector") + allunique(support) || error("`support` must contain only unique values") + end + new{VF,T,P,Ts,Ps}(support, p) + end +end + +function rand(rng::AbstractRNG, d::GeneralDiscreteNonParametric) + x = support(d) + p = probs(d) + n = length(p) + draw = rand(rng, float(eltype(p))) + cp = p[1] + i = 1 + while cp <= draw && i < n + @inbounds cp += p[i +=1] + end + return x[i] +end + +""" + support(d::MvDiscreteNonParametric) +Get a sorted AbstractVector defining the support of `d`. +""" +support(d::GeneralDiscreteNonParametric) = d.support + +""" + probs(d::MvDiscreteNonParametric) +Get the vector of probabilities associated with the support of `d`. +""" +probs(d::GeneralDiscreteNonParametric) = d.p + + +Base.length(d::GeneralDiscreteNonParametric) = length(first(d.support)) + +function _rand!( + rng::AbstractRNG, + d::GeneralDiscreteNonParametric, + x::AbstractVector{T}, + ) where {T<:Real} + + length(x) == length(d) || throw(DimensionMismatch("Invalid argument dimension.")) + s = d.support + p = d.p + + n = length(p) + draw = Base.rand(rng, float(eltype(p))) + cp = p[1] + i = 1 + while cp <= draw && i < n + @inbounds cp += p[i+=1] + end + copyto!(x, s[i]) + return x +end + + +function _logpdf(d::GeneralDiscreteNonParametric, x::AbstractVector{T}) where {T<:Real} + s = support(d) + p = probs(d) + for i = 1:length(p) + if s[i] == x + return log(p[i]) + end + end + return log(zero(eltype(p))) +end diff --git a/src/multivariate/mvdiscretenonparametric.jl b/src/multivariate/mvdiscretenonparametric.jl new file mode 100644 index 000000000..3e6e2947e --- /dev/null +++ b/src/multivariate/mvdiscretenonparametric.jl @@ -0,0 +1,55 @@ +const MvDiscreteNonParametric{ + T<:AbstractVector{<:Real}, + P<:Real, + Ts<:AbstractVector{T}, + Ps<:AbstractVector{P}, +} = GeneralDiscreteNonParametric{Multivariate,T,P,Ts,Ps} + +""" + MvDiscreteNonParametric( + support::AbstractVector, + p::AbstractVector{<:Real}=fill(inv(length(support)), length(support)), + ) + +Construct a multivariate discrete nonparametric probability distribution with `support` and corresponding +probabilities `p`. If the probability vector argument is not passed, then +equal probability is assigned to each entry in the support. + +# Examples +```julia +# rows correspond to samples +x = collect(eachrow(rand(10,2))) +μ = MvDiscreteNonParametric(x) + +# columns correspond to samples +y = collect(eachcol(rand(7,12))) +ν = MvDiscreteNonParametric(y) +``` +""" +function MvDiscreteNonParametric( + support::AbstractArray{<:AbstractVector{<:Real}}, + p::AbstractVector{<:Real} = fill(inv(length(support)), length(support)), +) + return MvDiscreteNonParametric{eltype(support),eltype(p),typeof(support),typeof(p)}( + support, + p, + ) +end + +Base.eltype(::Type{<:MvDiscreteNonParametric{T}}) where T = Base.eltype(T) + +function mean(d::MvDiscreteNonParametric) + return StatsBase.mean(hcat(d.support...), Weights(d.p, one(eltype(d.p))),dims=2) +end + +function var(d::MvDiscreteNonParametric) + x = hcat(support(d)...) + p = probs(d) + return StatsBase.var(x, Weights(p, one(eltype(p))), 2,corrected = false) +end + +function cov(d::MvDiscreteNonParametric) + x = hcat(support(d)...) + p = probs(d) + return cov(x, Weights(p, one(eltype(p))), 2,corrected = false) +end diff --git a/src/multivariates.jl b/src/multivariates.jl index 1a087f1ba..e48742810 100644 --- a/src/multivariates.jl +++ b/src/multivariates.jl @@ -112,6 +112,8 @@ end for fname in ["dirichlet.jl", "multinomial.jl", "dirichletmultinomial.jl", + "generaldiscretenonparametric.jl", + "mvdiscretenonparametric.jl", "mvnormal.jl", "mvnormalcanon.jl", "mvlognormal.jl", diff --git a/src/univariate/discrete/discretenonparametric.jl b/src/univariate/discrete/discretenonparametric.jl index 987f351a4..5f7537bec 100644 --- a/src/univariate/discrete/discretenonparametric.jl +++ b/src/univariate/discrete/discretenonparametric.jl @@ -69,19 +69,6 @@ Base.isapprox(c1::D, c2::D) where D<:DiscreteNonParametric = # Sampling -function rand(rng::AbstractRNG, d::DiscreteNonParametric) - x = support(d) - p = probs(d) - n = length(p) - draw = rand(rng, float(eltype(p))) - cp = p[1] - i = 1 - while cp <= draw && i < n - @inbounds cp += p[i +=1] - end - return x[i] -end - sampler(d::DiscreteNonParametric) = DiscreteNonParametricSampler(support(d), probs(d)) diff --git a/test/generaldiscretenonparametric.jl b/test/generaldiscretenonparametric.jl new file mode 100644 index 000000000..c743aaf0f --- /dev/null +++ b/test/generaldiscretenonparametric.jl @@ -0,0 +1,101 @@ +using Distributions +using StatsBase +using LinearAlgebra +using Random +using Test + + +@testset "GeneralDiscreteNonParametric" begin + + @testset "Declaring MvDiscreteNonParametric" begin + + Random.seed!(7) + n = 4 + m = 2 + A = collect(eachrow(rand(n, m))) + p = normalize!(rand(n), 1) + + # Passing probabilities + μ = @inferred(MvDiscreteNonParametric(A, p)) + @test support(μ) == A + @test length(μ) == m + # @test size(μ) == (m, n) + @test probs(μ) == p + + # Without passing probabilities + μ = @inferred(MvDiscreteNonParametric(A)) + @test support(μ) == A + @test length(μ) == m + # @test size(μ) == (m, n) + @test probs(μ) == fill(1 / n, n) + + # Array of arrays without ArraysOfArrays.jl + n, m = 3, 2 + p = ([3 / 5, 1 / 5, 1 / 5]) + A = [[1,0],[1,1],[0,1]] + μ = @inferred(MvDiscreteNonParametric(A, p)) + + @test support(μ) == A + @test length(μ) == m + @test probs(μ) == p + + end + + + @testset "Functionalities" begin + + function variance(d) + v = zeros(length(d)) + for i in 1:length(d) + s = hcat(μ.support...)[i,:] + mₛ = mean(d)[i] + v[i] = sum(abs2.(s .- mₛ), Weights(d.p)) + end + return v + end + + function covariance(d) + n = length(d) + v = zeros(n, n) + for i in 1:n, j in 1:n + s = hcat(μ.support...)[i,:] + mₛ = mean(d)[i] + + u = hcat(μ.support...)[j,:] + mᵤ = mean(d)[j] + + v[i,j] = sum((s .- mₛ) .* (u .- mᵤ), Weights(d.p)) + end + return v + end + + Random.seed!(7) + n, m = 7, 9 + + A = collect(eachrow(rand(n, m))) + p = normalize!(rand(n), 1) + μ = @inferred(MvDiscreteNonParametric(A, p)) + + @test mean(μ) ≈ mean(hcat(μ.support...), Weights(p), dims=2)[:] + @test var(μ) ≈ variance(μ) + @test cov(μ) ≈ covariance(μ) + @test pdf(μ, μ.support) ≈ μ.p + @test pdf(μ, zeros(m)) == 0.0 + # @test entropy(μ) == entropy(μ.p) + # @test entropy(μ, 2) == entropy(μ.p, 2) + + end + + @testset "Sampling" begin + Random.seed!(7) + A = collect(eachrow(rand(3, 2))) + μ = MvDiscreteNonParametric(A, [0.9,0.1,0.0]) + + for i in 1:3 + samples = rand(μ, 10000) + @test abs(mean([s == A[i] for s in eachcol(samples)]) - μ.p[i]) < 0.05 + end + end + +end + diff --git a/test/runtests.jl b/test/runtests.jl index 18bcff72d..6194d953a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -54,6 +54,8 @@ const tests = [ "pgeneralizedgaussian", "product", "discretenonparametric", + "generaldiscretenonparametric", + "functionals", "chernoff", "univariate_bounds", "negativebinomial",