Skip to content

Commit

Permalink
Clean up Binomial distribution
Browse files Browse the repository at this point in the history
  • Loading branch information
lindahua committed Nov 18, 2014
1 parent 9a9a2a1 commit 4c55ac1
Show file tree
Hide file tree
Showing 8 changed files with 110 additions and 66 deletions.
1 change: 1 addition & 0 deletions src/Distributions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ export
moment, # moments of distribution
nsamples, # get the number of samples contained in an array
ncategories, # the number of categories in a Categorical distribution
ntrials, # the number of trials being performed in the experiment
pdf, # probability density function (ContinuousDistribution)
pmf, # probability mass function (DiscreteDistribution)
probs, # Get the vector of probabilities
Expand Down
3 changes: 2 additions & 1 deletion src/univariate/discrete/bernoulli.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@ end
@distr_support Bernoulli 0 1


#### parameters
#### Parameters

succprob(d::Bernoulli) = d.p
failprob(d::Bernoulli) = 1.0 - d.p

params(d::Bernoulli) = (d.p,)


#### Properties

mean(d::Bernoulli) = succprob(d)
Expand Down
88 changes: 52 additions & 36 deletions src/univariate/discrete/binomial.jl
Original file line number Diff line number Diff line change
@@ -1,30 +1,60 @@

immutable Binomial <: DiscreteUnivariateDistribution
size::Int
prob::Float64
function Binomial(n::Real, p::Real)
n >= zero(n) || error("size must be positive")
zero(p) <= p <= one(p) || error("prob must be in [0, 1]")
new(int(n), float64(p))
n::Int
p::Float64

function Binomial(n::Int, p::Float64)
n >= 0 || error("n must be non-negative.")
0.0 <= p <= 1.0 || error("p must be in [0, 1]")
new(n, p)
end

Binomial(n::Integer, p::Real) = Binomial(int(n), float64(p))
Binomial(n::Integer) = Binomial(int(n), 0.5)
Binomial() = new(1, 0.5)
end

Binomial(size::Integer) = Binomial(size, 0.5)
Binomial() = Binomial(1, 0.5)
@distr_support Binomial 0 d.n

@distr_support Binomial 0 d.size
@_jl_dist_2p Binomial binom


@_jl_dist_2p Binomial binom
#### parameters

function entropy(d::Binomial; approx::Bool=false)
n = d.size
p1 = d.prob
ntrials(d::Binomial) = d.n
succprob(d::Binomial) = d.p
failprob(d::Binomial) = 1.0 - d.p

(p1 == 0.0 || p1 == 1.0 || n == 0) && return 0.0
params(d::Binomial) = (d.n, d.p)


#### Properties

mean(d::Binomial) = ntrials(d) * succprob(d)
var(d::Binomial) = ntrials(d) * succprob(d) * failprob(d)
mode(d::Binomial) = ((n, p) = params(d); n > 0 ? iround((n + 1) * d.prob) : 0)
modes(d::Binomial) = Int[mode(d)]

median(d::Binomial) = iround(mean(d))

function skewness(d::Binomial)
n, p1 = params(d)
p0 = 1.0 - p1
(p0 - p1) / sqrt(n * p0 * p1)
end

function kurtosis(d::Binomial)
n, p = params(d)
u = p * (1.0 - p)
(1.0 - 6.0 * u) / (n * u)
end

function entropy(d::Binomial; approx::Bool=false)
n, p1 = params(d)
(p1 == 0.0 || p1 == 1.0 || n == 0) && return 0.0
p0 = 1.0 - p1
if approx
return 0.5 * (log(2.0pi * n * p0 * p1) + 1.0)
return 0.5 * (log(twoπ * n * p0 * p1) + 1.0)
else
lg = log(p1 / p0)
lp = n * log(p0)
Expand All @@ -37,45 +67,31 @@ function entropy(d::Binomial; approx::Bool=false)
end
end

kurtosis(d::Binomial) = (1.0 - 6.0 * d.prob * (1.0 - d.prob)) / var(d)

mean(d::Binomial) = d.size * d.prob

var(d::Binomial) = d.size * d.prob * (1.0 - d.prob)

skewness(d::Binomial) = (1.0 - 2.0 * d.prob) / std(d)

median(d::Binomial) = iround(d.size * d.prob)

# TODO: May need to subtract 1 sometimes
# two modes possible e.g. size odd, p = 0.5
mode(d::Binomial) = d.size > 0 ? iround((d.size + 1.0) * d.prob) : 0

#### Evaluation

immutable RecursiveBinomProbEvaluator <: RecursiveProbabilityEvaluator
n::Int
coef::Float64 # p / (1 - p)
end

RecursiveBinomProbEvaluator(d::Binomial) = (p = d.prob; RecursiveBinomProbEvaluator(d.size, p / (1.0 - p)))
RecursiveBinomProbEvaluator(d::Binomial) = RecursiveBinomProbEvaluator(d.n, d.p / (1.0 - d.p))
nextpdf(s::RecursiveBinomProbEvaluator, p::Float64, x::Integer) = ((s.n - x + 1) / x) * s.coef * p
_pdf!(r::AbstractArray, d::Binomial, rgn::UnitRange) = _pdf!(r, d, rgn, RecursiveBinomProbEvaluator(d))


function mgf(d::Binomial, t::Real)
p = d.prob
(1.0 - p + p * exp(t))^d.size
n, p = params(d)
(1.0 - p + p * exp(t)) ^ n
end

function cf(d::Binomial, t::Real)
p = d.prob
(1.0 - p + p * exp(im * t))^d.size
n, p = params(d)
(1.0 - p + p * exp(im * t)) ^ n
end

modes(d::Binomial) = iround([d.size * d.prob])


## Fit model
#### Fit model

immutable BinomialStats <: SufficientStats
ns::Float64 # the total number of successes
Expand Down
8 changes: 4 additions & 4 deletions test/conjugates.jl
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ p = posterior(pri, Binomial, (10, x))

f = fit_map(pri, Binomial, (10, x))
@test isa(f, Binomial)
@test f.size == 10
@test_approx_eq f.prob mode(p)
@test ntrials(f) == 10
@test_approx_eq succprob(f) mode(p)

p = posterior(pri, Binomial, (10, x), w)
@test isa(p, Beta)
Expand All @@ -90,8 +90,8 @@ p = posterior(pri, Binomial, (10, x), w)

f = fit_map(pri, Binomial, (10, x), w)
@test isa(f, Binomial)
@test f.size == 10
@test_approx_eq f.prob mode(p)
@test ntrials(f) == 10
@test_approx_eq succprob(f) mode(p)


# Dirichlet - Categorical
Expand Down
54 changes: 45 additions & 9 deletions test/discrete.jl
Original file line number Diff line number Diff line change
Expand Up @@ -65,21 +65,17 @@ end

n_tsamples = 10^6

for rentry in R
println(" testing $(rentry.distr)")
verify(rentry)
test_distr(rentry.distr, n_tsamples)
end
pdtitle(d::DiscreteUnivariateDistribution) = println(" testing $d")

# for Bernoulli
### Bernoulli

for (d, p) in [ (Bernoulli(), 0.5),
(Bernoulli(0.25), 0.25),
(Bernoulli(0.75), 0.75),
(Bernoulli(0.00), 0.00),
(Bernoulli(1.00), 1.00) ]

println(" testing $d")
pdtitle(d)

@test isa(d, Bernoulli)
@test succprob(d) == p
Expand All @@ -100,7 +96,36 @@ for (d, p) in [ (Bernoulli(), 0.5),
test_distr(d, n_tsamples)
end

# for Categorical

### Binomial

for (d, n, p) in [ (Binomial(), 1, 0.5),
(Binomial(3), 3, 0.5),
(Binomial(5, 0.4), 5, 0.4),
(Binomial(6, 0.8), 6, 0.8),
(Binomial(100, 0.1), 100, 0.1),
(Binomial(100, 0.9), 100, 0.9),
(Binomial(10, 0.0), 10, 0.0),
(Binomial(10, 1.0), 10, 1.0) ]

pdtitle(d)

@test isa(d, Binomial)
@test succprob(d) == p
@test failprob(d) == 1.0 - p
@test minimum(d) == 0
@test maximum(d) == n
@test_approx_eq mean(d) n * p
@test_approx_eq var(d) n * p * (1.0 - p)

# TODO: current implementation has some problem when p = 1.0
if p < 1.0
test_distr(d, n_tsamples)
end
end


### Categorical

for distr in [
Categorical([0.1, 0.9]),
Expand All @@ -114,7 +139,18 @@ for distr in [
@test pdf(distr, i) == distr.prob[i]
end

println(" testing $(distr)")
pdtitle(distr)
test_distr(distr, n_tsamples)
end


### Others

for rentry in R
pdtitle(rentry.distr)
verify(rentry)
test_distr(rentry.distr, n_tsamples)
end



5 changes: 0 additions & 5 deletions test/discrete_ref.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
distr, mean, var, entropy, x25, x50, x75, lp25, lp50, lp75
"Binomial(1, 0.5)", 5.0000000000000000e-01, 2.5000000000000000e-01, 6.9314718055994529e-01, 0, 0, 1, -6.9314718055994529e-01, -6.9314718055994529e-01, -6.9314718055994529e-01
"Binomial(5, 0.4)", 2.0000000000000000e+00, 1.2000000000000000e+00, 1.4979981829038540e+00, 1, 2, 3, -1.3501553145040179e+00, -1.0624732420522367e+00, -1.4679383501604009e+00
"Binomial(6, 0.8)", 4.8000000000000007e+00, 9.5999999999999996e-01, 1.3425774508710622e+00, 4, 5, 6, -1.4033998290228298e+00, -9.3339619977709365e-01, -1.3388613078852583e+00
"Binomial(100, 0.1)", 1.0000000000000000e+01, 9.0000000000000000e+00, 2.5112331161825550e+00, 8, 10, 12, -2.1643632354294020e+00, -2.0259739768661866e+00, -2.3147790140625677e+00
"Binomial(100, 0.9)", 9.0000000000000000e+01, 8.9999999999999982e+00, 2.5112331161825514e+00, 88, 90, 92, -2.3147790140625695e+00, -2.0259739768661902e+00, -2.1643632354294020e+00
"DiscreteUniform(0, 4)", 2.0000000000000000e+00, 2.0000000000000000e+00, 1.6094379124341003e+00, 1, 2, 3, -1.6094379124341003e+00, -1.6094379124341003e+00, -1.6094379124341003e+00
"DiscreteUniform(2, 8)", 5.0000000000000000e+00, 4.0000000000000000e+00, 1.9459101490553132e+00, 3, 5, 7, -1.9459101490553135e+00, -1.9459101490553135e+00, -1.9459101490553135e+00
"Geometric(0.1)", 9.0000000000000000e+00, 9.0000000000000000e+01, 3.2508297339144798e+00, 2, 6, 13, -2.5133061243096981e+00, -2.9347481869410030e+00, -3.6722717965457869e+00
Expand Down
5 changes: 0 additions & 5 deletions test/discrete_ref.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,3 @@
Binomial(1, 0.5)
Binomial(5, 0.4)
Binomial(6, 0.8)
Binomial(100, 0.1)
Binomial(100, 0.9)
DiscreteUniform(0, 4)
DiscreteUniform(2, 8)
Geometric(0.1)
Expand Down
12 changes: 6 additions & 6 deletions test/fit.jl
Original file line number Diff line number Diff line change
Expand Up @@ -80,18 +80,18 @@ ss = suffstats(Binomial, (100, x), w)

d = fit(Binomial, (100, x))
@test isa(d, Binomial)
@test d.size == 100
@test_approx_eq d.prob sum(x) / (n0 * 100)
@test ntrials(d) == 100
@test_approx_eq succprob(d) sum(x) / (n0 * 100)

d = fit(Binomial, (100, x), w)
@test isa(d, Binomial)
@test d.size == 100
@test_approx_eq d.prob dot(x, w) / (sum(w) * 100)
@test ntrials(d) == 100
@test_approx_eq succprob(d) dot(x, w) / (sum(w) * 100)

d = fit(Binomial, 100, rand(Binomial(100, 0.3), N))
@test isa(d, Binomial)
@test d.size == 100
@test_approx_eq_eps d.prob 0.3 0.01
@test ntrials(d) == 100
@test_approx_eq_eps succprob(d) 0.3 0.01


# Categorical
Expand Down

0 comments on commit 4c55ac1

Please sign in to comment.