Clean up Binomial distribution

JuliaStats · Nov 18, 2014 · 4c55ac1 · 4c55ac1
1 parent 9a9a2a1
commit 4c55ac1
Show file tree

Hide file tree

Showing 8 changed files with 110 additions and 66 deletions.
diff --git a/src/Distributions.jl b/src/Distributions.jl
@@ -201,6 +201,7 @@ export
     moment,             # moments of distribution
     nsamples,           # get the number of samples contained in an array
     ncategories,        # the number of categories in a Categorical distribution
+    ntrials,            # the number of trials being performed in the experiment
     pdf,                # probability density function (ContinuousDistribution)
     pmf,                # probability mass function (DiscreteDistribution)
     probs,              # Get the vector of probabilities

diff --git a/src/univariate/discrete/bernoulli.jl b/src/univariate/discrete/bernoulli.jl
@@ -18,13 +18,14 @@ end
 @distr_support Bernoulli 0 1 
 
 
-#### parameters
+#### Parameters
 
 succprob(d::Bernoulli) = d.p
 failprob(d::Bernoulli) = 1.0 - d.p
 
 params(d::Bernoulli) = (d.p,)
 
+
 #### Properties
 
 mean(d::Bernoulli) = succprob(d)

diff --git a/src/univariate/discrete/binomial.jl b/src/univariate/discrete/binomial.jl
@@ -1,30 +1,60 @@
+
 immutable Binomial <: DiscreteUnivariateDistribution
-    size::Int
-    prob::Float64
-    function Binomial(n::Real, p::Real)
-        n >= zero(n) || error("size must be positive")
-        zero(p) <= p <= one(p) || error("prob must be in [0, 1]")
-        new(int(n), float64(p))
+    n::Int
+    p::Float64
+
+    function Binomial(n::Int, p::Float64)
+        n >= 0 || error("n must be non-negative.")
+        0.0 <= p <= 1.0 || error("p must be in [0, 1]")
+        new(n, p)
     end
+
+    Binomial(n::Integer, p::Real) = Binomial(int(n), float64(p))
+    Binomial(n::Integer) = Binomial(int(n), 0.5)
+    Binomial() = new(1, 0.5)
 end
 
-Binomial(size::Integer) = Binomial(size, 0.5)
-Binomial() = Binomial(1, 0.5)
+@distr_support Binomial 0 d.n
 
-@distr_support Binomial 0 d.size
+@_jl_dist_2p Binomial binom
 
 
-@_jl_dist_2p Binomial binom
+#### parameters
 
-function entropy(d::Binomial; approx::Bool=false)
-    n = d.size
-    p1 = d.prob
+ntrials(d::Binomial) = d.n
+succprob(d::Binomial) = d.p
+failprob(d::Binomial) = 1.0 - d.p
 
-    (p1 == 0.0 || p1 == 1.0 || n == 0) && return 0.0
+params(d::Binomial) = (d.n, d.p)
+
+
+#### Properties
+
+mean(d::Binomial) = ntrials(d) * succprob(d)
+var(d::Binomial) = ntrials(d) * succprob(d) * failprob(d)
+mode(d::Binomial) = ((n, p) = params(d); n > 0 ? iround((n + 1) * d.prob) : 0)
+modes(d::Binomial) = Int[mode(d)]
+
+median(d::Binomial) = iround(mean(d))
+
+function skewness(d::Binomial) 
+    n, p1 = params(d)
     p0 = 1.0 - p1
+    (p0 - p1) / sqrt(n * p0 * p1)
+end
+
+function kurtosis(d::Binomial)
+    n, p = params(d)
+    u = p * (1.0 - p)
+    (1.0 - 6.0 * u) / (n * u) 
+end
 
+function entropy(d::Binomial; approx::Bool=false)
+    n, p1 = params(d)
+    (p1 == 0.0 || p1 == 1.0 || n == 0) && return 0.0
+    p0 = 1.0 - p1
     if approx 
-        return 0.5 * (log(2.0pi * n * p0 * p1) + 1.0) 
+        return 0.5 * (log(twoπ * n * p0 * p1) + 1.0) 
     else
         lg = log(p1 / p0)        
         lp = n * log(p0)
@@ -37,45 +67,31 @@ function entropy(d::Binomial; approx::Bool=false)
     end
 end
 
-kurtosis(d::Binomial) = (1.0 - 6.0 * d.prob * (1.0 - d.prob)) / var(d)
-
-mean(d::Binomial) = d.size * d.prob
-
-var(d::Binomial) = d.size * d.prob * (1.0 - d.prob)
-
-skewness(d::Binomial) = (1.0 - 2.0 * d.prob) / std(d)
-
-median(d::Binomial) = iround(d.size * d.prob)
-
-# TODO: May need to subtract 1 sometimes
-# two modes possible e.g. size odd, p = 0.5
-mode(d::Binomial) = d.size > 0 ? iround((d.size + 1.0) * d.prob) : 0
 
+#### Evaluation
 
 immutable RecursiveBinomProbEvaluator <: RecursiveProbabilityEvaluator
     n::Int
     coef::Float64   # p / (1 - p)
 end
 
-RecursiveBinomProbEvaluator(d::Binomial) = (p = d.prob; RecursiveBinomProbEvaluator(d.size, p / (1.0 - p)))
+RecursiveBinomProbEvaluator(d::Binomial) = RecursiveBinomProbEvaluator(d.n, d.p / (1.0 - d.p))
 nextpdf(s::RecursiveBinomProbEvaluator, p::Float64, x::Integer) = ((s.n - x + 1) / x) * s.coef * p
 _pdf!(r::AbstractArray, d::Binomial, rgn::UnitRange) = _pdf!(r, d, rgn, RecursiveBinomProbEvaluator(d))
 
 
 function mgf(d::Binomial, t::Real)
-    p = d.prob
-    (1.0 - p + p * exp(t))^d.size
+    n, p = params(d)
+    (1.0 - p + p * exp(t)) ^ n
 end
 
 function cf(d::Binomial, t::Real)
-    p = d.prob
-    (1.0 - p + p * exp(im * t))^d.size
+    n, p = params(d)
+    (1.0 - p + p * exp(im * t)) ^ n
 end
 
-modes(d::Binomial) = iround([d.size * d.prob])
-
 
-## Fit model
+#### Fit model
 
 immutable BinomialStats <: SufficientStats
     ns::Float64   # the total number of successes

diff --git a/test/conjugates.jl b/test/conjugates.jl
@@ -80,8 +80,8 @@ p = posterior(pri, Binomial, (10, x))
 
 f = fit_map(pri, Binomial, (10, x))
 @test isa(f, Binomial)
-@test f.size == 10
-@test_approx_eq f.prob mode(p)
+@test ntrials(f) == 10
+@test_approx_eq succprob(f) mode(p)
 
 p = posterior(pri, Binomial, (10, x), w)
 @test isa(p, Beta)
@@ -90,8 +90,8 @@ p = posterior(pri, Binomial, (10, x), w)
 
 f = fit_map(pri, Binomial, (10, x), w)
 @test isa(f, Binomial)
-@test f.size == 10
-@test_approx_eq f.prob mode(p)
+@test ntrials(f) == 10
+@test_approx_eq succprob(f) mode(p)
 
 
 # Dirichlet - Categorical

diff --git a/test/discrete.jl b/test/discrete.jl
@@ -65,21 +65,17 @@ end
 
 n_tsamples = 10^6
 
-for rentry in R
-    println("    testing $(rentry.distr)")
-    verify(rentry)
-    test_distr(rentry.distr, n_tsamples)
-end
+pdtitle(d::DiscreteUnivariateDistribution) = println("    testing $d")
 
-# for Bernoulli
+### Bernoulli
 
 for (d, p) in [ (Bernoulli(), 0.5), 
                 (Bernoulli(0.25), 0.25), 
                 (Bernoulli(0.75), 0.75),
                 (Bernoulli(0.00), 0.00),
                 (Bernoulli(1.00), 1.00) ]
 
-    println("    testing $d")
+    pdtitle(d)
 
     @test isa(d, Bernoulli)
     @test succprob(d) == p
@@ -100,7 +96,36 @@ for (d, p) in [ (Bernoulli(), 0.5),
     test_distr(d, n_tsamples)
 end
 
-# for Categorical
+
+### Binomial
+
+for (d, n, p) in [ (Binomial(), 1, 0.5), 
+                   (Binomial(3), 3, 0.5),
+                   (Binomial(5, 0.4), 5, 0.4),
+                   (Binomial(6, 0.8), 6, 0.8),
+                   (Binomial(100, 0.1), 100, 0.1),
+                   (Binomial(100, 0.9), 100, 0.9),
+                   (Binomial(10, 0.0), 10, 0.0),
+                   (Binomial(10, 1.0), 10, 1.0) ]
+
+    pdtitle(d)
+
+    @test isa(d, Binomial)
+    @test succprob(d) == p
+    @test failprob(d) == 1.0 - p
+    @test minimum(d) == 0
+    @test maximum(d) == n
+    @test_approx_eq mean(d) n * p
+    @test_approx_eq var(d) n * p * (1.0 - p)
+
+    # TODO: current implementation has some problem when p = 1.0
+    if p < 1.0  
+        test_distr(d, n_tsamples)
+    end
+end
+
+
+### Categorical
 
 for distr in [
     Categorical([0.1, 0.9]),
@@ -114,7 +139,18 @@ for distr in [
         @test pdf(distr, i) == distr.prob[i]
     end
 
-    println("    testing $(distr)")
+    pdtitle(distr)
     test_distr(distr, n_tsamples)
 end
 
+
+### Others
+
+for rentry in R
+    pdtitle(rentry.distr)
+    verify(rentry)
+    test_distr(rentry.distr, n_tsamples)
+end
+
+
+
diff --git a/test/discrete_ref.csv b/test/discrete_ref.csv
@@ -1,9 +1,4 @@
 distr, mean, var, entropy, x25, x50, x75, lp25, lp50, lp75
-"Binomial(1, 0.5)", 5.0000000000000000e-01, 2.5000000000000000e-01, 6.9314718055994529e-01, 0, 0, 1, -6.9314718055994529e-01, -6.9314718055994529e-01, -6.9314718055994529e-01
-"Binomial(5, 0.4)", 2.0000000000000000e+00, 1.2000000000000000e+00, 1.4979981829038540e+00, 1, 2, 3, -1.3501553145040179e+00, -1.0624732420522367e+00, -1.4679383501604009e+00
-"Binomial(6, 0.8)", 4.8000000000000007e+00, 9.5999999999999996e-01, 1.3425774508710622e+00, 4, 5, 6, -1.4033998290228298e+00, -9.3339619977709365e-01, -1.3388613078852583e+00
-"Binomial(100, 0.1)", 1.0000000000000000e+01, 9.0000000000000000e+00, 2.5112331161825550e+00, 8, 10, 12, -2.1643632354294020e+00, -2.0259739768661866e+00, -2.3147790140625677e+00
-"Binomial(100, 0.9)", 9.0000000000000000e+01, 8.9999999999999982e+00, 2.5112331161825514e+00, 88, 90, 92, -2.3147790140625695e+00, -2.0259739768661902e+00, -2.1643632354294020e+00
 "DiscreteUniform(0, 4)", 2.0000000000000000e+00, 2.0000000000000000e+00, 1.6094379124341003e+00, 1, 2, 3, -1.6094379124341003e+00, -1.6094379124341003e+00, -1.6094379124341003e+00
 "DiscreteUniform(2, 8)", 5.0000000000000000e+00, 4.0000000000000000e+00, 1.9459101490553132e+00, 3, 5, 7, -1.9459101490553135e+00, -1.9459101490553135e+00, -1.9459101490553135e+00
 "Geometric(0.1)", 9.0000000000000000e+00, 9.0000000000000000e+01, 3.2508297339144798e+00, 2, 6, 13, -2.5133061243096981e+00, -2.9347481869410030e+00, -3.6722717965457869e+00

diff --git a/test/discrete_ref.txt b/test/discrete_ref.txt
@@ -1,8 +1,3 @@
-Binomial(1, 0.5)
-Binomial(5, 0.4)
-Binomial(6, 0.8)
-Binomial(100, 0.1)
-Binomial(100, 0.9)
 DiscreteUniform(0, 4)
 DiscreteUniform(2, 8)
 Geometric(0.1)

diff --git a/test/fit.jl b/test/fit.jl
@@ -80,18 +80,18 @@ ss = suffstats(Binomial, (100, x), w)
 
 d = fit(Binomial, (100, x))
 @test isa(d, Binomial)
-@test d.size == 100
-@test_approx_eq d.prob sum(x) / (n0 * 100)
+@test ntrials(d) == 100
+@test_approx_eq succprob(d) sum(x) / (n0 * 100)
 
 d = fit(Binomial, (100, x), w)
 @test isa(d, Binomial)
-@test d.size == 100
-@test_approx_eq d.prob dot(x, w) / (sum(w) * 100)
+@test ntrials(d) == 100
+@test_approx_eq succprob(d) dot(x, w) / (sum(w) * 100)
 
 d = fit(Binomial, 100, rand(Binomial(100, 0.3), N))
 @test isa(d, Binomial)
-@test d.size == 100
-@test_approx_eq_eps d.prob 0.3 0.01
+@test ntrials(d) == 100
+@test_approx_eq_eps succprob(d) 0.3 0.01
 
 
 # Categorical