Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add functions for the hypergeometric distribution #256

Merged
merged 1 commit into from
Jul 19, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion doc/source/univariate.rst
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,11 @@ A `Geometric distribution <http://en.wikipedia.org/wiki/Geometric_distribution>`
Hypergeometric Distribution
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

A `Hypergeometric distribution <http://en.wikipedia.org/wiki/Hypergeometric_distribution>`_ describes the number of successes in *n* draws without replacement from a finite population containing *s* successes and *f* failures.
A `Hypergeometric distribution <http://en.wikipedia.org/wiki/Hypergeometric_distribution>`_ describes the number of successes in *n* draws without replacement from a finite population containing *s* successes and *f* failures. The probability mass function is:

.. math::

P(X=x) = {{{s \choose x} {f \choose {n-x}}}\over {s+f \choose n}}, \quad x \in [\max(0, n - f), \min(n, s)]

.. code-block:: julia

Expand Down
26 changes: 23 additions & 3 deletions src/univariate/hypergeometric.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,34 @@ end

@_jl_dist_3p Hypergeometric hyper

function insupport(d::Hypergeometric, x::Real)
isinteger(x) && zero(x) <= x <= d.n && (d.n - d.nf) <= x <= d.ns
end
# handling support
minimum(d::Hypergeometric) = max(0, d.n - d.nf)
maximum(d::Hypergeometric) = min(d.ns, d.n)
support(d::Hypergeometric) = minimum(d):maximum(d)

# properties
mean(d::Hypergeometric) = d.n * d.ns / (d.ns + d.nf)

function var(d::Hypergeometric)
N = d.ns + d.nf
p = d.ns / N
d.n * p * (1.0 - p) * (N - d.n) / (N - 1.0)
end
mode(d::Hypergeometric) = int(floor((d.n+1) * (d.ns+1) / (d.ns+d.nf+2)))

function modes(d::Hypergeometric)
if (d.ns == d.nf) && mod(d.n, 2) == 1
[(d.n-1)/2, (d.n+1)/2]
else
[mode(d)]
end
end

skewness(d::Hypergeometric) = (d.nf-d.ns)*sqrt(d.ns+d.nf-1)*(d.ns+d.nf-2*d.n)/sqrt(d.n*d.ns*d.nf*(d.ns+d.nf-d.n))/(d.ns+d.nf-2)
function kurtosis(d::Hypergeometric)
N = d.ns + d.nf
a = (N-1) * N^2 * (N * (N+1) - 6*d.ns * (N-d.ns) - 6*d.n*(N-d.n)) + 6*d.n*d.ns*(d.nf)*(N-d.n)*(5*N-6)
b = (d.n*d.ns*(N-d.ns) * (N-d.n)*(N-2)*(N-3))
a/b
end

1 change: 1 addition & 0 deletions src/univariates.jl
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ proper_kurtosis(d::Distribution) = kurtosis(d, false)
## pdf, cdf, and friends

logpdf(d::UnivariateDistribution, x::Number) = log(pdf(d, x))
cdf(d::DiscreteUnivariateDistribution, k::Real) = sum([pdf(d,i) for i in minimum(d):k])
ccdf(d::UnivariateDistribution, q::Real) = 1.0 - cdf(d, q)
cquantile(d::UnivariateDistribution, p::Real) = quantile(d, 1.0 - p)

Expand Down
46 changes: 46 additions & 0 deletions test/hypergeometric.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
using Distributions
using Base.Test

# simple example
ns = 4
nf = 6
n = 3

d = Hypergeometric(ns, nf, n)

@test_approx_eq mean(d) 6.0/5.0
@test_approx_eq var(d) 14.0/25.0
@test_approx_eq skewness(d) 1.0/(2.0*sqrt(14.0))
@test_approx_eq kurtosis(d) 128.0/49.0-3
@test mode(d) == 1

@test quantile(d, 0.05) == 0
@test quantile(d, 0.95) == 2

expected = [1/3 1 3/5 1/15]/2
@test_approx_eq logpdf(d, 0:3) log(expected)
@test_approx_eq pdf(d, 0:3) expected
@test_approx_eq cdf(d, 0:3) cumsum(expected,2)


# http://en.wikipedia.org/wiki/Fisher's_noncentral_hypergeometric_distribution
# (distributions are both equal to the (central) hypergeometric distribution when the odds ratio is 1.)
ns = 80
nf = 60
n = 100

d = Hypergeometric(ns,nf,n)

@test_approx_eq mean(d) 400.0/7
@test_approx_eq var(d) 48000.0/6811.0
@test_approx_eq skewness(d) sqrt(139/30)/92
@test_approx_eq kurtosis(d) 44952739/15124800-3
@test mode(d) == 57

@test quantile(d, 0.05) == 53
@test quantile(d, 0.95) == 62

expected = [0.10934614 0.13729286 0.14961947 0.14173721 0.11682889 0.08382473]
@test_approx_eq_eps logpdf(d, 55:60) log(expected) 1e-7
@test_approx_eq_eps pdf(d, 55:60) expected 1e-7
@test_approx_eq_eps cdf(d, 55:60) cumsum(expected,2)+cdf(d, 54) 1e-7
4 changes: 2 additions & 2 deletions test/noncentralhypergeometric.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ ref = Hypergeometric(ns,nf,n)
@test_approx_eq_eps cdf(d, 51) cdf(ref, 51) 1e-7
@test_approx_eq_eps quantile(d, 0.05) quantile(ref, 0.05) 1e-7
@test_approx_eq_eps quantile(d, 0.95) quantile(ref, 0.95) 1e-7
@test mode(d) == 57
@test mode(d) == mode(ref)

## Wallenius' noncentral hypergeometric distribution
ns = 80
Expand Down Expand Up @@ -81,4 +81,4 @@ ref = Hypergeometric(ns,nf,n)
@test_approx_eq_eps cdf(d, 51) cdf(ref, 51) 1e-7
@test_approx_eq_eps quantile(d, 0.05) quantile(ref, 0.05) 1e-7
@test_approx_eq_eps quantile(d, 0.95) quantile(ref, 0.95) 1e-7
@test mode(d) == 57
@test mode(d) == mode(ref)
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ tests = [
"truncate",
"multinomial",
"dirichlet",
"hypergeometric",
"multivariate",
"mvnormal",
"mvtdist",
Expand Down
1 change: 1 addition & 0 deletions test/univariate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ distlist = [Arcsine(),
Hypergeometric(3.0, 2.0, 2.0),
Hypergeometric(2.0, 3.0, 2.0),
Hypergeometric(2.0, 2.0, 3.0),
Hypergeometric(60.0, 80.0, 100.0),
InverseGaussian(1.0,1.0),
InverseGaussian(2.0,7.0),
InverseGamma(1.0, 1.0),
Expand Down