Add more discrete distributions (#25)

* distributions: discrete: Negative binomial. Add negative binomial distribution and unit tests. * distribution: discrete: Geometric distribution. This change adds the geometric distribution to the list of discrete functions. * distribution: discrete: Bernoulli distribution. * distribution: discrete: LogSeries distribution. * gem: Bump to version 2.0.5. * distribution: beta: Add uncovered case where alpha + beta == 0.
estebanz01 · Jul 4, 2018 · 4ca57b1 · 4ca57b1
1 parent c131d90
commit 4ca57b1
Show file tree

Hide file tree

Showing 10 changed files with 620 additions and 2 deletions.
diff --git a/lib/statistics/distribution/bernoulli.rb b/lib/statistics/distribution/bernoulli.rb
@@ -0,0 +1,35 @@
+module Statistics
+ module Distribution
+ class Bernoulli
+ def self.density_function(n, p)
+ return if n != 0 && n != 1 # The support of the distribution is n = {0, 1}.
+
+ case n
+ when 0 then 1.0 - p
+ when 1 then p
+ end
+ end
+
+ def self.cumulative_function(n, p)
+ return if n != 0 && n != 1 # The support of the distribution is n = {0, 1}.
+
+ case n
+ when 0 then 1.0 - p
+ when 1 then 1.0
+ end
+ end
+
+ def self.variance(p)
+ p * (1.0 - p)
+ end
+
+ def self.skewness(p)
+ (1.0 - 2.0*p).to_f / Math.sqrt(p * (1.0 - p))
+ end
+
+ def self.kurtosis(p)
+ (6.0 * (p ** 2) - (6 * p) + 1) / (p * (1.0 - p))
+ end
+ end
+ end
+end
diff --git a/lib/statistics/distribution/geometric.rb b/lib/statistics/distribution/geometric.rb
@@ -0,0 +1,76 @@
+module Statistics
+ module Distribution
+ class Geometric
+ attr_accessor :probability_of_success, :always_success_allowed
+
+ def initialize(p, always_success: false)
+ self.probability_of_success = p.to_f
+ self.always_success_allowed = always_success
+ end
+
+ def density_function(k)
+ k = k.to_i
+
+ if always_success_allowed
+ return if k < 0
+
+ ((1.0 - probability_of_success) ** k) * probability_of_success
+ else
+ return if k <= 0
+
+ ((1.0 - probability_of_success) ** (k - 1.0)) * probability_of_success
+ end
+ end
+
+ def cumulative_function(k)
+ k = k.to_i
+
+ if always_success_allowed
+ return if k < 0
+
+ 1.0 - ((1.0 - probability_of_success) ** (k + 1.0))
+ else
+ return if k <= 0
+
+ 1.0 - ((1.0 - probability_of_success) ** k)
+ end
+ end
+
+ def mean
+ if always_success_allowed
+ (1.0 - probability_of_success) / probability_of_success
+ else
+ 1.0 / probability_of_success
+ end
+ end
+
+ def median
+ if always_success_allowed
+ (-1.0 / Math.log2(1.0 - probability_of_success)).ceil - 1.0
+ else
+ (-1.0 / Math.log2(1.0 - probability_of_success)).ceil
+ end
+ end
+
+ def mode
+ if always_success_allowed
+ 0.0
+ else
+ 1.0
+ end
+ end
+
+ def variance
+ (1.0 - probability_of_success) / (probability_of_success ** 2)
+ end
+
+ def skewness
+ (2.0 - probability_of_success) / Math.sqrt(1.0 - probability_of_success)
+ end
+
+ def kurtosis
+ 6.0 + ((probability_of_success ** 2) / (1.0 - probability_of_success))
+ end
+ end
+ end
+end
diff --git a/lib/statistics/distribution/logseries.rb b/lib/statistics/distribution/logseries.rb
@@ -0,0 +1,51 @@
+module Statistics
+ module Distribution
+ class LogSeries
+ def self.density_function(k, p)
+ return if k <= 0
+ k = k.to_i
+
+ left = (-1.0 / Math.log(1.0 - p))
+ right = (p ** k).to_f
+
+ left * right / k
+ end
+
+ def self.cumulative_function(k, p)
+ return if k <= 0
+
+ # Sadly, the incomplete beta function is converging
+ # too fast to zero and breaking the calculation on logs.
+ # So, we default to the basic definition of the CDF which is
+ # the integral (-Inf, K) of the PDF, with P(X <= x) which can
+ # be solved as a summation of all PDFs from 1 to K. Note that the summation approach
+ # only applies to discrete distributions.
+ #
+ # right = Math.incomplete_beta_function(p, (k + 1).floor, 0) / Math.log(1.0 - p)
+ # 1.0 + right
+
+ result = 0.0
+ 1.upto(k) do |number|
+ result += self.density_function(number, p)
+ end
+
+ result
+ end
+
+ def self.mode
+ 1.0
+ end
+
+ def self.mean(p)
+ (-1.0 / Math.log(1.0 - p)) * (p / (1.0 - p))
+ end
+
+ def self.variance(p)
+ up = p + Math.log(1.0 - p)
+ down = ((1.0 - p) ** 2) * (Math.log(1.0 - p) ** 2)
+
+ (-1.0 * p) * (up / down.to_f)
+ end
+ end
+ end
+end
diff --git a/lib/statistics/distribution/negative_binomial.rb b/lib/statistics/distribution/negative_binomial.rb
@@ -0,0 +1,51 @@
+module Statistics
+ module Distribution
+ class NegativeBinomial
+ attr_accessor :number_of_failures, :probability_per_trial
+
+ def initialize(r, p)
+ self.number_of_failures = r.to_i
+ self.probability_per_trial = p
+ end
+
+ def probability_mass_function(k)
+ return if number_of_failures < 0 || k < 0 || k > number_of_failures
+
+ left = Math.combination(k + number_of_failures - 1, k)
+ right = ((1 - probability_per_trial) ** number_of_failures) * (probability_per_trial ** k)
+
+ left * right
+ end
+
+ def cumulative_function(k)
+ return if k < 0 || k > number_of_failures
+ k = k.to_i
+
+ 1.0 - Math.incomplete_beta_function(probability_per_trial, k + 1, number_of_failures)
+ end
+
+ def mean
+ (probability_per_trial * number_of_failures)/(1 - probability_per_trial).to_f
+ end
+
+ def variance
+ (probability_per_trial * number_of_failures)/((1 - probability_per_trial) ** 2).to_f
+ end
+
+ def skewness
+ (1 + probability_per_trial).to_f / Math.sqrt(probability_per_trial * number_of_failures)
+ end
+
+ def mode
+ if number_of_failures > 1
+ up = probability_per_trial * (number_of_failures - 1)
+ down = (1 - probability_per_trial).to_f
+
+ (up/down).floor
+ elsif number_of_failures <= 1
+ 0.0
+ end
+ end
+ end
+ end
+end
diff --git a/lib/statistics/version.rb b/lib/statistics/version.rb
@@ -1,3 +1,3 @@
 module Statistics
- VERSION = "2.0.4"
+ VERSION = "2.0.5"
 end
diff --git a/spec/statistics/distribution/bernoulli_spec.rb b/spec/statistics/distribution/bernoulli_spec.rb
@@ -0,0 +1,62 @@
+require 'spec_helper'
+
+describe Statistics::Distribution::Bernoulli do
+ describe '.density_function' do
+ it 'is not defined when the outcome is different from zero or one' do
+ expect(described_class.density_function(rand(2..10), rand)).to be_nil
+ expect(described_class.density_function(rand(-5..-1), rand)).to be_nil
+ end
+
+ it 'returns the expected value when the outcome is zero' do
+ p = rand
+ expect(described_class.density_function(0, p)).to eq (1.0 - p)
+ end
+
+ it 'returns the expected value when the outcome is one' do
+ p = rand
+ expect(described_class.density_function(1, p)).to eq p
+ end
+ end
+
+ describe '.cumulative_function' do
+ it 'is not defined when the outcome is different from zero or one' do
+ expect(described_class.cumulative_function(rand(2..10), rand)).to be_nil
+ expect(described_class.density_function(rand(-5..-1), rand)).to be_nil
+ end
+
+ it 'returns the expected value when the outcome is zero' do
+ p = rand
+ expect(described_class.cumulative_function(0, p)).to eq (1.0 - p)
+ end
+
+ it 'returns the expected value when the outcome is one' do
+ expect(described_class.cumulative_function(1, rand)).to eq 1.0
+ end
+ end
+
+ describe '.variance' do
+ it 'returns the expected value for the bernoulli distribution' do
+ p = rand
+
+ expect(described_class.variance(p)).to eq p * (1.0 - p)
+ end
+ end
+
+ describe '.skewness' do
+ it 'returns the expected value for the bernoulli distribution' do
+ p = rand
+ expected_value = (1.0 - 2.0*p).to_f / Math.sqrt(p * (1.0 - p))
+
+ expect(described_class.skewness(p)).to eq expected_value
+ end
+ end
+
+ describe '.kurtosis' do
+ it 'returns the expected value for the bernoulli distribution' do
+ p = rand
+ expected_value = (6.0 * (p ** 2) - (6 * p) + 1) / (p * (1.0 - p))
+
+ expect(described_class.kurtosis(p)).to eq expected_value
+ end
+ end
+end
diff --git a/spec/statistics/distribution/beta_spec.rb b/spec/statistics/distribution/beta_spec.rb
@@ -62,11 +62,21 @@
  expect(described_class.new(alpha, beta).mean).to be_nil
  end
 
+ it 'returns nil if the sum of alpha and beta is zero' do
+ alpha = -1
+ beta = 1
+
+ expect(described_class.new(alpha, beta).mean).to be_nil
+ end
+
  it 'calculates the expected mean for the beta distribution' do
  alpha = rand(-5..5)
  beta = rand(-5..5)
 
- alpha = 1 if alpha + beta == 0 # To avoid NaN results.
+ if alpha + beta == 0 # To avoid NaN results.
+ alpha = 1
+ beta = 1
+ end
 
  expect(described_class.new(alpha, beta).mean).to eq alpha.to_f/(alpha.to_f + beta.to_f)
  end