diff --git a/lib/statistics/distribution/bernoulli.rb b/lib/statistics/distribution/bernoulli.rb new file mode 100644 index 0000000..30ab789 --- /dev/null +++ b/lib/statistics/distribution/bernoulli.rb @@ -0,0 +1,35 @@ +module Statistics + module Distribution + class Bernoulli + def self.density_function(n, p) + return if n != 0 && n != 1 # The support of the distribution is n = {0, 1}. + + case n + when 0 then 1.0 - p + when 1 then p + end + end + + def self.cumulative_function(n, p) + return if n != 0 && n != 1 # The support of the distribution is n = {0, 1}. + + case n + when 0 then 1.0 - p + when 1 then 1.0 + end + end + + def self.variance(p) + p * (1.0 - p) + end + + def self.skewness(p) + (1.0 - 2.0*p).to_f / Math.sqrt(p * (1.0 - p)) + end + + def self.kurtosis(p) + (6.0 * (p ** 2) - (6 * p) + 1) / (p * (1.0 - p)) + end + end + end +end diff --git a/lib/statistics/distribution/geometric.rb b/lib/statistics/distribution/geometric.rb new file mode 100644 index 0000000..fe59eb5 --- /dev/null +++ b/lib/statistics/distribution/geometric.rb @@ -0,0 +1,76 @@ +module Statistics + module Distribution + class Geometric + attr_accessor :probability_of_success, :always_success_allowed + + def initialize(p, always_success: false) + self.probability_of_success = p.to_f + self.always_success_allowed = always_success + end + + def density_function(k) + k = k.to_i + + if always_success_allowed + return if k < 0 + + ((1.0 - probability_of_success) ** k) * probability_of_success + else + return if k <= 0 + + ((1.0 - probability_of_success) ** (k - 1.0)) * probability_of_success + end + end + + def cumulative_function(k) + k = k.to_i + + if always_success_allowed + return if k < 0 + + 1.0 - ((1.0 - probability_of_success) ** (k + 1.0)) + else + return if k <= 0 + + 1.0 - ((1.0 - probability_of_success) ** k) + end + end + + def mean + if always_success_allowed + (1.0 - probability_of_success) / probability_of_success + else + 1.0 / probability_of_success + end + end + + def median + if always_success_allowed + (-1.0 / Math.log2(1.0 - probability_of_success)).ceil - 1.0 + else + (-1.0 / Math.log2(1.0 - probability_of_success)).ceil + end + end + + def mode + if always_success_allowed + 0.0 + else + 1.0 + end + end + + def variance + (1.0 - probability_of_success) / (probability_of_success ** 2) + end + + def skewness + (2.0 - probability_of_success) / Math.sqrt(1.0 - probability_of_success) + end + + def kurtosis + 6.0 + ((probability_of_success ** 2) / (1.0 - probability_of_success)) + end + end + end +end diff --git a/lib/statistics/distribution/logseries.rb b/lib/statistics/distribution/logseries.rb new file mode 100644 index 0000000..359f9c6 --- /dev/null +++ b/lib/statistics/distribution/logseries.rb @@ -0,0 +1,51 @@ +module Statistics + module Distribution + class LogSeries + def self.density_function(k, p) + return if k <= 0 + k = k.to_i + + left = (-1.0 / Math.log(1.0 - p)) + right = (p ** k).to_f + + left * right / k + end + + def self.cumulative_function(k, p) + return if k <= 0 + + # Sadly, the incomplete beta function is converging + # too fast to zero and breaking the calculation on logs. + # So, we default to the basic definition of the CDF which is + # the integral (-Inf, K) of the PDF, with P(X <= x) which can + # be solved as a summation of all PDFs from 1 to K. Note that the summation approach + # only applies to discrete distributions. + # + # right = Math.incomplete_beta_function(p, (k + 1).floor, 0) / Math.log(1.0 - p) + # 1.0 + right + + result = 0.0 + 1.upto(k) do |number| + result += self.density_function(number, p) + end + + result + end + + def self.mode + 1.0 + end + + def self.mean(p) + (-1.0 / Math.log(1.0 - p)) * (p / (1.0 - p)) + end + + def self.variance(p) + up = p + Math.log(1.0 - p) + down = ((1.0 - p) ** 2) * (Math.log(1.0 - p) ** 2) + + (-1.0 * p) * (up / down.to_f) + end + end + end +end diff --git a/lib/statistics/distribution/negative_binomial.rb b/lib/statistics/distribution/negative_binomial.rb new file mode 100644 index 0000000..34dc451 --- /dev/null +++ b/lib/statistics/distribution/negative_binomial.rb @@ -0,0 +1,51 @@ +module Statistics + module Distribution + class NegativeBinomial + attr_accessor :number_of_failures, :probability_per_trial + + def initialize(r, p) + self.number_of_failures = r.to_i + self.probability_per_trial = p + end + + def probability_mass_function(k) + return if number_of_failures < 0 || k < 0 || k > number_of_failures + + left = Math.combination(k + number_of_failures - 1, k) + right = ((1 - probability_per_trial) ** number_of_failures) * (probability_per_trial ** k) + + left * right + end + + def cumulative_function(k) + return if k < 0 || k > number_of_failures + k = k.to_i + + 1.0 - Math.incomplete_beta_function(probability_per_trial, k + 1, number_of_failures) + end + + def mean + (probability_per_trial * number_of_failures)/(1 - probability_per_trial).to_f + end + + def variance + (probability_per_trial * number_of_failures)/((1 - probability_per_trial) ** 2).to_f + end + + def skewness + (1 + probability_per_trial).to_f / Math.sqrt(probability_per_trial * number_of_failures) + end + + def mode + if number_of_failures > 1 + up = probability_per_trial * (number_of_failures - 1) + down = (1 - probability_per_trial).to_f + + (up/down).floor + elsif number_of_failures <= 1 + 0.0 + end + end + end + end +end diff --git a/lib/statistics/version.rb b/lib/statistics/version.rb index 9cc9d3a..bf1bdf4 100644 --- a/lib/statistics/version.rb +++ b/lib/statistics/version.rb @@ -1,3 +1,3 @@ module Statistics - VERSION = "2.0.4" + VERSION = "2.0.5" end diff --git a/spec/statistics/distribution/bernoulli_spec.rb b/spec/statistics/distribution/bernoulli_spec.rb new file mode 100644 index 0000000..f39bb12 --- /dev/null +++ b/spec/statistics/distribution/bernoulli_spec.rb @@ -0,0 +1,62 @@ +require 'spec_helper' + +describe Statistics::Distribution::Bernoulli do + describe '.density_function' do + it 'is not defined when the outcome is different from zero or one' do + expect(described_class.density_function(rand(2..10), rand)).to be_nil + expect(described_class.density_function(rand(-5..-1), rand)).to be_nil + end + + it 'returns the expected value when the outcome is zero' do + p = rand + expect(described_class.density_function(0, p)).to eq (1.0 - p) + end + + it 'returns the expected value when the outcome is one' do + p = rand + expect(described_class.density_function(1, p)).to eq p + end + end + + describe '.cumulative_function' do + it 'is not defined when the outcome is different from zero or one' do + expect(described_class.cumulative_function(rand(2..10), rand)).to be_nil + expect(described_class.density_function(rand(-5..-1), rand)).to be_nil + end + + it 'returns the expected value when the outcome is zero' do + p = rand + expect(described_class.cumulative_function(0, p)).to eq (1.0 - p) + end + + it 'returns the expected value when the outcome is one' do + expect(described_class.cumulative_function(1, rand)).to eq 1.0 + end + end + + describe '.variance' do + it 'returns the expected value for the bernoulli distribution' do + p = rand + + expect(described_class.variance(p)).to eq p * (1.0 - p) + end + end + + describe '.skewness' do + it 'returns the expected value for the bernoulli distribution' do + p = rand + expected_value = (1.0 - 2.0*p).to_f / Math.sqrt(p * (1.0 - p)) + + expect(described_class.skewness(p)).to eq expected_value + end + end + + describe '.kurtosis' do + it 'returns the expected value for the bernoulli distribution' do + p = rand + expected_value = (6.0 * (p ** 2) - (6 * p) + 1) / (p * (1.0 - p)) + + expect(described_class.kurtosis(p)).to eq expected_value + end + end +end diff --git a/spec/statistics/distribution/beta_spec.rb b/spec/statistics/distribution/beta_spec.rb index 921ba81..6fa6f2d 100644 --- a/spec/statistics/distribution/beta_spec.rb +++ b/spec/statistics/distribution/beta_spec.rb @@ -62,11 +62,21 @@ expect(described_class.new(alpha, beta).mean).to be_nil end + it 'returns nil if the sum of alpha and beta is zero' do + alpha = -1 + beta = 1 + + expect(described_class.new(alpha, beta).mean).to be_nil + end + it 'calculates the expected mean for the beta distribution' do alpha = rand(-5..5) beta = rand(-5..5) - alpha = 1 if alpha + beta == 0 # To avoid NaN results. + if alpha + beta == 0 # To avoid NaN results. + alpha = 1 + beta = 1 + end expect(described_class.new(alpha, beta).mean).to eq alpha.to_f/(alpha.to_f + beta.to_f) end diff --git a/spec/statistics/distribution/geometric_spec.rb b/spec/statistics/distribution/geometric_spec.rb new file mode 100644 index 0000000..fd38af7 --- /dev/null +++ b/spec/statistics/distribution/geometric_spec.rb @@ -0,0 +1,170 @@ +require 'spec_helper' + +describe Statistics::Distribution::Geometric do + describe '#initialize' do + it 'creates a distribution that does not allow probabilities of 100 % chance by default' do + expect(described_class.new(rand).always_success_allowed).to be false + end + + it 'creates a distribution that allows 100 % probabilities when specified' do + expect(described_class.new(rand, always_success: true).always_success_allowed).to be true + end + end + + context 'when a probability of 100 % chance is allowed' do + let(:p) { rand } + let(:geometric) { described_class.new(p, always_success: true) } + + describe '#mean' do + it 'returns the expected value for the geometric distribution' do + expect(geometric.mean).to eq (1.0 - p) / p + end + end + + describe '#median' do + it 'returns the expected value for the geometric distribution' do + expect(geometric.median).to eq (-1.0 / Math.log2(1.0 - p)).ceil - 1.0 + end + end + + describe '#mode' do + it 'returns the expected value for the geometric distribution' do + expect(geometric.mode).to be_zero + end + end + + describe '#cumulative_function' do + it 'is not defined if the number of trials is negative' do + expect(geometric.cumulative_function(rand(-10...0))).to be_nil + end + + it 'is defined if the number of trials is zero' do + expect(geometric.cumulative_function(0)).not_to be_nil + end + + it 'returns the expected values for the geometric distribution' do + # Results from R: + # pgeom(c(1,2,3,4,5), 0.5) + # [1] 0.750000 0.875000 0.937500 0.968750 0.984375 + + geometric.probability_of_success = 0.5 + results = [0.750000, 0.875000, 0.937500, 0.968750, 0.984375] + + 1.upto(5) do |trial| + expect(geometric.cumulative_function(trial)).to eq results[trial - 1] + end + end + end + + describe '#density_function' do + it 'is not defined if the number of trials is negative' do + expect(geometric.density_function(rand(-10...0))).to be_nil + end + + it 'is defined if the number of trials is zero' do + expect(geometric.density_function(0)).not_to be_nil + end + + it 'returns the expected values for the geometric distribution' do + # Results from R: + # dgeom(c(1,2,3,4,5), 0.5) + # [1] 0.250000 0.125000 0.062500 0.031250 0.015625 + + geometric.probability_of_success = 0.5 + results = [0.250000, 0.125000, 0.062500, 0.031250, 0.015625] + + 1.upto(5) do |trial| + expect(geometric.density_function(trial)).to eq results[trial - 1] + end + end + end + end + + context 'when a probability of 100 % chance is not allowed' do + let(:p) { rand } + let(:geometric) { described_class.new(p, always_success: false) } + + describe '#mean' do + it 'returns the expected value for the geometric distribution' do + expect(geometric.mean).to eq 1.0 / p + end + end + + describe '#median' do + it 'returns the expected value for the geometric distribution' do + expect(geometric.median).to eq (-1.0 / Math.log2(1.0 - p)).ceil + end + end + + describe '#mode' do + it 'returns the expected value for the geometric distribution' do + expect(geometric.mode).to eq 1.0 + end + end + + describe '#cumulative_function' do + it 'is not defined if the number of trials is negative' do + expect(geometric.cumulative_function(rand(-10...0))).to be_nil + end + + it 'is not defined if the number of trials is zero' do + expect(geometric.cumulative_function(0)).to be_nil + end + + it 'returns the expected values for the geometric distribution' do + ## We don't have a way to compare against R results because + # the geometric distribution in R is calculated with p <= 1 + + k = rand(1..10) + + expect(geometric.cumulative_function(k)).to eq (1.0 - ((1.0 - p) ** k)) + end + end + + describe '#density_function' do + it 'is not defined if the number of trials is negative' do + expect(geometric.density_function(rand(-10...0))).to be_nil + end + + it 'is not defined if the number of trials is zero' do + expect(geometric.density_function(0)).to be_nil + end + + it 'returns the expected values for the geometric distribution' do + ## We don't have a way to compare against R results because + # the geometric distribution in R is calculated with p <= 1 + + k = rand(1..10) + + expect(geometric.density_function(k)).to eq ((1.0 - p) ** (k - 1.0)) * p + end + end + end + + describe '#variance' do + it 'returns the expected value for the geometric distribution' do + p = rand + geometric = described_class.new(p) + + expect(geometric.variance).to eq (1.0 - p) / (p ** 2) + end + end + + describe '#skewness' do + it 'returns the expected value for the geometric distribution' do + p = rand + geometric = described_class.new(p) + + expect(geometric.skewness).to eq (2.0 - p) / Math.sqrt(1.0 - p) + end + end + + describe '#kurtosis' do + it 'returns the expected value for the geometric distribution' do + p = rand + geometric = described_class.new(p) + + expect(geometric.kurtosis).to eq (6.0 + ((p ** 2) / (1.0 - p))) + end + end +end diff --git a/spec/statistics/distribution/logseries_spec.rb b/spec/statistics/distribution/logseries_spec.rb new file mode 100644 index 0000000..a3eea05 --- /dev/null +++ b/spec/statistics/distribution/logseries_spec.rb @@ -0,0 +1,67 @@ +require 'spec_helper' + +describe Statistics::Distribution::LogSeries do + describe '.density_function' do + it 'is not defined for negative events' do + expect(described_class.density_function(rand(-5..-1), rand)).to be_nil + end + + it 'is not defined for zero events' do + expect(described_class.density_function(0, rand)).to be_nil + end + + it 'returns the expected result for the logarithmic distribution' do + # Results extracted from http://calculator.vhex.net/calculator/probability/log-series-discrete-probability-mass + # for x = 1, 2, 3, 4, 5 and p = 0.5 + + results = [0.721348, 0.180337, 0.060112, 0.022542, 0.009017] + + 1.upto(5) do |number| + expect(described_class.density_function(number, 0.5).round(6)).to eq results[number - 1] + end + end + end + + describe '.cumulative_function' do + it 'is not defined for negative events' do + expect(described_class.cumulative_function(rand(-5..-1), rand)).to be_nil + end + + it 'is not defined for zero events' do + expect(described_class.cumulative_function(0, rand)).to be_nil + end + + it 'returns the expected result for the logarithmic distribution' do + # Results extracted from http://calculator.vhex.net/calculator/probability/log-series-discrete-cumulative-distribution + # for x = 1, 2, 3, 4, 5 and p = 0.5 + + results = [0.721348, 0.901684, 0.961797, 0.984339, 0.993356] + + 1.upto(5) do |number| + expect(described_class.cumulative_function(number, 0.5).round(6)).to eq results[number - 1] + end + end + end + + describe '.mode' do + it 'returns 1.0' do + expect(described_class.mode).to eq 1.0 + end + end + + describe '.mean' do + it 'returns the expected value for the logseries distribution' do + p = rand + expect(described_class.mean(p)).to eq ((-1.0 / Math.log(1.0 - p)) * (p / (1.0 - p))) + end + end + + describe '.variance' do + it 'returns the expected value for the logseries distribution' do + p = rand + result = (-1.0 * p) * ((p + Math.log(1.0 - p)) / (((1.0 - p) ** 2) * (Math.log(1.0 - p) ** 2))) + + expect(described_class.variance(p)).to eq result + end + end +end diff --git a/spec/statistics/distribution/negative_binomial_spec.rb b/spec/statistics/distribution/negative_binomial_spec.rb new file mode 100644 index 0000000..25de899 --- /dev/null +++ b/spec/statistics/distribution/negative_binomial_spec.rb @@ -0,0 +1,96 @@ +require 'spec_helper' + +describe Statistics::Distribution::NegativeBinomial do + describe '#probability_mass_function' do + it 'is not defined for negative values' do + expect(described_class.new(0, 0).probability_mass_function(rand(-10...0))).to be_nil + end + + it 'is not defined for values greater than the number of defined failures before success' do + k = rand(10) + p = rand + expect(described_class.new(k, p).probability_mass_function(k + 1)).to be_nil + end + + it 'returns the expected results for the negative binomial distribution' do + # Results from R: + # dnbinom(c(1,2,3,4,5), size = 10, prob = 0.5) + # [1] 0.004882812 0.013427734 0.026855469 0.043640137 0.061096191 + + results = [0.00488281, 0.01342773, 0.02685547, 0.04364014, 0.06109619] + binomial = described_class.new(10, 0.5) # Number of failures: 10, probability per trial: 0.5 + + (1..5).each_with_index do |number, index| + expect(binomial.probability_mass_function(number).round(8)).to eq results[index] + end + end + end + + describe '#cumulative_function' do + it 'is not defined for negative values' do + expect(described_class.new(0, 0).cumulative_function(rand(-10...0))).to be_nil + end + + it 'is not defined for values greater than the number of defined failures before success' do + k = rand(10) + p = rand + expect(described_class.new(k, p).cumulative_function(k + 1)).to be_nil + end + + it 'returns the expected results for the negative binomial distribution' do + # Results from R: + # pnbinom(c(1,2,3,4,5), size = 10, prob = 0.5) + # [1] 0.005859375 0.019287109 0.046142578 0.089782715 0.150878906 + + results = [0.005859375, 0.019287109, 0.046142578, 0.089782715, 0.150878906] + binomial = described_class.new(10, 0.5) # Number of trials: 10, probability per trial: 0.5 + + (1..5).each_with_index do |number, index| + expect(binomial.cumulative_function(number).round(9)).to eq results[index] + end + end + end + + describe '#mean' do + it 'returns the expected mean for the specified values' do + n = rand(10) + p = rand + + expect(described_class.new(n, p).mean).to eq (p * n)/(1 - p).to_f + end + end + + describe '#variance' do + it 'returns the expected variance for the specified values' do + n = rand(10) + p = rand + + expect(described_class.new(n, p).variance).to eq (p * n)/((1 - p) ** 2).to_f + end + end + + describe '#mode' do + it 'returns 0.0 if the number of failures before a success is less or equal than one' do + n = rand(-10..1) + p = rand + + expect(described_class.new(n, p).mode).to eq 0.0 + end + + it 'calculates the mode if the number of failures before a success is greather than one' do + n = 2 + rand(10) + p = rand + + expect(described_class.new(n, p).mode).to eq ((p * (n - 1))/(1 - p).to_f).floor + end + end + + describe '#skewness' do + it 'calculates the skewness for the negative binomial distribution' do + n = rand(10) + p = rand + + expect(described_class.new(n, p).skewness).to eq ((1 + p).to_f / Math.sqrt(p * n)) + end + end +end