-
-
Notifications
You must be signed in to change notification settings - Fork 2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add HyperGeometric Distribution to pymc3.distributions.discrete #3504 #4108
Changes from 5 commits
0e865dc
7d12a7b
a2bffe7
fec3029
a23960f
bd6e7fb
d797ef7
c708e2f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -28,7 +28,8 @@ | |||||||||||||||||||
__all__ = ['Binomial', 'BetaBinomial', 'Bernoulli', 'DiscreteWeibull', | ||||||||||||||||||||
'Poisson', 'NegativeBinomial', 'ConstantDist', 'Constant', | ||||||||||||||||||||
'ZeroInflatedPoisson', 'ZeroInflatedBinomial', 'ZeroInflatedNegativeBinomial', | ||||||||||||||||||||
'DiscreteUniform', 'Geometric', 'Categorical', 'OrderedLogistic'] | ||||||||||||||||||||
'DiscreteUniform', 'Geometric', 'HyperGeometric', 'Categorical', | ||||||||||||||||||||
'OrderedLogistic'] | ||||||||||||||||||||
|
||||||||||||||||||||
|
||||||||||||||||||||
class Binomial(Discrete): | ||||||||||||||||||||
|
@@ -819,6 +820,110 @@ def _repr_latex_(self, name=None, dist=None): | |||||||||||||||||||
get_variable_name(p)) | ||||||||||||||||||||
|
||||||||||||||||||||
|
||||||||||||||||||||
class HyperGeometric(Discrete):
    R"""
    Discrete hypergeometric distribution.

    The probability of :math:`x` successes in a sample of size :math:`n`
    drawn *without replacement* from a population of size :math:`N` that
    contains a total of :math:`k` successful individuals.

    The pmf of this distribution is

    .. math:: f(x \mid N, n, k) = \frac{\binom{k}{x}\binom{N-k}{n-x}}{\binom{N}{n}}

    .. plot::

        import matplotlib.pyplot as plt
        import numpy as np
        import scipy.stats as st
        plt.style.use('seaborn-darkgrid')
        x = np.arange(1, 15)
        N = 50
        k = 10
        for n in [20, 25]:
            pmf = st.hypergeom.pmf(x, N, k, n)
            plt.plot(x, pmf, '-o', label='N = {}, k = {}, n = {}'.format(N, k, n))
        plt.xlabel('x', fontsize=12)
        plt.ylabel('f(x)', fontsize=12)
        plt.legend(loc=1)
        plt.show()

    ========  =============================
    Support   :math:`x \in \left[\max(0, n - N + k), \min(k, n)\right]`
    Mean      :math:`\dfrac{nk}{N}`
    Variance  :math:`\dfrac{(N-n)nk(N-k)}{(N-1)N^2}`
    ========  =============================

    Parameters
    ----------
    N : integer
        Total size of the population
    k : integer
        Number of successful individuals in the population
    n : integer
        Number of samples drawn from the population
    """

    def __init__(self, N, k, n, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.N = N = tt.as_tensor_variable(intX(N))
        self.k = k = tt.as_tensor_variable(intX(k))
        self.n = n = tt.as_tensor_variable(intX(n))
        self.mode = intX(tt.floor((n + 1) * (k + 1) / (N + 2)))

    def random(self, point=None, size=None):
        r"""
        Draw random values from HyperGeometric distribution.

        Parameters
        ----------
        point : dict, optional
            Dict of variable values on which random values are to be
            conditioned (uses default point if not specified).
        size : int, optional
            Desired size of random sample (returns one sample if not
            specified).

        Returns
        -------
        array
        """
        N, k, n = draw_values([self.N, self.k, self.n], point=point, size=size)
        # np.random.hypergeometric is parameterized as (ngood, nbad, nsample):
        # the number of successes, the number of failures and the sample size.
        # Translate from (N, k, n) instead of passing them through unchanged.
        return generate_samples(np.random.hypergeometric, k, N - k, n,
                                dist_shape=self.shape,
                                size=size)

    def logp(self, value):
        r"""
        Calculate log-probability of HyperGeometric distribution at specified value.

        Parameters
        ----------
        value : numeric
            Value(s) for which log-probability is calculated. If the log probabilities for multiple
            values are desired the values must be provided in a numpy array or theano tensor

        Returns
        -------
        TensorVariable
        """
        N = self.N
        k = self.k
        n = self.n
        # log C(k, x) + log C(N - k, n - x) - log C(N, n)
        log_pmf = binomln(k, value) + binomln(N - k, n - value) - binomln(N, n)
        # 0 <= N is implied by 0 <= k <= N and therefore not checked separately.
        return bound(log_pmf,
                     0 <= k, k <= N, 0 <= n,
                     n - N + k <= value, 0 <= value,
                     value <= k, value <= n)

    def _repr_latex_(self, name=None, dist=None):
        if dist is None:
            dist = self
        N = dist.N
        n = dist.n
        k = dist.k
        name = r'\text{%s}' % name
        return r'${} \sim \text{{HyperGeometric}}(\mathit{{N}}={},~\mathit{{n}}={},~\mathit{{k}}={})$'.format(name,
                                                get_variable_name(N),
                                                get_variable_name(n),
                                                get_variable_name(k))
|
||||||||||||||||||||
|
||||||||||||||||||||
class DiscreteUniform(Discrete): | ||||||||||||||||||||
R""" | ||||||||||||||||||||
Discrete uniform distribution. | ||||||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Isn't this better?