"""Extension template for probability distributions - simple pattern."""
# todo: write an informative docstring for the file or module, remove the above
# todo: add an appropriate copyright notice for your estimator
# estimators contributed to skpro should have the copyright notice at the top
# estimators of your own do not need to have permissive or BSD-3 copyright
# todo: uncomment the following line, enter authors' GitHub IDs
# __author__ = [authorGitHubID, anotherAuthorGitHubID]
from skpro.distributions.base import BaseDistribution
# todo: add any necessary imports here - no soft dependency imports
# todo: for imports of skpro soft dependencies:
# make sure to fill in the "python_dependencies" tag with the package import name
# import soft dependencies only inside methods of the class, not at the top of the file
# todo: change class name and write docstring
class ClassName(BaseDistribution):
"""Custom probability distribution. todo: write docstring.
todo: describe your custom probability distribution here
Parameters
----------
param1 : float or np.ndarray
descriptive explanation of param1
param2 : float or np.ndarray, optional (default="param2default")
descriptive explanation of param2
"""
# todo: fill out estimator tags here
# tags are inherited from parent class if they are not set
# tags inherited from base are "safe defaults" which can usually be left as-is
_tags = {
# packaging info
# --------------
"authors": ["author1", "author2"], # authors, GitHub handles
"maintainers": ["maintainer1", "maintainer2"], # maintainers, GitHub handles
# author = significant contribution to code at some point
# maintainer = algorithm maintainer role, "owner"
# specify one or multiple authors and maintainers, only for skpro contribution
# remove maintainer tag if maintained by skpro/sktime core team
#
"python_version": None, # PEP 440 python version specifier to limit versions
"python_dependencies": None, # PEP 440 python dependencies specifier,
# e.g., "numba>0.53", or a list, e.g., ["numba>0.53", "numpy>=1.19.0"]
# delete if no python dependencies or version limitations
#
# estimator tags
# --------------
"distr:measuretype": "continuous", # one of "discrete", "continuous", "mixed"
# these tags should correspond to which methods are numerically exact
# and which are approximations, e.g., using Monte Carlo
"capabilities:approx": ["pdfnorm", "energy"],
"capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf", "ppf"],
# leave the broadcast_init tag as-is, this tag exists for compatibility with
# distributions deviating from assumptions on input parameters, e.g., Empirical
"broadcast_init": "on",
}
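# illustrative example (not from the template): for a hypothetical discrete
# count distribution with exact pmf/cdf/ppf but approximate mean and variance,
# the estimator tags might instead read:
#
# "distr:measuretype": "discrete",
# "capabilities:approx": ["mean", "var", "energy", "pdfnorm"],
# "capabilities:exact": ["pmf", "log_pmf", "cdf", "ppf"],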
# todo: fill init
# params should be written to self and never changed
# super call must not be removed, change class name
# parameter checks can go after super call
def __init__(self, param1, param2="param2default", index=None, columns=None):
# all distributions must have index and columns arg with None defaults
# this is to ensure pandas-like behaviour
# todo: write any hyper-parameters and components to self
self.param1 = param1
self.param2 = param2
# leave this as is
super().__init__(index=index, columns=columns)
# todo: optional, parameter checking logic (if applicable) should happen here
# if the logic writes derived values to self, it should *not* overwrite self.param1 etc
# instead, write to self._param1, self._newparam (names starting with _)
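# illustrative sketch of a parameter check (hypothetical - assumes param1 is a
# positive scale parameter and that numpy is imported as np at module level):
#
# if np.any(np.asarray(self.param1) <= 0):
#     raise ValueError("param1 must be positive")
# # derived value goes to an underscored attribute, self.param1 stays untouched
# self._param1 = np.asarray(self.param1, dtype=float)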
# todo: implement as many of the following methods as possible
# if not implemented, the base class will try to fill it in
# from the other implemented methods
# at least one of _ppf and sample should be implemented for the distribution to be usable
# if _ppf is implemented, sample does not need to be implemented (uses ppf sampling)
# todo: consider implementing
# if not implemented, uses Monte Carlo estimate via sample
def _mean(self):
"""Return expected value of the distribution.
Returns
-------
2D np.ndarray, same shape as ``self``
expected value of distribution (entry-wise)
"""
param1 = self._bc_params["param1"] # returns broadcast params to x.shape
param2 = self._bc_params["param2"] # returns broadcast params to x.shape
res = "do_sth_with(" + param1 + param2 + ")" # replace this by internal logic
return res
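# illustrative example for the _mean stub above (hypothetical - assumes a
# normal-like distribution parametrised by mu and sigma):
#
# mu = self._bc_params["mu"]  # already broadcast to the shape of self
# return mu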
# todo: consider implementing
# if not implemented, uses Monte Carlo estimate via sample
def _var(self):
r"""Return element/entry-wise variance of the distribution.
Returns
-------
2D np.ndarray, same shape as ``self``
variance of the distribution (entry-wise)
"""
param1 = self._bc_params["param1"] # returns broadcast params to x.shape
param2 = self._bc_params["param2"] # returns broadcast params to x.shape
res = "do_sth_with(" + param1 + param2 + ")" # replace this by internal logic
return res
# todo: consider implementing - only for continuous or mixed distributions
# at least one of _pdf and _log_pdf should be implemented
# if not implemented, returns exp of log_pdf
def _pdf(self, x):
"""Probability density function.
Parameters
----------
x : 2D np.ndarray, same shape as ``self``
values to evaluate the pdf at
Returns
-------
2D np.ndarray, same shape as ``self``
pdf values at the given points
"""
param1 = self._bc_params["param1"] # returns broadcast params to x.shape
param2 = self._bc_params["param2"] # returns broadcast params to x.shape
res = "do_sth_with(" + param1 + param2 + ")" # replace this by internal logic
return res
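# illustrative example for the _pdf stub above (hypothetical - assumes a
# normal-like distribution with parameters mu and sigma, and that scipy is
# available; the import goes inside the method):
#
# from scipy.stats import norm
#
# mu = self._bc_params["mu"]
# sigma = self._bc_params["sigma"]
# return norm.pdf(x, loc=mu, scale=sigma)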
# todo: consider implementing - only for continuous or mixed distributions
# at least one of _pdf and _log_pdf should be implemented
# if not implemented, returns log of pdf
def _log_pdf(self, x):
"""Logarithmic probability density function.
Parameters
----------
x : 2D np.ndarray, same shape as ``self``
values to evaluate the pdf at
Returns
-------
2D np.ndarray, same shape as ``self``
log pdf values at the given points
"""
param1 = self._bc_params["param1"] # returns broadcast params to x.shape
param2 = self._bc_params["param2"] # returns broadcast params to x.shape
res = "do_sth_with(" + param1 + param2 + ")" # replace this by internal logic
return res
# todo: consider implementing - only for discrete or mixed distributions
# at least one of _pmf and _log_pmf should be implemented
# if not implemented, returns exp of log_pmf
def _pmf(self, x):
"""Probability mass function.
Parameters
----------
x : 2D np.ndarray, same shape as ``self``
values to evaluate the pmf at
Returns
-------
2D np.ndarray, same shape as ``self``
pmf values at the given points
"""
param1 = self._bc_params["param1"] # returns broadcast params to x.shape
param2 = self._bc_params["param2"] # returns broadcast params to x.shape
res = "do_sth_with(" + param1 + param2 + ")" # replace this by internal logic
return res
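# illustrative example for the _pmf stub above (hypothetical - assumes a
# Poisson-like distribution with rate parameter lam, and scipy available):
#
# from scipy.stats import poisson
#
# lam = self._bc_params["lam"]
# return poisson.pmf(x, mu=lam)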
# todo: consider implementing - only for discrete or mixed distributions
# at least one of _pmf and _log_pmf should be implemented
# if not implemented, returns log of pmf
def _log_pmf(self, x):
"""Logarithmic probability mass function.
Parameters
----------
x : 2D np.ndarray, same shape as ``self``
values to evaluate the pmf at
Returns
-------
2D np.ndarray, same shape as ``self``
log pmf values at the given points
"""
param1 = self._bc_params["param1"] # returns broadcast params to x.shape
param2 = self._bc_params["param2"] # returns broadcast params to x.shape
res = "do_sth_with(" + param1 + param2 + ")" # replace this by internal logic
return res
# todo: consider implementing
# at least one of _ppf and sample must be implemented
# if not implemented, uses Monte Carlo estimate based on sample
def _cdf(self, x):
"""Cumulative distribution function.
Parameters
----------
x : 2D np.ndarray, same shape as ``self``
values to evaluate the cdf at
Returns
-------
2D np.ndarray, same shape as ``self``
cdf values at the given points
"""
param1 = self._bc_params["param1"] # returns broadcast params to x.shape
param2 = self._bc_params["param2"] # returns broadcast params to x.shape
res = "do_sth_with(" + param1 + param2 + ")" # replace this by internal logic
return res
# todo: consider implementing
# at least one of _ppf and sample must be implemented
# if not implemented, uses bisection method on cdf
def _ppf(self, p):
"""Quantile function = percent point function = inverse cdf.
Parameters
----------
p : 2D np.ndarray, same shape as ``self``
values to evaluate the ppf at
Returns
-------
2D np.ndarray, same shape as ``self``
ppf values at the given points
"""
pass
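# illustrative example for the _ppf stub above (hypothetical - assumes a
# normal-like distribution with parameters mu and sigma, and scipy available):
#
# from scipy.stats import norm
#
# mu = self._bc_params["mu"]
# sigma = self._bc_params["sigma"]
# return norm.ppf(p, loc=mu, scale=sigma)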
# todo: consider implementing
# if not implemented, uses Monte Carlo estimate via sample
def _energy_self(self):
r"""Energy of self, w.r.t. self.
:math:`\mathbb{E}[|X-Y|]`, where :math:`X, Y` are i.i.d. copies of self.
Private method, to be implemented by subclasses.
Returns
-------
2D np.ndarray, same shape as ``self``
energy values w.r.t. the given points
"""
param1 = self._bc_params["param1"] # returns broadcast params to x.shape
param2 = self._bc_params["param2"] # returns broadcast params to x.shape
res = "do_sth_with(" + param1 + param2 + ")" # replace this by internal logic
return res
# todo: consider implementing
# if not implemented, uses Monte Carlo estimate via sample
def _energy_x(self, x):
r"""Energy of self, w.r.t. a constant frame x.
:math:`\mathbb{E}[|X-x|]`, where :math:`X` is a copy of self,
and :math:`x` is a constant.
Private method, to be implemented by subclasses.
Parameters
----------
x : 2D np.ndarray, same shape as ``self``
values to compute energy w.r.t. to
Returns
-------
2D np.ndarray, same shape as ``self``
energy values w.r.t. the given points
"""
param1 = self._bc_params["param1"] # returns broadcast params to x.shape
param2 = self._bc_params["param2"] # returns broadcast params to x.shape
res = "do_sth_with(" + param1 + param2 + ")" # replace this by internal logic
return res
# todo: consider implementing
# at least one of _ppf and sample must be implemented
# if not implemented, uses _ppf for sampling (inverse cdf on uniform)
def sample(self, n_samples=None):
"""Sample from the distribution.
Parameters
----------
n_samples : int, optional, default = None
number of samples to draw from the distribution
Returns
-------
pd.DataFrame
samples from the distribution
* if ``n_samples`` is ``None``:
returns a sample that contains a single sample from ``self``,
in ``pd.DataFrame`` mtype format convention, with ``index`` and ``columns``
as ``self``
* if n_samples is ``int``:
returns a ``pd.DataFrame`` that contains ``n_samples`` i.i.d.
samples from ``self``, in ``pd-multiindex`` mtype format convention,
with same ``columns`` as ``self``, and row ``MultiIndex`` that is product
of ``RangeIndex(n_samples)`` and ``self.index``
"""
param1 = self._bc_params["param1"] # returns broadcast params to x.shape
param2 = self._bc_params["param2"] # returns broadcast params to x.shape
res = "do_sth_with(" + param1 + param2 + ")" # replace this by internal logic
return res
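# illustrative sketch for the sample stub above (hypothetical - assumes a
# normal-like distribution with parameters mu and sigma; ignores any
# random_state handling for brevity):
#
# import numpy as np
# import pandas as pd
#
# mu = self._bc_params["mu"]
# sigma = self._bc_params["sigma"]
#
# def one_sample():
#     values = np.random.normal(loc=mu, scale=sigma)
#     return pd.DataFrame(values, index=self.index, columns=self.columns)
#
# if n_samples is None:
#     return one_sample()
# # n_samples draws, stacked into the pd-multiindex format described above
# return pd.concat([one_sample() for _ in range(n_samples)], keys=range(n_samples))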
# todo: return default parameters, so that a test instance can be created
# required for automated unit and integration testing of estimator
@classmethod
def get_test_params(cls, parameter_set="default"):
"""Return testing parameter settings for the estimator.
Parameters
----------
parameter_set : str, default="default"
Name of the set of test parameters to return, for use in tests. If no
special parameters are defined for a value, will return `"default"` set.
Returns
-------
params : dict or list of dict, default = {}
Parameters to create testing instances of the class
Each dict contains parameters to construct an "interesting" test instance, i.e.,
`MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
`create_test_instance` uses the first (or only) dictionary in `params`
"""
# todo: set the testing parameters for the estimator
# Testing parameters can be dictionary or list of dictionaries
#
# this can, if required, use:
# class properties (e.g., inherited); parent class test case
# imported objects such as estimators from skpro or sklearn
# important: all such imports should be *inside get_test_params*, not at the top
# since imports are used only at testing time
#
# A parameter dictionary must be returned *for all values* of parameter_set,
# i.e., "parameter_set not available" errors should never be raised.
#
# A good parameter set should primarily satisfy two criteria:
# 1. The chosen parameters should keep testing time low,
# ideally on the order of a few seconds for the entire test suite.
# This is vital for cases where default values would result in
# "big" models, which not only increase test time but also
# risk crashing the test workers.
# 2. There should be at least two such parameter sets, with different
# values, to ensure a wide range of code coverage.
#
# example 1: specify params as dictionary
# any number of params can be specified
# params = {"est": value0, "parama": value1, "paramb": value2}
#
# example 2: specify params as list of dictionary
# note: only the first dictionary will be used by create_test_instance
# params = [{"est": value1, "parama": value2},
# {"est": value3, "parama": value4}]
#
# example 3: parameter set depending on param_set value
# note: only needed if a separate parameter set is needed in tests
# if parameter_set == "special_param_set":
# params = {"est": value1, "parama": value2}
# return params
#
# # "default" params
# params = {"est": value3, "parama": value4}
# return params
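# illustrative example for the class stub above (hypothetical values for
# param1 and param2 - both dicts must construct valid instances):
# params1 = {"param1": 1.0, "param2": 2.0}
# params2 = {"param1": [[1.0, 2.0], [3.0, 4.0]], "param2": 0.5}
# return [params1, params2]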