-
Notifications
You must be signed in to change notification settings - Fork 2
/
poisson.py
109 lines (93 loc) · 3.17 KB
/
poisson.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import numpy as np
import json
from load_data import make_freq_dict
from bayesian_blocks import bayesian_blocks
def find_lamhdas(x, bins):
    '''
    Parameters:
    -----------
    => x = train array (any 1-D sequence of numbers)
    => bins = bin edges estimated by using bayesian blocks on train data
    Function:
    ----------
    Estimates the rate (lambda) of every bin as
        (number of train points inside the bin) / (width of the bin).
    Bins are half-open, [bins[i], bins[i+1]), except the last one, which
    also includes its right edge. Points outside every bin are ignored.
    Output:
    -------
    returns list of lambdas, one per bin (len(bins)-1 entries; empty list
    when fewer than two edges are given).
    '''
    samples = np.asarray(x)
    last_bin = len(bins) - 2
    lambdas = []
    for idx, (left, right) in enumerate(zip(bins[:-1], bins[1:])):
        inside = (samples >= left) & (samples < right)
        if idx == last_bin:
            # Only the final bin is closed on the right, so the largest
            # edge value is not dropped.
            inside |= samples == right
        count = int(np.count_nonzero(inside))
        # abs() keeps the width positive, as in the original formula.
        # A zero-width bin is not handled specially here.
        lambdas.append(count / abs(left - right))
    return lambdas
def likeli_poisson(x_test, lambdas, bins):
    '''
    Parameters:
    -----------
    => x_test = test array
    => lambdas = list of lambdas estimated for each bin on train data
                 (one entry per bin, i.e. len(bins)-1 entries)
    => bins = bin edges estimated by using bayesian blocks on train data
    Function:
    ----------
    Computes the log likelihood of the test data using the parameters
    estimated on train data. Every test point j that falls in bin i adds
        j*ln(lambda_i) - lambda_i
    to the total (l(lambda) = -n*lambda + t*ln(lambda) with n=1).
    Bins are half-open, [bins[i], bins[i+1]), except the last one, which
    also includes its right edge. Points outside every bin add nothing.
    Output:
    -------
    returns the log likelihood value (0 when no test point falls in a bin).
    '''
    last_bin = len(bins) - 2
    likli = 0
    for idx, (left, right) in enumerate(zip(bins[:-1], bins[1:])):
        rate = lambdas[idx]
        # The log of the rate is the same for every point of the bin,
        # so compute it once per bin instead of once per point.
        log_rate = np.log(rate)
        for point in x_test:
            # The right edge belongs only to the final bin.
            if left <= point < right or (idx == last_bin and point == right):
                likli += point * log_rate - rate
    return likli
def poisson_bay_block(tr, tes, k, gammas, iter):
    '''
    Parameters:
    -----------
    => tr = train data, indexable by fold (tr[i] is the i-th train array)
    => tes = test data, indexable by fold (tes[i] is the i-th test array)
    => k = value of factor in prior function (1-gamma)/(1-gamma**(N//2**k));
           forwarded to bayesian_blocks as `lam`
    => gammas = list of gammas to iterate over
    => iter = number of times the experiment has to be performed
              (folds 0 .. iter-1 of tr/tes are used)
    Function:
    ----------
    For every gamma and every fold: estimates the bin edges with bayesian
    blocks on the train data, fits one poisson lambda per bin, and
    calculates the negative log likelihood of the test data.
    For every gamma the ratio mean/std of the per-fold negative log
    likelihoods is stored and written as JSON to
    ./select_best/poi_mu_sig_<len(tes[0])>.json
    Output:
    -------
    returns list of [gamma, [[fold, negative log likelihood, number of bins], ...]]
    with one entry per gamma.
    '''
    total_likeli = []
    dumper = {}
    for gamma in gammas:
        likeli_poi = []
        for_best = []
        for fold in range(iter):
            X_train = tr[fold]
            X_test = tes[fold]
            bin_edges = bayesian_blocks(X_train, fitness='poisson', lam=k, gamma=gamma)
            lambdas = find_lamhdas(X_train, bin_edges)
            # Only the test-set score is reported; the train-set likelihood
            # was computed here before but never used, so it is not evaluated.
            neg_log_likeli = -likeli_poisson(X_test, lambdas, bin_edges)
            likeli_poi.append([fold, neg_log_likeli, len(bin_edges) - 1])
            for_best.append(neg_log_likeli)
        total_likeli.append([gamma, likeli_poi])
        mu = np.mean(for_best)
        sig = np.std(for_best)
        # NOTE(review): sig is 0 when all folds give the same score (e.g. iter == 1),
        # so the ratio is then inf/nan -- confirm this is acceptable downstream.
        dumper[gamma] = mu / sig
    # NOTE(review): written once after all gammas are processed; the directory
    # ./select_best is expected to exist already.
    with open("./select_best/poi_mu_sig_" + str(len(tes[0])) + ".json", "w") as write_file:
        json.dump(dumper, write_file)
    return total_likeli