forked from UTK-ML-Dream-Team/accident-severity-prediction
-
Notifications
You must be signed in to change notification settings - Fork 3
/
postprocessing.py
135 lines (93 loc) · 4.23 KB
/
postprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
### NAIVE BAYES FUSION
# Inputs: ytest holds the true labels; ymodels is a list with one array of
# predicted labels per model, built for example as:
# ymodels = []
# ymodels.append(ymodel_dt)
# ymodels.append(ymodel_LR)
# etc.
import numpy as np
from itertools import product
import random
def NB_fusion(ytest, ymodels):
    """Fuse binary classifier outputs with a naive Bayes combiner.

    For each class ``c`` in ``{0, 1}`` the prior ``P(c)`` and, for every
    model ``i``, the likelihoods ``P(model i outputs l | c)`` are estimated
    from ``ytest`` and ``ymodels``.  Every sample is then assigned the class
    with the larger value of ``P(c) * prod_i P(label_i | c)``; ties go to
    class 0.

    Args:
        ytest: True labels, one per sample (array-like of 0/1).
        ymodels: Sequence with one array-like of predicted labels (0/1) per
            model, each as long as ``ytest``.

    Returns:
        1-D integer ``numpy.ndarray`` with the fused label of every sample,
        in the same order as the inputs.  Empty when there are no models or
        no samples.

    Raises:
        ValueError: If the predictions do not form a ``(n_models, n_samples)``
            table matching ``ytest``, or contain labels other than 0 and 1.
    """
    y_true = np.asarray(ytest).ravel()
    preds = np.asarray(ymodels)
    if preds.size == 0:
        return np.array([], dtype=int)
    if preds.ndim != 2 or preds.shape[1] != y_true.shape[0]:
        raise ValueError(
            "each entry of ymodels must be a 1-D label vector as long as ytest"
        )
    if not np.isin(preds, (0, 1)).all():
        raise ValueError("ymodels must contain only the labels 0 and 1")
    preds = preds.astype(int)

    n_samples = y_true.shape[0]
    # Column vector of model indices; broadcast against ``preds`` it picks,
    # for every (model, sample) pair, the likelihood of the emitted label.
    model_idx = np.arange(preds.shape[0])[:, None]

    posteriors = np.zeros((2, n_samples))
    for cls in (0, 1):
        in_class = y_true == cls
        n_class = np.count_nonzero(in_class)
        if n_class == 0:
            # The class never occurs, so its prior (and posterior) is 0;
            # skipping it also avoids dividing by zero below.
            continue
        prior = n_class / n_samples
        class_preds = preds[:, in_class]
        # likelihood[i, l] = P(model i outputs label l | true class == cls)
        likelihood = np.stack(
            [np.count_nonzero(class_preds == label, axis=1) for label in (0, 1)],
            axis=1,
        ) / n_class
        posteriors[cls] = prior * likelihood[model_idx, preds].prod(axis=0)

    # argmax returns the first maximum, so a tie resolves to class 0.
    return posteriors.argmax(axis=0)
def BKS(ytest, ymodels):
    """Fuse binary classifier outputs with a Behavior Knowledge Space table.

    Every combination of model labels is a cell of the table.  For each cell
    the samples of true class 0 and of true class 1 that fall into it are
    counted, and the cell is labelled with the more frequent class.  A cell
    with equal counts (including a cell no sample falls into) gets a label
    drawn with ``random.choice([0, 1])``.  Each sample finally receives the
    label of its cell.

    Args:
        ytest: True labels, one per sample (array-like of 0/1).
        ymodels: Sequence with one array-like of predicted labels (0/1) per
            model, each as long as ``ytest``.

    Returns:
        List with the fused label of every sample, in input order.  Empty
        when there are no models or no samples.

    Raises:
        ValueError: If the predictions do not form a ``(n_models, n_samples)``
            table matching ``ytest``, or contain labels other than 0 and 1.
    """
    y_true = np.asarray(ytest).ravel()
    preds = np.asarray(ymodels)
    if preds.size == 0:
        return []
    if preds.ndim != 2 or preds.shape[1] != y_true.shape[0]:
        raise ValueError(
            "each entry of ymodels must be a 1-D label vector as long as ytest"
        )
    if not np.isin(preds, (0, 1)).all():
        raise ValueError("ymodels must contain only the labels 0 and 1")
    preds = preds.astype(int)

    n_models = preds.shape[0]
    n_cells = 2 ** n_models
    # Read each sample's labels as a binary number with the first model as
    # the most significant bit.  This numbers the cells in the same order as
    # itertools.product([0, 1], repeat=n_models) enumerates them.
    bit_weights = 2 ** np.arange(n_models - 1, -1, -1)
    cells = bit_weights @ preds

    # Samples per cell, separately for each true class.
    count0 = np.bincount(cells[y_true == 0], minlength=n_cells)
    count1 = np.bincount(cells[y_true == 1], minlength=n_cells)

    cell_labels = []
    for num0, num1 in zip(count0, count1):
        if num1 > num0:
            cell_labels.append(1)
        elif num0 > num1:
            cell_labels.append(0)
        else:
            # Tie: pick at random.  Drawn for every tied cell, in cell order,
            # so a seeded ``random`` yields a reproducible table.
            cell_labels.append(random.choice([0, 1]))

    return [cell_labels[cell] for cell in cells]