-
Notifications
You must be signed in to change notification settings - Fork 1
/
naivebayes.py
66 lines (58 loc) · 1.69 KB
/
naivebayes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
"""
Naive Bayes classifier for labels in {-1, +1}.
Uses P(y|x) proportional to P(x|y) P(y), with Laplace smoothing.
Assumes binary features, i.e. feature vectors composed entirely of 1s and 0s.
Support for continuous features may be added later.
"""

import numpy as np
def PY(x, y):
    """
    Estimate smoothed class priors from the training labels.

    x -- training features; not used by this function.
    y -- 1-D sequence of labels; the arithmetic below assumes every
         entry is +1 or -1.

    Returns the tuple (pos, neg): the smoothed estimates of P(Y = 1)
    and P(Y = -1). The two values always sum to 1.
    """
    # One pseudo-label per class is appended before counting.
    labels = np.concatenate([y, [-1, 1]])
    total = len(labels)
    label_sum = np.sum(labels)
    denom = total + 2
    # For +/-1 labels, (total + label_sum) / 2 is the number of +1 entries
    # and (total - label_sum) / 2 the number of -1 entries. A further +1 is
    # added to each count (and +2 to the denominator) on top of the
    # pseudo-labels appended above.
    pos = ((total + label_sum) / 2.0 + 1.0) / denom
    neg = ((total - label_sum) / 2.0 + 1.0) / denom
    return pos, neg
def PXY(x, y):
    """
    Estimate smoothed per-feature likelihoods for each class.

    x -- 2-D array of training features, one row per sample.
    y -- 1-D sequence of labels; rows with y > 0 form the positive
         class and rows with y < 0 the negative class.

    Returns the tuple (posprob, negprob). Each is a 1-D array with one
    entry per feature: that feature's column total within the class,
    divided by the sum of all feature values within the class, so each
    array sums to 1.
    """
    _, num_features = x.shape
    # Smoothing: append two all-ones rows, one labelled -1 and one labelled
    # +1, so every feature has a nonzero total in both classes.
    feats = np.concatenate([x, np.ones((2, num_features))])
    labels = np.concatenate([y, [-1, 1]])

    pos_rows = feats[np.nonzero(labels > 0)[0]]
    neg_rows = feats[np.nonzero(labels < 0)[0]]

    # Normalise each class's per-feature totals by that class's grand total.
    posprob = pos_rows.sum(axis=0) / pos_rows.sum()
    negprob = neg_rows.sum(axis=0) / neg_rows.sum()
    return posprob, negprob
def logratio(x, y, xtest):
    """
    log (P(Y = 1|X=xtest)/P(Y=-1|X=xtest))

    x     -- 2-D array of training features (passed to PY and PXY).
    y     -- training labels (passed to PY and PXY).
    xtest -- a single 1-D test vector; only the features equal to 1
             contribute to the ratio.

    Returns a scalar: positive when the positive class is more likely,
    negative when the negative class is more likely.
    """
    PYpos, PYneg = PY(x, y)
    PXYpos, PXYneg = PXY(x, y)
    active = np.where(np.asarray(xtest) == 1)[0]
    # Sum logs instead of taking the log of a product: with many active
    # features the product of probabilities underflows to 0.0, which would
    # turn the result into log(0) - log(0) = nan.
    log_pos = np.log(PYpos) + np.sum(np.log(PXYpos[active]))
    log_neg = np.log(PYneg) + np.sum(np.log(PXYneg[active]))
    return log_pos - log_neg
def classifier(x, y):
    """
    Build the linear form of the Naive Bayes decision rule.

    x -- 2-D array of training features.
    y -- training labels.

    Returns the tuple (w, b): w is the per-feature log-likelihood ratio
    log P(x_j|Y=1) - log P(x_j|Y=-1) and b is the log prior ratio
    log P(Y=1) - log P(Y=-1), both computed from the estimates returned
    by PXY and PY.
    """
    # The unpacked values are not used; the unpack itself raises early
    # unless x has exactly two dimensions.
    n_samples, n_features = x.shape
    prior_pos, prior_neg = PY(x, y)
    like_pos, like_neg = PXY(x, y)
    b = np.log(prior_pos) - np.log(prior_neg)
    w = np.log(like_pos) - np.log(like_neg)
    return w, b
def pred(x, w, b=0):
    """
    Returns predictions for the test data.

    x -- test features: either a 2-D array with one sample per row, or
         a single 1-D sample.
    w -- weight vector (any shape; it is flattened to 1-D).
    b -- bias term added to every score (default 0).

    Returns sign(x . w + b): an array with one entry per row for 2-D
    input, or a single value for a 1-D sample. Entries are +1, -1, or 0
    when the score is exactly zero.
    """
    w = np.asarray(w).reshape(-1)
    x = np.asarray(x)
    # A single matrix-vector product handles both the 1-D and the 2-D case.
    # The bias is added directly rather than by appending a column of ones,
    # which does not work for a 1-D sample.
    return np.sign(np.dot(x, w) + b)