neural_network.py
import numpy as np
import matplotlib.pyplot as plt
from random import randint

# Sigmoid activation and its derivative
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def dSigmoid(x):
    return sigmoid(x) * (1 - sigmoid(x))
# Sizes of the input and output layers (X and Y hold one sample per row)
def layer_sizes(X, Y):
    n_x = X.shape[1]
    n_y = Y.shape[1]
    return (n_x, n_y)
# Initialize weights for the hidden layer and the output layer
def init_params(n_x, n_h, n_y):
    return {
        "W1": np.random.randn(n_x, n_h) * 0.01,
        "b1": np.zeros(n_h),
        "W2": np.random.randn(n_h, n_y) * 0.01,
        "b2": np.zeros(n_y)
    }
# Calculate accuracy of the neural network: percentage of rows where the
# predicted class (argmax of the output) matches the one-hot label
def accuracy(prediction, test):
    predicted_oneHot = np.argmax(prediction, axis=1)
    test_oneHot = np.argmax(test.to_numpy(), axis=1)
    equals = np.equal(predicted_oneHot, test_oneHot)
    return np.mean(equals) * 100
# Forward propagation through both layers
def f_propagate(X, params):
    # First layer
    Z1 = np.dot(X, params['W1']) + params['b1']
    A1 = sigmoid(Z1)
    # Second layer
    Z2 = np.dot(A1, params['W2']) + params['b2']
    A2 = sigmoid(Z2)
    cache = {
        'Z1': Z1,
        'A1': A1,
        'Z2': Z2,
        'A2': A2
    }
    return A2, cache
# Compute the cross-entropy cost, averaged over the m samples:
# J = -(1/m) * sum(Y*log(A2) + (1-Y)*log(1-A2))
def compute_cost(A2, Y):
    Y = np.asarray(Y)
    m = Y.shape[0]
    logprobs = Y * np.log(A2) + (1 - Y) * np.log(1 - A2)
    return -np.sum(logprobs) / m
# Adaptive learning rate: shrink the rate when the cost grows too much,
# grow it when the cost decreases, otherwise leave it unchanged
def adaptive_learning_rate(adaptive, l_rate, cost, prev_cost):
    if prev_cost is None:
        return adaptive['InitialRate']
    if cost > adaptive['ErrorRatio'] * prev_cost:
        return l_rate * adaptive['DecrementVar']
    if cost < prev_cost:
        return l_rate * adaptive['IncrementVar']
    return l_rate
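
# Example adaptive-rate configuration (illustrative values only; the original
# file does not define one). train_nn() accepts such a dict in place of a
# scalar learning rate and reads exactly these four keys:
# adaptive_cfg = {
#     'InitialRate': 0.05,   # rate used on the first epoch
#     'ErrorRatio': 1.04,    # allowed cost growth before the rate is cut
#     'DecrementVar': 0.7,   # multiplier applied when the cost grows too much
#     'IncrementVar': 1.05   # multiplier applied when the cost decreases
# }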
# Backpropagation: gradients are accumulated over the whole batch (no 1/m
# factor); the per-example bias gradients are summed later in update_params
def b_propagate(params, cache, X, Y):
    # Second (output) layer
    dZ2 = cache['A2'] - Y.to_numpy()
    dW2 = np.dot(cache['A1'].T, dZ2)
    db2 = dZ2
    # First (hidden) layer
    dZ1 = np.dot(dZ2, params['W2'].T) * dSigmoid(cache['Z1'])
    dW1 = np.dot(X.T, dZ1)
    db1 = dZ1
    return {
        'dW2': dW2,
        'db2': db2,
        'dW1': dW1,
        'db1': db1
    }
# Gradient-descent update of the weights; the per-example bias gradients
# are summed over the batch here with .sum(axis=0)
def update_params(params, grads, l_rate):
    W1 = params['W1'] - l_rate * grads['dW1']
    b1 = params['b1'] - l_rate * grads['db1'].sum(axis=0)
    W2 = params['W2'] - l_rate * grads['dW2']
    b2 = params['b2'] - l_rate * grads['db2'].sum(axis=0)
    return {
        'W1': W1,
        'b1': b1,
        'W2': W2,
        'b2': b2
    }
# Train the two-layer network; l_rate may be a scalar or an adaptive-rate dict
def train_nn(X, Y, n_h, n_epochs, l_rate, showCost=False):
    # Re-seed with a random value so repeated calls use different initial weights
    np.random.seed(randint(1, 100))
    # Get sizes of the input and output layers
    n_x, n_y = layer_sizes(X, Y)
    # Initialize weights
    params = init_params(n_x, n_h, n_y)
    # Initialize adaptive learning rate parameters
    if isinstance(l_rate, dict):
        adaptive_rate = True
        l_rate_params = l_rate
        l_rate = l_rate_params['InitialRate']
        prev_cost = None
    else:
        adaptive_rate = False
    for i in range(n_epochs):
        # Forward propagate
        A2, cache = f_propagate(X, params)
        # Compute the difference between our predictions and the actual values
        cost = compute_cost(A2, Y)
        # Backpropagation
        grads = b_propagate(params, cache, X, Y)
        # Update the learning rate
        if adaptive_rate:
            l_rate = adaptive_learning_rate(l_rate_params, l_rate, cost, prev_cost)
            prev_cost = cost
        # Update weights
        params = update_params(params, grads, l_rate)
        # Print the cost every 10 epochs
        if showCost and i % 10 == 0:
            print('Cost after iteration %i: %f' % (i, cost))
    return params
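
# Minimal usage sketch (not part of the original script): trains the network on
# a small synthetic one-hot dataset and reports training accuracy. X and Y are
# wrapped in pandas DataFrames because b_propagate() and accuracy() call
# .to_numpy() on the labels; the data, column counts, and hyperparameters below
# are illustrative assumptions, not values from the original repository.
if __name__ == "__main__":
    import pandas as pd

    rng = np.random.default_rng(0)
    features = rng.normal(size=(200, 4))
    # Class = index of the largest of the first three features (a learnable toy rule)
    labels = np.argmax(features[:, :3], axis=1)
    X_demo = pd.DataFrame(features)
    Y_demo = pd.DataFrame(np.eye(3)[labels])

    trained = train_nn(X_demo, Y_demo, n_h=8, n_epochs=100,
                       l_rate=0.001, showCost=True)
    predictions, _ = f_propagate(X_demo, trained)
    print('Training accuracy: %.2f%%' % accuracy(predictions, Y_demo))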