# back_propagation.py

import numpy as np


class Unit:
    """A node in the computation graph.

    Parents are the operands a unit is computed from; children are the units
    computed from it, stored together with this unit's index in their parents.
    """

    def __init__(self, *parents):
        self.parents = list(parents)
        self.children = []
        for i, parent in enumerate(parents):
            parent.add_child(self, i)

    def add_parent(self, parent):
        self.parents.append(parent)
        parent.add_child(self, len(self.parents) - 1)

    def get_parents(self):
        return self.parents

    def add_child(self, child, index):
        # A child is a pair of (child unit, index of self within the child's parents).
        self.children.append((child, index))

    def get_children(self):
        return self.children

    def __add__(self, other):
        return Add(self, other)

    def __mul__(self, other):
        return Multiply(self, other)

    def __sub__(self, other):
        return Subtract(self, other)

    def __pow__(self, power, modulo=None):
        return Power(self, power)

    def __matmul__(self, other):
        return MatrixMultiply(self, other)
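
# Example: operator overloading builds the graph implicitly. For instance,
#   x, y = Variable(2.0), Variable(3.0)
#   f = x * y + x
# creates an Add unit whose parents are a Multiply unit and x, and each new
# unit registers itself as a child of its operands.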


class Variable(Unit):
    """A leaf unit holding a concrete value (a scalar or a numpy array)."""

    def __init__(self, value=None, name=None):
        self.value = value
        self.name = name
        super().__init__()

    def evaluate(self):
        return self.value

    def set_value(self, value):
        self.value = value


# Shared constants; differentiate() compares against these identities to
# short-circuit the chain rule.
ZERO = Variable(0)
ONE = Variable(1)


class Add(Unit):
    def evaluate(self):
        return sum(parent.evaluate() for parent in self.parents)

    def get_gradient(self, index):
        return ONE


class Subtract(Unit):
    def evaluate(self):
        return self.parents[0].evaluate() - sum(parent.evaluate() for parent in self.parents[1:])

    def get_gradient(self, index):
        if index == 0:
            return ONE
        else:
            return Variable(-1.0)


class Multiply(Unit):
    def evaluate(self):
        result = 1.0
        for parent in self.parents:
            result *= parent.evaluate()
        return result

    def get_gradient(self, index):
        if len(self.parents) == 2:
            return self.parents[1 - index]
        else:
            return Multiply(*(self.parents[i] for i in range(len(self.parents)) if i != index))
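
# Example: for p = Multiply(a, b, c), p.get_gradient(1) is Multiply(a, c),
# i.e. the product rule d(abc)/db = ac.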


class Power(Unit):
    def __init__(self, target, power):
        # TODO: support case when power is another unit
        self.power = power
        super().__init__(target)

    def evaluate(self):
        return self.parents[0].evaluate() ** self.power

    def get_gradient(self, index):
        if self.power == 2:
            return Variable(2) * self.parents[0]
        return Variable(self.power) * Power(self.parents[0], self.power - 1)
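
# Example: Power(x, 3).get_gradient(0) builds Variable(3) * Power(x, 2),
# i.e. the power rule d(x**n)/dx = n * x**(n - 1), as new graph units.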


class Sum(Unit):
    def evaluate(self):
        return sum(np.sum(parent.evaluate()) for parent in self.parents)

    def get_gradient(self, index):
        return ONE


class MatrixMultiply(Unit):
    def evaluate(self):
        left = self.parents[0].evaluate()
        right = self.parents[1].evaluate()
        if type(left) in (int, float) or type(right) in (int, float):
            # Fall back to scalar multiplication when either operand is a plain number.
            return left * right
        return left @ right

    def get_gradient(self, index):
        # TODO: support Jacobian for matrix-matrix multiplication
        if index == 0:
            return self.parents[1]
        else:
            return Transpose(self.parents[0])


class Transpose(Unit):
    def evaluate(self):
        result = self.parents[0].evaluate()
        if type(result) in (int, float):
            return result
        else:
            return result.T

    def get_gradient(self, index):
        return ONE


class Repeat(Unit):
    def __init__(self, parent, repeat):
        super().__init__()
        self.repeat = repeat
        for i in range(repeat):
            self.add_parent(parent)

    def evaluate(self):
        return np.array([parent.evaluate() for parent in self.parents])

    def get_gradient(self, index):
        result = np.zeros(len(self.parents))
        result[index] = 1
        return Variable(result)
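
# Example: Repeat(x, 3).evaluate() stacks three copies of x into an array;
# the gradient w.r.t. slot i is the one-hot basis vector e_i.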


class Relu(Unit):
    def evaluate(self):
        # Elementwise max(x, 0); 0.0 so that integer inputs are promoted to float.
        return np.maximum(self.parents[0].evaluate(), 0.0)

    def get_gradient(self, index):
        return ReluGradient(self.parents[index])


class ReluGradient(Unit):
    def evaluate(self):
        parent_value = self.parents[0].evaluate()
        # 1 where the input is positive, 0 where negative (0.5 at exactly 0).
        result = np.sign(parent_value) / 2.0 + 0.5
        if type(parent_value) in (int, float):
            return result
        if len(parent_value.shape) == 1:
            return np.diag(result)
        elif len(parent_value.shape) == 2:
            # Diagonal Jacobian of the elementwise op, reshaped to (m, n, m, n).
            return np.diagflat(result).reshape(parent_value.shape * 2)

    def get_gradient(self, index):
        return ZERO
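
# Design note: elementwise gradients are materialized as diagonal Jacobians so
# that differentiate() can compose every local gradient uniformly with @.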


def differentiate(target, variable):
    """Return a graph unit computing d(target)/d(variable) via the chain rule.

    The gradient is accumulated over every path from `variable` up to `target`:
    for each child, the child's local gradient is composed (with @) with the
    gradient of `target` with respect to that child.
    """
    if variable is target:
        return ONE
    gradient = ZERO
    for child, index in variable.get_children():
        local_gradient = child.get_gradient(index)
        child_gradient = differentiate(target, child)
        if child_gradient is ONE:
            current_gradient = local_gradient
        elif local_gradient is ONE:
            current_gradient = child_gradient
        elif local_gradient is ZERO or child_gradient is ZERO:
            current_gradient = ZERO
        else:
            current_gradient = local_gradient @ child_gradient
        if current_gradient is not ZERO:
            if gradient is ZERO:
                gradient = current_gradient
            else:
                gradient += current_gradient
    return gradient
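

if __name__ == "__main__":
    # Minimal usage sketch (added for illustration; the variable names are
    # arbitrary): build expressions, then differentiate them.
    x = Variable(3.0, name="x")
    y = Variable(1.0, name="y")
    f = (x - y) ** 2
    print(f.evaluate())                    # 4.0
    print(differentiate(f, x).evaluate())  # df/dx = 2 * (x - y) = 4.0
    print(differentiate(f, y).evaluate())  # df/dy = -2 * (x - y) = -4.0

    # Gradient of a dot product w.r.t. its first argument is the second argument.
    w = Variable(np.array([1.0, 2.0]), name="w")
    v = Variable(np.array([3.0, 4.0]), name="v")
    print(differentiate(w @ v, w).evaluate())  # [3. 4.]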