linear.py
import math

from module import Module
from torch import Tensor


class Linear(Module):
    def __init__(self, in_dim, out_dim, weight_init='uniform_pos_neg', bias_init='zero'):
        """ in_dim and out_dim are the respective input and output dimension sizes. The weight
        initialisation can be chosen via weight_init (see init_weights() for the available
        options). The same holds for bias_init for the bias (see init_weights()).
        """
        super(Linear, self).__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.weight_init = weight_init
        self.bias_init = bias_init
        self.weight_grad = None
        self.bias_grad = None
        self.weight = None
        self.bias = None

    def init_weights(self, nb_samples):
        """ Initialises the weights and the bias of the layer. It takes the number of samples
        to be passed through the network, nb_samples.
        The following initialisations are available for both weight and bias:
            uniform_pos_neg: initialises the values uniformly at random in [-1, 1]
            uniform_non_neg: initialises the values uniformly at random in [0, 1]
            pytorch_default: initialises the values uniformly at random in [-std, std],
                             where std = 1 / sqrt(self.in_dim)
        For the weight initialisation we additionally have:
            ones: initialises the weights to 1
        For the bias initialisation we additionally have:
            zero: initialises the bias to 0
        """
        if self.weight_init is None:
            self.weight = Tensor(self.out_dim, self.in_dim)
        elif self.weight_init == 'ones':
            ones_list = [1] * (self.in_dim * self.out_dim)
            self.weight = Tensor(ones_list).view(self.out_dim, self.in_dim)
        elif self.weight_init == 'uniform_pos_neg':
            self.weight = Tensor(self.out_dim, self.in_dim).uniform_(-1, 1)
        elif self.weight_init == 'uniform_non_neg':
            self.weight = Tensor(self.out_dim, self.in_dim).uniform_(0, 1)
        elif self.weight_init == 'pytorch_default':
            stdv = 1. / math.sqrt(self.in_dim)
            self.weight = Tensor(self.out_dim, self.in_dim).uniform_(-stdv, stdv)
        else:
            raise ValueError("Unknown weight_init: {}".format(self.weight_init))
        # Gradient accumulator for the weights, zeroed so that += in backward() is well defined.
        self.weight_grad = Tensor(self.out_dim, self.in_dim).zero_()
        if self.bias_init is None:
            self.bias = Tensor(self.out_dim, 1)
        elif self.bias_init == 'zero':
            zeros_list = [0] * self.out_dim
            self.bias = Tensor(zeros_list).view(self.out_dim, 1)
        elif self.bias_init == 'uniform_pos_neg':
            self.bias = Tensor(self.out_dim, 1).uniform_(-1, 1)
        elif self.bias_init == 'uniform_non_neg':
            self.bias = Tensor(self.out_dim, 1).uniform_(0, 1)
        elif self.bias_init == 'pytorch_default':
            stdv = 1. / math.sqrt(self.in_dim)
            self.bias = Tensor(self.out_dim, 1).uniform_(-stdv, stdv)
        else:
            raise ValueError("Unknown bias_init: {}".format(self.bias_init))
        # Gradient accumulator for the bias, zeroed so that += in backward() is well defined.
        self.bias_grad = Tensor(self.out_dim, 1).zero_()
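
    # The 'pytorch_default' option above is presumably meant to mirror the classic
    # U(-1/sqrt(in_dim), 1/sqrt(in_dim)) initialisation used by torch.nn.Linear.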

    def forward(self, input):
        """ Returns the output of the layer for ``input`` of shape (in_dim, nb_samples). """
        self.input = input
        # Broadcast the bias across all samples in the batch.
        return self.weight @ input + self.bias.repeat(1, input.shape[1])

    def backward(self, gradwrtoutput):
        """ Accumulates the parameter gradients and returns the gradient w.r.t. the input. """
        # The bias gradient is the sum of the output gradients over the batch.
        self.bias_grad += gradwrtoutput.sum(dim=1, keepdim=True)
        nb_samples = gradwrtoutput.shape[1]
        # The weight gradient accumulates the per-sample outer products g_i @ x_i^T.
        for i in range(nb_samples):
            self.weight_grad += gradwrtoutput.narrow(1, i, 1) @ self.input.narrow(1, i, 1).t()
        return self.weight.t() @ gradwrtoutput
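
    # The per-sample loop in backward() is equivalent to a single matrix product over the
    # whole batch, e.g. (a sketch, assuming self.input holds the (in_dim, nb_samples) input
    # stored by forward()):
    #     self.weight_grad += gradwrtoutput @ self.input.t()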

    def param(self):
        """ Returns the parameters of the layer. """
        return [self.weight, self.bias]

    def param_grad(self):
        """ Returns the gradients of the parameters. """
        return [self.weight_grad, self.bias_grad]

    def set_param_grad(self, param_grad):
        """ Overwrites the parameter gradients with the given ones. """
        self.weight_grad = param_grad[0]
        self.bias_grad = param_grad[1]

    def set_zero_grad(self):
        """ Resets the parameter gradients to zero. """
        self.weight_grad.zero_()
        self.bias_grad.zero_()

    def update_param(self, lr, wd=0):
        """ Performs the gradient step once the forward and backward passes have been done.
        lr is the learning rate and wd an optional weight decay applied to the weights only.
        """
        self.weight = self.weight * (1 - lr * wd) - lr * self.weight_grad
        self.bias = self.bias - lr * self.bias_grad
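

# A minimal usage sketch, assuming the Module base class from module.py only defines the
# interface used above and that inputs are torch Tensors of shape (in_dim, nb_samples);
# the names layer, x and grad_out below are purely illustrative.
if __name__ == '__main__':
    layer = Linear(in_dim=3, out_dim=2, weight_init='pytorch_default', bias_init='zero')
    layer.init_weights(nb_samples=5)

    x = Tensor(3, 5).uniform_(-1, 1)       # batch of 5 samples stored column-wise
    out = layer.forward(x)                 # shape (2, 5)

    grad_out = Tensor(2, 5).fill_(1.)      # stand-in gradient from the next layer / loss
    layer.set_zero_grad()
    grad_in = layer.backward(grad_out)     # shape (3, 5)

    layer.update_param(lr=0.01, wd=0.0)    # one SGD step with optional weight decay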