# learnedactivations.py
import numpy as np
import theano
import theano.tensor as T
from theano import ifelse
from lasagne import init
from lasagne import nonlinearities
from lasagne import layers
__all__ = [
    "BatchNormalizationLayer",
]

class BatchNormalizationLayer(layers.base.Layer):
    """
    Batch normalization layer [1]

    The user is required to set up updates for the learned parameters (gamma
    and beta). The values necessary for creating the updates can be obtained
    by passing a dict as the moving_avg_hooks keyword to get_output().

    REF:
     [1] http://arxiv.org/abs/1502.03167

    :parameters:
        - incoming : `Layer` instance
            The layer from which this layer will obtain its input
        - nonlinearity : callable or None (default: lasagne.nonlinearities.rectify)
            The nonlinearity that is applied to the layer activations. If None
            is provided, the layer will be linear.
        - epsilon : scalar float
            Stabilizes training. Setting this too close to zero will result
            in NaNs.

    :usage:
        >>> import itertools
        >>> from lasagne.layers import (InputLayer, BatchNormalizationLayer,
        ...                             DenseLayer)
        >>> from lasagne.nonlinearities import linear, rectify
        >>> l_in = InputLayer((100, 20))
        >>> l_dense = DenseLayer(l_in, 50, nonlinearity=linear)
        >>> l_bn = BatchNormalizationLayer(l_dense, nonlinearity=rectify)
        >>> hooks, input, updates = {}, T.matrix(), []
        >>> l_out = l_bn.get_output(
        ...     input, deterministic=False, moving_avg_hooks=hooks)
        >>> mulfac = 1.0/100.0
        >>> batchnormparams = list(itertools.chain(
        ...     *[i[1] for i in hooks['BatchNormalizationLayer:movingavg']]))
        >>> batchnormvalues = list(itertools.chain(
        ...     *[i[0] for i in hooks['BatchNormalizationLayer:movingavg']]))
        >>> for tensor, param in zip(batchnormvalues, batchnormparams):
        ...     updates.append((param, (1.0-mulfac)*param + mulfac*tensor))
        >>> # append these updates to your normal update list
    """
    def __init__(self, incoming,
                 gamma=init.Uniform([0.95, 1.05]),
                 beta=init.Constant(0.),
                 nonlinearity=nonlinearities.rectify,
                 epsilon=0.001,
                 **kwargs):
        super(BatchNormalizationLayer, self).__init__(incoming, **kwargs)
        if nonlinearity is None:
            self.nonlinearity = nonlinearities.identity
        else:
            self.nonlinearity = nonlinearity

        self.num_units = int(np.prod(self.input_shape[1:]))

        # Learned scale (gamma) and shift (beta), one per unit.
        self.gamma = self.add_param(gamma, (self.num_units,),
                                    name="BatchNormalizationLayer:gamma",
                                    trainable=True)
        self.beta = self.add_param(beta, (self.num_units,),
                                   name="BatchNormalizationLayer:beta",
                                   trainable=True)
        self.epsilon = epsilon

        # Running statistics used at inference time; the caller is expected
        # to update them via the moving_avg_hooks mechanism (see docstring).
        self.mean_inference = theano.shared(
            np.zeros((1, self.num_units), dtype=theano.config.floatX),
            borrow=True,
            broadcastable=(True, False))
        self.mean_inference.name = "shared:mean-%s" % self.name

        self.variance_inference = theano.shared(
            np.zeros((1, self.num_units), dtype=theano.config.floatX),
            borrow=True,
            broadcastable=(True, False))
        self.variance_inference.name = "shared:variance-%s" % self.name
    def get_output_shape_for(self, input_shape):
        return input_shape
    def get_output_for(self, input, moving_avg_hooks=None,
                       deterministic=False, *args, **kwargs):
        # Inputs with more than two dimensions are flattened to
        # (batch_size, num_units) and reshaped back before returning.
        reshape = False
        if input.ndim > 2:
            output_shape = input.shape
            reshape = True
            input = input.flatten(2)

        if deterministic is False:
            # Training mode: normalize with the statistics of the current
            # mini-batch. Note that v is the epsilon-stabilized standard
            # deviation, not the raw variance.
            m = T.mean(input, axis=0, keepdims=True)
            v = T.sqrt(T.var(input, axis=0, keepdims=True) + self.epsilon)
            m.name = "tensor:mean-%s" % self.name
            v.name = "tensor:variance-%s" % self.name

            # Expose the batch statistics and the corresponding shared
            # variables so the caller can build moving-average updates.
            key = "BatchNormalizationLayer:movingavg"
            if key not in moving_avg_hooks:
                moving_avg_hooks[key] = []
            moving_avg_hooks[key].append(
                [[m, v], [self.mean_inference, self.variance_inference]])
        else:
            # Inference mode: use the accumulated running averages.
            m = self.mean_inference
            v = self.variance_inference

        input_hat = (input - m) / v             # normalize
        y = self.gamma * input_hat + self.beta  # scale and shift

        if reshape:
            y = T.reshape(y, output_shape)
        return self.nonlinearity(y)
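

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the layer). It assumes
# the old-style Lasagne API in which layers expose a get_output() method, as
# used in the class docstring above; the names below (x, l_in, l_dense, l_bn),
# the stand-in squared-activation loss, and the learning rate are hypothetical.
# It shows how the moving_avg_hooks dict is turned into extra updates that
# keep the inference-time statistics as exponential moving averages.
if __name__ == "__main__":
    import itertools

    x = T.matrix("x")
    l_in = layers.InputLayer((100, 20))
    l_dense = layers.DenseLayer(l_in, 50, nonlinearity=nonlinearities.linear)
    l_bn = BatchNormalizationLayer(l_dense, nonlinearity=nonlinearities.rectify)

    # Build the training-mode output and collect the moving-average hooks.
    hooks = {}
    output = l_bn.get_output(x, deterministic=False, moving_avg_hooks=hooks)
    loss = T.mean(output ** 2)  # stand-in objective for illustration

    # Plain SGD updates for the trainable parameters.
    params = [l_dense.W, l_dense.b, l_bn.gamma, l_bn.beta]
    grads = T.grad(loss, params)
    updates = [(p, p - 0.01 * g) for p, g in zip(params, grads)]

    # Exponential moving averages for the inference-time statistics.
    mulfac = 1.0 / 100.0
    pairs = hooks["BatchNormalizationLayer:movingavg"]
    batch_stats = list(itertools.chain(*[p[0] for p in pairs]))
    shared_stats = list(itertools.chain(*[p[1] for p in pairs]))
    for tensor, shared in zip(batch_stats, shared_stats):
        updates.append((shared, (1.0 - mulfac) * shared + mulfac * tensor))

    train_fn = theano.function([x], loss, updates=updates)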