# This file contains auxiliary functions (TensorFlow 1.x graph-mode API).
import numpy as np
import tensorflow as tf


# Returns an orthogonal matrix of the given shape.
def orthogonal(shape):
    flat_shape = (shape[0], np.prod(shape[1:]))
    a = np.random.normal(0.0, 1.0, flat_shape)
    u, _, v = np.linalg.svd(a, full_matrices=False)
    # Pick whichever SVD factor matches the flattened target shape.
    q = u if u.shape == flat_shape else v
    return q.reshape(shape)


# Returns an initializer that outputs an orthogonal matrix scaled by `scale`.
def orthogonal_initializer(scale=1.0):
    def _initializer(shape, dtype=tf.float32, partition_info=None):
        return tf.constant(orthogonal(shape) * scale, dtype)
    return _initializer
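

# Usage sketch (assumption, not part of the original module; the variable name
# and shape below are illustrative): the initializer can be passed directly to
# tf.get_variable.
def _example_orthogonal_initializer_usage():
    with tf.variable_scope("example_ortho"):
        w = tf.get_variable("w", [128, 512],
                            initializer=orthogonal_initializer(scale=1.0))
    return w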


# Layer Norm (faster version).
#
# Performs layer norm on multiple blocks at once (i.e. the i, g, j, o
# pre-activations of an LSTM), normalizing each block of `num_units` features
# separately. Reshapes h to [-1, base, num_units] so the normalization runs in
# parallel over the `base` blocks.
def layer_norm_all(h, base, num_units, scope):
    with tf.variable_scope(scope):
        h_reshape = tf.reshape(h, [-1, base, num_units])
        mean = tf.reduce_mean(h_reshape, [2], keep_dims=True)
        var = tf.reduce_mean(tf.square(h_reshape - mean), [2], keep_dims=True)
        epsilon = tf.constant(1e-3)
        rstd = tf.rsqrt(var + epsilon)
        h_reshape = (h_reshape - mean) * rstd
        # Reshape back to the original [-1, base * num_units] layout.
        h = tf.reshape(h_reshape, [-1, base * num_units])
        # One learned gain and bias per flattened unit.
        alpha = tf.get_variable('layer_norm_alpha', [base * num_units],
                                initializer=tf.constant_initializer(1.0), dtype=tf.float32)
        bias = tf.get_variable('layer_norm_bias', [base * num_units],
                               initializer=tf.constant_initializer(0.0), dtype=tf.float32)
        return (h * alpha) + bias
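

# Usage sketch (assumption, not part of the original module): layer-normalize
# the concatenated LSTM gate pre-activations in one call and split them back
# into four gates. `x_and_h` and `w_full` are hypothetical tensors.
def _example_layer_norm_all_usage(x_and_h, w_full, num_units):
    # x_and_h: [batch, input_dim + num_units]
    # w_full:  [input_dim + num_units, 4 * num_units]
    concat = tf.matmul(x_and_h, w_full)                # [batch, 4 * num_units]
    concat = layer_norm_all(concat, 4, num_units, 'ln_all')
    i, j, f, o = tf.split(concat, 4, axis=1)           # one chunk per gate
    return i, j, f, o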


# Mean and deviation statistics for layer norm; both outputs have shape
# [batch_size, 1] because keep_dims=True.
# From https://github.com/LeavesBreathe/tensorflow_with_latest_papers
def moments_for_layer_norm(x, axes=1, name=None):
    epsilon = 1e-3  # found this works best.
    if not isinstance(axes, list):
        axes = [axes]
    mean = tf.reduce_mean(x, axes, keep_dims=True)
    # Note: despite the name, this is the standard deviation
    # (sqrt of variance + epsilon), which is what layer_norm divides by.
    variance = tf.sqrt(tf.reduce_mean(tf.square(x - mean), axes, keep_dims=True) + epsilon)
    return mean, variance


# Layer norm over the feature dimension of x, with learned gain (alpha) and bias.
# Derived from https://github.com/LeavesBreathe/tensorflow_with_latest_papers,
# but simplified.
def layer_norm(x, scope="layer_norm", alpha_start=1.0, bias_start=0.0):
    with tf.variable_scope(scope):
        num_units = x.get_shape().as_list()[1]
        alpha = tf.get_variable('alpha', [num_units],
                                initializer=tf.constant_initializer(alpha_start), dtype=tf.float32)
        bias = tf.get_variable('bias', [num_units],
                               initializer=tf.constant_initializer(bias_start), dtype=tf.float32)
        mean, variance = moments_for_layer_norm(x)
        y = (alpha * (x - mean)) / variance + bias
    return y
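

# Usage sketch (assumption, not part of the original module): apply layer_norm
# to the output of a hypothetical dense layer before the nonlinearity.
def _example_layer_norm_usage(x, w):
    # x: [batch, in_dim], w: [in_dim, out_dim]
    pre_activation = tf.matmul(x, w)
    normalized = layer_norm(pre_activation, scope="dense_ln")
    return tf.tanh(normalized)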


# Zoneout: stochastically carry the previous hidden/cell state over to the next
# step instead of the new one. `is_training` must be a Python bool, since it is
# checked with a plain `if` at graph-construction time.
def zoneout(new_h, new_c, h, c, h_keep, c_keep, is_training):
    mask_c = tf.ones_like(c)
    mask_h = tf.ones_like(h)
    if is_training:
        # tf.nn.dropout scales kept entries by 1/keep_prob; the multiplication
        # by keep_prob below turns this back into a binary {0, 1} mask.
        mask_c = tf.nn.dropout(mask_c, c_keep)
        mask_h = tf.nn.dropout(mask_h, h_keep)
    # At test time the masks stay constant at keep_prob (the expected value).
    mask_c *= c_keep
    mask_h *= h_keep
    # Where the mask is 1, take the new state; where it is 0, keep the old state.
    h = new_h * mask_h + (-mask_h + 1.) * h
    c = new_c * mask_c + (-mask_c + 1.) * c
    return h, c
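

# Usage sketch (assumption, not part of the original module): a single
# LSTM-style step that computes candidate states and then applies zoneout.
# The gate layout, scope names, and keep probabilities are illustrative only.
def _example_zoneout_usage(concat_gates, prev_h, prev_c, is_training):
    # concat_gates: [batch, 4 * num_units] pre-activations, e.g. from layer_norm_all.
    i, j, f, o = tf.split(concat_gates, 4, axis=1)
    new_c = prev_c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j)
    new_h = tf.tanh(layer_norm(new_c, scope='ln_c')) * tf.sigmoid(o)
    # Keep probabilities of 0.95 (h) and 0.7 (c) are example values only.
    return zoneout(new_h, new_c, prev_h, prev_c, 0.95, 0.7, is_training)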