"""Module implementing the IndRNN cell"""
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import clip_ops
from tensorflow.python.layers import base as base_layer
try:
  # TF 1.7+
  from tensorflow.python.ops.rnn_cell_impl import LayerRNNCell
except ImportError:
  # Older TF versions expose the same class under a private name.
  from tensorflow.python.ops.rnn_cell_impl import _LayerRNNCell as LayerRNNCell


class IndRNNCell(LayerRNNCell):
  """Independently Recurrent Neural Network (IndRNN) cell.

  Adapted from `rnn_cell_impl.BasicRNNCell`. Each unit has a single recurrent
  weight connected to its last hidden state.

  The implementation is based on:

    https://arxiv.org/abs/1803.04831

  Shuai Li, Wanqing Li, Chris Cook, Ce Zhu, Yanbo Gao
  "Independently Recurrent Neural Network (IndRNN): Building A Longer and
  Deeper RNN"

  The default initialization values for the recurrent weights, input weights
  and biases are taken from:

    https://arxiv.org/abs/1504.00941

  Quoc V. Le, Navdeep Jaitly, Geoffrey E. Hinton
  "A Simple Way to Initialize Recurrent Networks of Rectified Linear Units"

  Args:
    num_units: int, the number of units in the RNN cell.
    recurrent_min_abs: float, minimum absolute value of each recurrent
      weight. Default: 0.
    recurrent_max_abs: (optional) float, maximum absolute value of each
      recurrent weight. For `relu` activation, `pow(2, 1/timesteps)` is
      recommended. If None, recurrent weights will not be clipped.
      Default: None.
    recurrent_kernel_initializer: (optional) The initializer to use for the
      recurrent weights. If None, every recurrent weight is initially set
      to 1. Default: None.
    input_kernel_initializer: (optional) The initializer to use for the input
      weights. If None, the input weights are initialized from a random
      normal distribution with `mean=0` and `stddev=0.001`. Default: None.
    activation: Nonlinearity to use. Default: `relu`.
    reuse: (optional) Python boolean describing whether to reuse variables
      in an existing scope. If not `True`, and the existing scope already
      has the given variables, an error is raised.
    name: String, the name of the layer. Layers with the same name will
      share weights, but to avoid mistakes we require `reuse=True` in such
      cases.
  """

  def __init__(self,
               num_units,
               recurrent_min_abs=0,
               recurrent_max_abs=None,
               recurrent_kernel_initializer=None,
               input_kernel_initializer=None,
               activation=None,
               reuse=None,
               name=None):
    super(IndRNNCell, self).__init__(_reuse=reuse, name=name)

    # Inputs must be 2-dimensional.
    self.input_spec = base_layer.InputSpec(ndim=2)

    self._num_units = num_units
    self._recurrent_min_abs = recurrent_min_abs
    self._recurrent_max_abs = recurrent_max_abs
    self._recurrent_initializer = recurrent_kernel_initializer
    self._input_initializer = input_kernel_initializer
    self._activation = activation or nn_ops.relu

  @property
  def state_size(self):
    return self._num_units

  @property
  def output_size(self):
    return self._num_units

  def build(self, inputs_shape):
    if inputs_shape[1].value is None:
      raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
                       % inputs_shape)

    input_depth = inputs_shape[1].value
    if self._input_initializer is None:
      self._input_initializer = init_ops.random_normal_initializer(
          mean=0.0, stddev=0.001)
    self._input_kernel = self.add_variable(
        "input_kernel",
        shape=[input_depth, self._num_units],
        initializer=self._input_initializer)

    if self._recurrent_initializer is None:
      self._recurrent_initializer = init_ops.constant_initializer(1.)
    self._recurrent_kernel = self.add_variable(
        "recurrent_kernel",
        shape=[self._num_units],
        initializer=self._recurrent_initializer)

    # Clip the absolute values of the recurrent weights to the specified
    # minimum, keeping the sign of each weight.
    if self._recurrent_min_abs:
      abs_kernel = math_ops.abs(self._recurrent_kernel)
      min_abs_kernel = math_ops.maximum(abs_kernel, self._recurrent_min_abs)
      self._recurrent_kernel = math_ops.multiply(
          math_ops.sign(self._recurrent_kernel),
          min_abs_kernel
      )
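    # Illustrative example (numbers assumed, not from the original source):
    # with `recurrent_min_abs=0.1`, a weight of -0.05 becomes
    # sign(-0.05) * max(|-0.05|, 0.1) = -0.1, so the magnitude is raised to
    # the floor while the sign is preserved.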

    # Clip the absolute values of the recurrent weights to the specified
    # maximum.
    if self._recurrent_max_abs:
      self._recurrent_kernel = clip_ops.clip_by_value(
          self._recurrent_kernel,
          -self._recurrent_max_abs,
          self._recurrent_max_abs)

    self._bias = self.add_variable(
        "bias",
        shape=[self._num_units],
        initializer=init_ops.zeros_initializer(dtype=self.dtype))

    self.built = True

  def call(self, inputs, state):
    """Run one time step of the IndRNN.

    Calculates the output and new hidden state using the IndRNN equation

      `output = new_state = act(W * input + u (*) state + b)`

    where `*` is the matrix multiplication and `(*)` is the Hadamard product.

    Args:
      inputs: Tensor, 2-D tensor of shape `[batch, input_depth]`.
      state: Tensor, 2-D tensor of shape `[batch, num_units]` containing the
        previous hidden state.

    Returns:
      A tuple containing the output and the new hidden state. Both are the
      same 2-D tensor of shape `[batch, num_units]`.
    """
    gate_inputs = math_ops.matmul(inputs, self._input_kernel)
    recurrent_update = math_ops.multiply(state, self._recurrent_kernel)
    gate_inputs = math_ops.add(gate_inputs, recurrent_update)
    gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)
    output = self._activation(gate_inputs)
    return output, output
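

if __name__ == "__main__":
  # Minimal usage sketch, not part of the original module: wire the cell into
  # `tf.nn.dynamic_rnn` under TensorFlow 1.x. The sequence length, input
  # depth and unit count below are assumed example values.
  import tensorflow as tf

  TIME_STEPS = 100
  NUM_UNITS = 128
  # Clip the recurrent weights at pow(2, 1/timesteps), as the class docstring
  # recommends for the default `relu` activation.
  RECURRENT_MAX = pow(2., 1. / TIME_STEPS)

  inputs = tf.placeholder(tf.float32, shape=[None, TIME_STEPS, 2])

  # Each IndRNNCell is an ordinary `RNNCell`, so the standard TF 1.x wrappers
  # apply; here two layers are stacked to build a deeper IndRNN.
  cell = tf.nn.rnn_cell.MultiRNNCell([
      IndRNNCell(NUM_UNITS, recurrent_max_abs=RECURRENT_MAX),
      IndRNNCell(NUM_UNITS, recurrent_max_abs=RECURRENT_MAX),
  ])
  outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
  print(outputs)  # Tensor of shape (?, TIME_STEPS, NUM_UNITS)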