
Commit 63269a3

api call updated to tf2
1 parent 1b6e419 commit 63269a3

File tree

1 file changed: +40 −45 lines


seq2seq_attn_updated.py

Lines changed: 40 additions & 45 deletions
@@ -2,22 +2,16 @@
 """
 @author: tanma
 """
-import tensorflow as tf
-from tensorflow import keras
-from tensorflow.keras.models import Model
-from tensorflow.keras.layers import Input, LSTM, Dense, Embedding, CuDNNLSTM, Flatten, TimeDistributed, Dropout, LSTMCell, RNN
-from tensorflow.keras.callbacks import ModelCheckpoint
-from tensorflow.python.keras.utils import tf_utils
-from tensorflow.keras import backend as K
-
 import unicodedata
 import re
 import numpy as np
-import os
-import time
-import shutil
+import tensorflow.compat.v1 as tf
+
+from tensorflow.python.keras.utils import tf_utils
+
+
+tf.disable_v2_behavior()
 
-path_to_file = 'C:\\Users\\tanma.TANMAY-STATION\\Downloads/deu.txt'
 path = 'deu.txt'
 
 class LanguageIndex():
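
Note on the import swap above: routing everything through the tensorflow.compat.v1 shim and calling tf.disable_v2_behavior() keeps v1-style graph/session semantics, and v1 APIs such as tf.train.AdamOptimizer (still used in compile() further down), working on a TF2 install. A minimal standalone sketch of the pattern:

# Minimal sketch of the compat-shim pattern this commit adopts (assumes TF >= 2.0 installed).
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()               # restore graph mode and other TF1 semantics

optimizer = tf.train.AdamOptimizer()   # v1 optimizer, absent from the native TF2 namespace
print(tf.executing_eagerly())          # False once v2 behavior is disabled
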
@@ -59,10 +53,10 @@ def create_dataset(path, num_examples):
 
 def load_dataset(path, num_examples):
     pairs = create_dataset(path, num_examples)
-    out_lang = LanguageIndex(hin for en, hin in pairs)
-    in_lang = LanguageIndex(en for en, hin in pairs)
-    input_data = [[in_lang.word2idx[s] for s in en.split(' ')] for en, sp in pairs]
-    output_data = [[out_lang.word2idx[s] for s in sp.split(' ')] for en, sp in pairs]
+    out_lang = LanguageIndex(deu for en, deu, _ in pairs)
+    in_lang = LanguageIndex(en for en, deu, _ in pairs)
+    input_data = [[in_lang.word2idx[s] for s in en.split(' ')] for en, deu, _ in pairs]
+    output_data = [[out_lang.word2idx[s] for s in deu.split(' ')] for en, deu, _ in pairs]
 
     max_length_in, max_length_out = max_length(input_data), max_length(output_data)
     input_data = tf.keras.preprocessing.sequence.pad_sequences(input_data, maxlen=max_length_in, padding="post")
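
The unpacking change from (en, hin) pairs to (en, deu, _) triples suggests the deu.txt source carries a third tab-separated attribution column per line, which the new code discards. A small illustration (the sample line is hypothetical):

# Hypothetical sample row; real deu.txt rows are tab-separated the same way.
line = "Hi.\tHallo!\tCC-BY 2.0 Attribution: tatoeba.org"
en, deu, _ = line.split('\t')   # third field (attribution) is thrown away
print(en, '->', deu)            # Hi. -> Hallo!
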
@@ -89,22 +83,22 @@ def load_dataset(path, num_examples):
 vocab_in_size = len(input_lang.word2idx)
 vocab_out_size = len(target_lang.word2idx)
 
-class AttentionLSTMCell(LSTMCell):
+class AttentionLSTMCell(tf.keras.layers.LSTMCell):
     def __init__(self, **kwargs):
         self.attentionMode = False
         super(AttentionLSTMCell, self).__init__(**kwargs)
 
     @tf_utils.shape_type_conversion
     def build(self, input_shape):
 
-        self.dense_constant = TimeDistributed(Dense(self.units, name="AttLstmInternal_DenseConstant"))
+        self.dense_constant = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(self.units, name="AttLstmInternal_DenseConstant"))
 
-        self.dense_state = Dense(self.units, name="AttLstmInternal_DenseState")
+        self.dense_state = tf.keras.layers.Dense(self.units, name="AttLstmInternal_DenseState")
 
-        self.dense_transform = Dense(1, name="AttLstmInternal_DenseTransform")
+        self.dense_transform = tf.keras.layers.Dense(1, name="AttLstmInternal_DenseTransform")
 
-        batch, input_dim = input_shape[0]
-        batch, timesteps, context_size = input_shape[-1]
+        batch, input_dim = input_shape[:2]
+        batch, timesteps, context_size = input_shape[0], input_shape[1], input_shape[2]
         lstm_input = (batch, input_dim + context_size)
 
         return super(AttentionLSTMCell, self).build(lstm_input)
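
For context on the new base class: a tf.keras.layers.LSTMCell subclass plugs into tf.keras.layers.RNN exactly like the stock cell, which is the contract AttentionLSTMCell relies on. A standalone sketch with a hypothetical cell (plain TF2, independent of the compat shim used in this file):

import tensorflow as tf

class NoisyLSTMCell(tf.keras.layers.LSTMCell):   # hypothetical cell, for illustration only
    def call(self, inputs, states, training=None):
        # perturb the inputs, then defer to the stock LSTM update
        inputs = inputs + tf.random.normal(tf.shape(inputs), stddev=0.01)
        return super().call(inputs, states, training=training)

layer = tf.keras.layers.RNN(NoisyLSTMCell(units=8), return_sequences=True)
print(layer(tf.zeros((2, 5, 3))).shape)          # (2, 5, 8): batch, time, units
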
@@ -120,24 +114,24 @@ def setAttentionMode(self, mode_on=False):
     def call(self, inputs, states, constants):
         ytm, stm = states
 
-        stm_repeated = K.repeat(self.dense_state(stm), self.timesteps)
+        stm_repeated = tf.keras.backend.repeat(self.dense_state(stm), self.timesteps)
 
         combined_stm_input = self.dense_transform(
-            keras.activations.relu(stm_repeated + self.input_seq_shaped))
-        score_vector = keras.activations.softmax(combined_stm_input, 1)
+            tf.keras.activations.relu(stm_repeated + self.input_seq_shaped))
+        score_vector = tf.keras.activations.softmax(combined_stm_input, 1)
 
-        context_vector = K.sum(score_vector * self.input_seq, 1)
+        context_vector = tf.keras.backend.sum(score_vector * self.input_seq, 1)
 
-        inputs = K.concatenate([inputs, context_vector])
+        inputs = tf.keras.backend.concatenate([inputs, context_vector])
 
         res = super(AttentionLSTMCell, self).call(inputs=inputs, states=states)
 
         if(self.attentionMode):
-            return (K.reshape(score_vector, (-1, self.timesteps)), res[1])
+            return (tf.keras.backend.reshape(score_vector, (-1, self.timesteps)), res[1])
         else:
             return res
 
-class LSTMWithAttention(RNN):
+class LSTMWithAttention(tf.keras.layers.RNN):
     def __init__(self, units, **kwargs):
         cell = AttentionLSTMCell(units=units)
         self.units = units
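
The call() above is additive (Bahdanau-style) attention written with the tf.keras.backend ops the commit migrates to. A standalone sketch of the scoring math, with a plain sum standing in for the cell's Dense(1) transform and example shapes:

import numpy as np
import tensorflow as tf
K = tf.keras.backend

enc_seq = K.constant(np.random.rand(2, 7, 16))   # (batch, timesteps, units): encoder outputs
stm     = K.constant(np.random.rand(2, 16))      # current decoder state

stm_rep = K.repeat(stm, 7)                                        # copy the state per timestep
energy  = K.sum(tf.keras.activations.relu(stm_rep + enc_seq),     # Dense(1) elided for brevity
                axis=-1, keepdims=True)
scores  = tf.keras.activations.softmax(energy, axis=1)            # normalize over time
context = K.sum(scores * enc_seq, axis=1)                         # weighted sum over timesteps
print(context.shape)                                              # (2, 16)
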
@@ -161,24 +155,25 @@ def call(self, x, constants, **kwargs):
         return super(LSTMWithAttention, self).call(inputs=x, constants=constants, **kwargs)
 
 
-attenc_inputs = Input(shape=(len_input,), name="attenc_inputs")
-attenc_emb = Embedding(input_dim=vocab_in_size, output_dim=embedding_dim)
-attenc_lstm = CuDNNLSTM(units=units, return_sequences=True, return_state=True)
+attenc_inputs = tf.keras.layers.Input(shape=(len_input,), name="attenc_inputs")
+attenc_emb = tf.keras.layers.Embedding(input_dim=vocab_in_size, output_dim=embedding_dim)
+attenc_lstm = tf.keras.layers.LSTM(units=128, activation='tanh', recurrent_activation='sigmoid', recurrent_dropout=0, unroll=False, use_bias=True, return_sequences=True, return_state=True)
+# For CuDNN implementation
 attenc_outputs, attstate_h, attstate_c = attenc_lstm(attenc_emb(attenc_inputs))
 attenc_states = [attstate_h, attstate_c]
 
-attdec_inputs = Input(shape=(None,))
-attdec_emb = Embedding(input_dim=vocab_out_size, output_dim=embedding_dim)
+attdec_inputs = tf.keras.layers.Input(shape=(None,))
+attdec_emb = tf.keras.layers.Embedding(input_dim=vocab_out_size, output_dim=embedding_dim)
 attdec_lstm = LSTMWithAttention(units=units, return_sequences=True, return_state=True)
 
 attdec_lstm_out, _, _ = attdec_lstm(inputs=attdec_emb(attdec_inputs),
                                     constants=attenc_outputs,
                                     initial_state=attenc_states)
-attdec_d1 = Dense(units, activation="relu")
-attdec_d2 = Dense(vocab_out_size, activation="softmax")
-attdec_out = attdec_d2(Dropout(rate=.4)(attdec_d1(Dropout(rate=.4)(attdec_lstm_out))))
+attdec_d1 = tf.keras.layers.Dense(units, activation="relu")
+attdec_d2 = tf.keras.layers.Dense(vocab_out_size, activation="softmax")
+attdec_out = attdec_d2(tf.keras.layers.Dropout(rate=.4)(attdec_d1(tf.keras.layers.Dropout(rate=.4)(attdec_lstm_out))))
 
-attmodel = Model([attenc_inputs, attdec_inputs], attdec_out)
+attmodel = tf.keras.models.Model([attenc_inputs, attdec_inputs], attdec_out)
 attmodel.compile(optimizer=tf.train.AdamOptimizer(), loss="sparse_categorical_crossentropy", metrics=['sparse_categorical_accuracy'])
 
 epochs = 20
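
On the CuDNNLSTM replacement: TF2 removed the separate CuDNN layer classes, and tf.keras.layers.LSTM selects the fused cuDNN kernel on GPU only when its arguments keep their cuDNN-compatible values, which is presumably why the commit spells them all out (note the hard-coded units=128 where the old line used the units variable). Also, tf.train.AdamOptimizer() in compile() is still the v1 API and works only through the compat shim; tf.keras.optimizers.Adam() would be the native TF2 equivalent. A standalone sketch of the cuDNN-eligible layer:

import tensorflow as tf

# These are the cuDNN-compatibility constraints: a non-default activation,
# recurrent_dropout > 0, or unroll=True silently falls back to the generic kernel.
lstm = tf.keras.layers.LSTM(units=128,
                            activation='tanh',
                            recurrent_activation='sigmoid',
                            recurrent_dropout=0,
                            unroll=False,
                            use_bias=True,
                            return_sequences=True,
                            return_state=True)
outputs, state_h, state_c = lstm(tf.zeros((4, 10, 32)))   # (batch, time, features)
print(outputs.shape, state_h.shape, state_c.shape)        # (4, 10, 128) (4, 128) (4, 128)
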
@@ -226,22 +221,22 @@ def translate(input_sentence, infenc_model, infmodel, attention=False):
     return output_sentence
 
 def createAttentionInference(attention_mode=False):
-    attencoder_model = Model(attenc_inputs, [attenc_outputs, attstate_h, attstate_c])
-    state_input_h = Input(shape=(units,), name="state_input_h")
-    state_input_c = Input(shape=(units,), name="state_input_c")
-    attenc_seq_out = Input(shape=attenc_outputs.get_shape()[1:], name="attenc_seq_out")
-    inf_attdec_inputs = Input(shape=(None,), name="inf_attdec_inputs")
+    attencoder_model = tf.keras.models.Model(attenc_inputs, [attenc_outputs, attstate_h, attstate_c])
+    state_input_h = tf.keras.layers.Input(shape=(units,), name="state_input_h")
+    state_input_c = tf.keras.layers.Input(shape=(units,), name="state_input_c")
+    attenc_seq_out = tf.keras.layers.Input(shape=attenc_outputs.get_shape()[1:], name="attenc_seq_out")
+    inf_attdec_inputs = tf.keras.layers.Input(shape=(None,), name="inf_attdec_inputs")
     attdec_lstm.cell.setAttentionMode(attention_mode)
     attdec_res, attdec_h, attdec_c = attdec_lstm(attdec_emb(inf_attdec_inputs),
                                                  initial_state=[state_input_h, state_input_c],
                                                  constants=attenc_seq_out)
     attinf_model = None
     if not attention_mode:
         inf_attdec_out = attdec_d2(attdec_d1(attdec_res))
-        attinf_model = Model(inputs=[inf_attdec_inputs, state_input_h, state_input_c, attenc_seq_out],
+        attinf_model = tf.keras.models.Model(inputs=[inf_attdec_inputs, state_input_h, state_input_c, attenc_seq_out],
                              outputs=[inf_attdec_out, attdec_h, attdec_c])
     else:
-        attinf_model = Model(inputs=[inf_attdec_inputs, state_input_h, state_input_c, attenc_seq_out],
+        attinf_model = tf.keras.models.Model(inputs=[inf_attdec_inputs, state_input_h, state_input_c, attenc_seq_out],
                              outputs=[attdec_res, attdec_h, attdec_c])
     return attencoder_model, attinf_model
 
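For reference, a hedged usage sketch of the two inference models createAttentionInference returns; the greedy loop and the '<start>' marker are illustrative assumptions, not code from this file:

# Assumes the definitions above plus numpy as np; '<start>' is a guessed marker token.
encoder_model, decoder_model = createAttentionInference(attention_mode=False)

enc_seq, h, c = encoder_model.predict(input_seq)          # input_seq: padded token ids, batch of 1
token = target_lang.word2idx['<start>']
for _ in range(max_length_out):                           # greedy decode, one step at a time
    probs, h, c = decoder_model.predict([np.array([[token]]), h, c, enc_seq])
    token = int(np.argmax(probs[0, -1]))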