"""
@author: tanma
"""
- import tensorflow as tf
- from tensorflow import keras
- from tensorflow.keras.models import Model
- from tensorflow.keras.layers import Input, LSTM, Dense, Embedding, CuDNNLSTM, Flatten, TimeDistributed, Dropout, LSTMCell, RNN
- from tensorflow.keras.callbacks import ModelCheckpoint
- from tensorflow.python.keras.utils import tf_utils
- from tensorflow.keras import backend as K
-
import unicodedata
import re
import numpy as np
- import os
- import time
- import shutil
+ import tensorflow.compat.v1 as tf
+
+ from tensorflow.python.keras.utils import tf_utils
+
+
+ tf.disable_v2_behavior()
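# Note: with tensorflow.compat.v1 plus tf.disable_v2_behavior(), the script runs under
# TF1-style graph/session semantics inside a TF2 install; the removed top-level Keras
# imports are replaced below by fully qualified tf.keras.* paths, which also keeps
# tf.train.AdamOptimizer (used in compile() further down) resolvable.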

- path_to_file = 'C:\\Users\\tanma.TANMAY-STATION\\Downloads/deu.txt'
path = 'deu.txt'

class LanguageIndex():
@@ -59,10 +53,10 @@ def create_dataset(path, num_examples):
def load_dataset(path, num_examples):
    pairs = create_dataset(path, num_examples)
-     out_lang = LanguageIndex(hin for en, hin in pairs)
-     in_lang = LanguageIndex(en for en, hin in pairs)
-     input_data = [[in_lang.word2idx[s] for s in en.split(' ')] for en, sp in pairs]
-     output_data = [[out_lang.word2idx[s] for s in sp.split(' ')] for en, sp in pairs]
+     out_lang = LanguageIndex(deu for en, deu, _ in pairs)
+     in_lang = LanguageIndex(en for en, deu, _ in pairs)
+     input_data = [[in_lang.word2idx[s] for s in en.split(' ')] for en, deu, _ in pairs]
+     output_data = [[out_lang.word2idx[s] for s in deu.split(' ')] for en, deu, _ in pairs]

    max_length_in, max_length_out = max_length(input_data), max_length(output_data)
    input_data = tf.keras.preprocessing.sequence.pad_sequences(input_data, maxlen=max_length_in, padding="post")
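# In load_dataset() each (english, german) pair becomes a pair of integer-id lists via
# the word2idx lookups; the ids are then padded with padding="post" so they can be
# batched as fixed-length tensors.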
@@ -89,22 +83,22 @@ def load_dataset(path, num_examples):
vocab_in_size = len(input_lang.word2idx)
vocab_out_size = len(target_lang.word2idx)

- class AttentionLSTMCell(LSTMCell):
+ class AttentionLSTMCell(tf.keras.layers.LSTMCell):
    def __init__(self, **kwargs):
        self.attentionMode = False
        super(AttentionLSTMCell, self).__init__(**kwargs)

    @tf_utils.shape_type_conversion
    def build(self, input_shape):

-         self.dense_constant = TimeDistributed(Dense(self.units, name="AttLstmInternal_DenseConstant"))
+         self.dense_constant = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(self.units, name="AttLstmInternal_DenseConstant"))

-         self.dense_state = Dense(self.units, name="AttLstmInternal_DenseState")
+         self.dense_state = tf.keras.layers.Dense(self.units, name="AttLstmInternal_DenseState")

-         self.dense_transform = Dense(1, name="AttLstmInternal_DenseTransform")
+         self.dense_transform = tf.keras.layers.Dense(1, name="AttLstmInternal_DenseTransform")

-         batch, input_dim = input_shape[0]
-         batch, timesteps, context_size = input_shape[-1]
+         batch, input_dim = input_shape[:2]
+         batch, timesteps, context_size = input_shape[0], input_shape[1], input_shape[2]
        lstm_input = (batch, input_dim + context_size)

        return super(AttentionLSTMCell, self).build(lstm_input)
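# build() wires up the attention sub-layers: dense_constant projects the (time-distributed)
# encoder sequence, dense_state projects the previous decoder state, and dense_transform
# collapses their combination into one score per timestep. The cell's own LSTM weights are
# then built for an input widened by context_size, i.e. [token embedding ; context vector].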
@@ -120,24 +114,24 @@ def setAttentionMode(self, mode_on=False):
    def call(self, inputs, states, constants):
        ytm, stm = states

-         stm_repeated = K.repeat(self.dense_state(stm), self.timesteps)
+         stm_repeated = tf.keras.backend.repeat(self.dense_state(stm), self.timesteps)

        combined_stm_input = self.dense_transform(
-             keras.activations.relu(stm_repeated + self.input_seq_shaped))
-         score_vector = keras.activations.softmax(combined_stm_input, 1)
+             tf.keras.activations.relu(stm_repeated + self.input_seq_shaped))
+         score_vector = tf.keras.activations.softmax(combined_stm_input, 1)

-         context_vector = K.sum(score_vector * self.input_seq, 1)
+         context_vector = tf.keras.backend.sum(score_vector * self.input_seq, 1)

-         inputs = K.concatenate([inputs, context_vector])
+         inputs = tf.keras.backend.concatenate([inputs, context_vector])

        res = super(AttentionLSTMCell, self).call(inputs=inputs, states=states)

        if (self.attentionMode):
-             return (K.reshape(score_vector, (-1, self.timesteps)), res[1])
+             return (tf.keras.backend.reshape(score_vector, (-1, self.timesteps)), res[1])
        else:
            return res

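# In call(), attention is additive: the projected previous decoder state (stm) is repeated
# across the encoder timesteps, added to the projected encoder sequence, passed through
# relu and dense_transform, and softmaxed over time to give score_vector. The weighted sum
# of the encoder outputs becomes context_vector, which is concatenated onto the cell input
# before the ordinary LSTMCell step runs. With attentionMode on, the cell returns the
# per-timestep attention weights instead of its output so they can be inspected (e.g. plotted).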
- class LSTMWithAttention(RNN):
+ class LSTMWithAttention(tf.keras.layers.RNN):
    def __init__(self, units, **kwargs):
        cell = AttentionLSTMCell(units=units)
        self.units = units
@@ -161,24 +155,25 @@ def call(self, x, constants, **kwargs):
        return super(LSTMWithAttention, self).call(inputs=x, constants=constants, **kwargs)


- attenc_inputs = Input(shape=(len_input,), name="attenc_inputs")
- attenc_emb = Embedding(input_dim=vocab_in_size, output_dim=embedding_dim)
- attenc_lstm = CuDNNLSTM(units=units, return_sequences=True, return_state=True)
+ attenc_inputs = tf.keras.layers.Input(shape=(len_input,), name="attenc_inputs")
+ attenc_emb = tf.keras.layers.Embedding(input_dim=vocab_in_size, output_dim=embedding_dim)
+ attenc_lstm = tf.keras.layers.LSTM(units=128, activation='tanh', recurrent_activation='sigmoid', recurrent_dropout=0, unroll=False, use_bias=True, return_sequences=True, return_state=True)
+ # For CuDNN implementation
attenc_outputs, attstate_h, attstate_c = attenc_lstm(attenc_emb(attenc_inputs))
attenc_states = [attstate_h, attstate_c]

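# The encoder drops the removed CuDNNLSTM layer in favour of a plain tf.keras LSTM
# configured the way the fused cuDNN kernel expects (tanh/sigmoid activations,
# recurrent_dropout=0, unroll=False, use_bias=True), so GPU acceleration should still
# be picked up where available. It returns both the full output sequence (fed to the
# decoder as the attention "constants") and the final h/c states (used to seed the decoder).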
- attdec_inputs = Input(shape=(None,))
- attdec_emb = Embedding(input_dim=vocab_out_size, output_dim=embedding_dim)
+ attdec_inputs = tf.keras.layers.Input(shape=(None,))
+ attdec_emb = tf.keras.layers.Embedding(input_dim=vocab_out_size, output_dim=embedding_dim)
attdec_lstm = LSTMWithAttention(units=units, return_sequences=True, return_state=True)

attdec_lstm_out, _, _ = attdec_lstm(inputs=attdec_emb(attdec_inputs),
                                    constants=attenc_outputs,
                                    initial_state=attenc_states)
- attdec_d1 = Dense(units, activation="relu")
- attdec_d2 = Dense(vocab_out_size, activation="softmax")
- attdec_out = attdec_d2(Dropout(rate=.4)(attdec_d1(Dropout(rate=.4)(attdec_lstm_out))))
+ attdec_d1 = tf.keras.layers.Dense(units, activation="relu")
+ attdec_d2 = tf.keras.layers.Dense(vocab_out_size, activation="softmax")
+ attdec_out = attdec_d2(tf.keras.layers.Dropout(rate=.4)(attdec_d1(tf.keras.layers.Dropout(rate=.4)(attdec_lstm_out))))

- attmodel = Model([attenc_inputs, attdec_inputs], attdec_out)
+ attmodel = tf.keras.models.Model([attenc_inputs, attdec_inputs], attdec_out)
attmodel.compile(optimizer=tf.train.AdamOptimizer(), loss="sparse_categorical_crossentropy", metrics=['sparse_categorical_accuracy'])

epochs = 20
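# The training model takes [encoder tokens, decoder tokens] and predicts the next target
# token at every position; sparse_categorical_crossentropy keeps the targets as integer
# ids rather than one-hot vectors. A rough sketch of the fit call (the actual call is not
# shown in this hunk; variable names here are illustrative only):
#
#   attmodel.fit([input_data, teacher_data], shifted_target_data,
#                batch_size=64, epochs=epochs, validation_split=0.2)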
@@ -226,22 +221,22 @@ def translate(input_sentence, infenc_model, infmodel, attention=False):
    return output_sentence

def createAttentionInference(attention_mode=False):
-     attencoder_model = Model(attenc_inputs, [attenc_outputs, attstate_h, attstate_c])
-     state_input_h = Input(shape=(units,), name="state_input_h")
-     state_input_c = Input(shape=(units,), name="state_input_c")
-     attenc_seq_out = Input(shape=attenc_outputs.get_shape()[1:], name="attenc_seq_out")
-     inf_attdec_inputs = Input(shape=(None,), name="inf_attdec_inputs")
+     attencoder_model = tf.keras.models.Model(attenc_inputs, [attenc_outputs, attstate_h, attstate_c])
+     state_input_h = tf.keras.layers.Input(shape=(units,), name="state_input_h")
+     state_input_c = tf.keras.layers.Input(shape=(units,), name="state_input_c")
+     attenc_seq_out = tf.keras.layers.Input(shape=attenc_outputs.get_shape()[1:], name="attenc_seq_out")
+     inf_attdec_inputs = tf.keras.layers.Input(shape=(None,), name="inf_attdec_inputs")
    attdec_lstm.cell.setAttentionMode(attention_mode)
    attdec_res, attdec_h, attdec_c = attdec_lstm(attdec_emb(inf_attdec_inputs),
                                                 initial_state=[state_input_h, state_input_c],
                                                 constants=attenc_seq_out)
    attinf_model = None
    if not attention_mode:
        inf_attdec_out = attdec_d2(attdec_d1(attdec_res))
-         attinf_model = Model(inputs=[inf_attdec_inputs, state_input_h, state_input_c, attenc_seq_out],
+         attinf_model = tf.keras.models.Model(inputs=[inf_attdec_inputs, state_input_h, state_input_c, attenc_seq_out],
                              outputs=[inf_attdec_out, attdec_h, attdec_c])
    else:
-         attinf_model = Model(inputs=[inf_attdec_inputs, state_input_h, state_input_c, attenc_seq_out],
+         attinf_model = tf.keras.models.Model(inputs=[inf_attdec_inputs, state_input_h, state_input_c, attenc_seq_out],
                              outputs=[attdec_res, attdec_h, attdec_c])
    return attencoder_model, attinf_model
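# Typical usage (a sketch; the example sentence is illustrative): build the two inference
# models once, then decode one token at a time inside translate(), feeding the returned
# h/c states back in on every step:
#
#   attencoder_model, attinf_model = createAttentionInference()
#   print(translate("how are you", attencoder_model, attinf_model))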