@@ -155,6 +155,7 @@ def do_train(args):
 
     batch_id = 0
     batch_start = time.time()
+    interval_word_num = 0.0
     for input_data in train_loader():
         if args.max_iter and step_idx == args.max_iter:  #NOTE: used for benchmark
             return
@@ -163,6 +164,7 @@ def do_train(args):
         (src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos,
          trg_slf_attn_bias, trg_src_attn_bias, lbl_word,
          lbl_weight) = input_data
+
         logits = transformer(src_word, src_pos, src_slf_attn_bias,
                              trg_word, trg_pos, trg_slf_attn_bias,
                              trg_src_attn_bias)
@@ -180,6 +182,7 @@ def do_train(args):
         optimizer.minimize(avg_cost)
         transformer.clear_gradients()
 
+        interval_word_num += np.prod(src_word.shape)
         if step_idx % args.print_step == 0:
             total_avg_cost = avg_cost.numpy() * trainer_count
 
@@ -193,14 +196,18 @@ def do_train(args):
             else:
                 train_avg_batch_cost = args.print_step / (
                     time.time() - batch_start)
+                word_speed = interval_word_num / (
+                    time.time() - batch_start)
                 logger.info(
                     "step_idx: %d, epoch: %d, batch: %d, avg loss: %f, "
-                    "normalized loss: %f, ppl: %f, avg_speed: %.2f step/s"
-                    % (step_idx, pass_id, batch_id, total_avg_cost,
-                       total_avg_cost - loss_normalizer,
-                       np.exp([min(total_avg_cost, 100)]),
-                       train_avg_batch_cost))
+                    "normalized loss: %f, ppl: %f, avg_speed: %.2f step/s, "
+                    "words speed: %0.2f words/s" %
+                    (step_idx, pass_id, batch_id, total_avg_cost,
+                     total_avg_cost - loss_normalizer,
+                     np.exp([min(total_avg_cost, 100)]),
+                     train_avg_batch_cost, word_speed))
             batch_start = time.time()
+            interval_word_num = 0.0
 
         if step_idx % args.save_step == 0 and step_idx != 0:
             # validation
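
For reference, a minimal standalone sketch of the interval-based words/s measurement this patch adds. `train_loader`, `print_step`, and the batch shape are placeholder assumptions; only the token counting and window-reset logic mirrors the diff.

import time
import numpy as np

def log_word_speed(train_loader, print_step=100):
    # Tokens seen since the last report; reset together with batch_start.
    interval_word_num = 0.0
    batch_start = time.time()
    for step_idx, src_word in enumerate(train_loader):
        # src_word is assumed to be a [batch_size, seq_len] array, so its
        # element count is the number of source tokens in this batch.
        interval_word_num += np.prod(np.asarray(src_word).shape)
        if step_idx % print_step == 0 and step_idx != 0:
            elapsed = time.time() - batch_start
            print("step %d: avg_speed: %.2f step/s, words speed: %.2f words/s"
                  % (step_idx, print_step / elapsed,
                     interval_word_num / elapsed))
            # Start a new measurement window.
            batch_start = time.time()
            interval_word_num = 0.0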