diff --git a/tensorlayer/__init__.py b/tensorlayer/__init__.py
index 0a45da2ec..bc80b663a 100644
--- a/tensorlayer/__init__.py
+++ b/tensorlayer/__init__.py
@@ -3,7 +3,6 @@
 """
 from __future__ import absolute_import
 
-
 try:
     install_instr = "Please make sure you install a recent enough version of TensorFlow."
     import tensorflow
diff --git a/tensorlayer/activation.py b/tensorlayer/activation.py
index 23dc26221..0db26f1e8 100644
--- a/tensorlayer/activation.py
+++ b/tensorlayer/activation.py
@@ -18,9 +18,11 @@ def identity(x, name=None):
     """
     return x
 
+
 # Shortcut
 linear = identity
 
+
 def ramp(x=None, v_min=0, v_max=1, name=None):
     """The ramp activation function.
 
@@ -41,6 +43,7 @@ def ramp(x=None, v_min=0, v_max=1, name=None):
     """
     return tf.clip_by_value(x, clip_value_min=v_min, clip_value_max=v_max, name=name)
 
+
 def leaky_relu(x=None, alpha=0.1, name="lrelu"):
     """The LeakyReLU, Shortcut is ``lrelu``.
 
@@ -65,12 +68,13 @@ def leaky_relu(x=None, alpha=0.1, name="lrelu"):
     - `Rectifier Nonlinearities Improve Neural Network Acoustic Models, Maas et al. (2013) <http://web.stanford.edu/~awni/papers/relu_hybrid_icml2013_final.pdf>`_
     """
     # with tf.name_scope(name) as scope:
-        # x = tf.nn.relu(x)
-        # m_x = tf.nn.relu(-x)
-        # x -= alpha * m_x
+    # x = tf.nn.relu(x)
+    # m_x = tf.nn.relu(-x)
+    # x -= alpha * m_x
     x = tf.maximum(x, alpha * x, name=name)
     return x
 
+
 #Shortcut
 lrelu = leaky_relu
 
@@ -88,9 +92,10 @@ def swish(x, name='swish'):
     A `Tensor` with the same type as `x`.
     """
     with tf.name_scope(name) as scope:
-        x =  tf.nn.sigmoid(x) * x
+        x = tf.nn.sigmoid(x) * x
     return x
 
+
 def pixel_wise_softmax(output, name='pixel_wise_softmax'):
     """Return the softmax outputs of images, every pixels have multiple label, the sum of a pixel is 1.
     Usually be used for image segmentation.
diff --git a/tensorlayer/cost.py b/tensorlayer/cost.py
index 8f3ded80a..296ce4942 100644
--- a/tensorlayer/cost.py
+++ b/tensorlayer/cost.py
@@ -8,9 +8,9 @@
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import standard_ops
 
-
 ## Cost Functions
 
+
 def cross_entropy(output, target, name=None):
     """It is a softmax cross-entropy operation, returns the TensorFlow expression of cross-entropy of two distributions, implement
     softmax internally. See ``tf.nn.sparse_softmax_cross_entropy_with_logits``.
@@ -39,6 +39,7 @@ def cross_entropy(output, target, name=None):
     assert name is not None, "Please give a unique name to tl.cost.cross_entropy for TF1.0+"
     return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target, logits=output, name=name))
 
+
 def sigmoid_cross_entropy(output, target, name=None):
     """It is a sigmoid cross-entropy operation, see ``tf.nn.sigmoid_cross_entropy_with_logits``.
     """
@@ -68,13 +69,12 @@ def binary_cross_entropy(output, target, epsilon=1e-8, name='bce_loss'):
     -----------
     - `DRAW <https://github.com/ericjang/draw/blob/master/draw.py#L73>`_
     """
-#     from tensorflow.python.framework import ops
-#     with ops.op_scope([output, target], name, "bce_loss") as name:
-#         output = ops.convert_to_tensor(output, name="preds")
-#         target = ops.convert_to_tensor(targets, name="target")
+    #     from tensorflow.python.framework import ops
+    #     with ops.op_scope([output, target], name, "bce_loss") as name:
+    #         output = ops.convert_to_tensor(output, name="preds")
+    #         target = ops.convert_to_tensor(targets, name="target")
     with tf.name_scope(name):
-        return tf.reduce_mean(tf.reduce_sum(-(target * tf.log(output + epsilon) +
-                              (1. - target) * tf.log(1. - output + epsilon)), axis=1))
+        return tf.reduce_mean(tf.reduce_sum(-(target * tf.log(output + epsilon) + (1. - target) * tf.log(1. - output + epsilon)), axis=1))
 
 
 def mean_squared_error(output, target, is_mean=False, name="mean_squared_error"):
@@ -91,17 +91,17 @@ def mean_squared_error(output, target, is_mean=False, name="mean_squared_error")
     - `Wiki Mean Squared Error <https://en.wikipedia.org/wiki/Mean_squared_error>`_
     """
     with tf.name_scope(name):
-        if output.get_shape().ndims == 2:   # [batch_size, n_feature]
+        if output.get_shape().ndims == 2:  # [batch_size, n_feature]
             if is_mean:
                 mse = tf.reduce_mean(tf.reduce_mean(tf.squared_difference(output, target), 1))
             else:
                 mse = tf.reduce_mean(tf.reduce_sum(tf.squared_difference(output, target), 1))
-        elif output.get_shape().ndims == 3: # [batch_size, w, h]
+        elif output.get_shape().ndims == 3:  # [batch_size, w, h]
             if is_mean:
                 mse = tf.reduce_mean(tf.reduce_mean(tf.squared_difference(output, target), [1, 2]))
             else:
                 mse = tf.reduce_mean(tf.reduce_sum(tf.squared_difference(output, target), [1, 2]))
-        elif output.get_shape().ndims == 4: # [batch_size, w, h, c]
+        elif output.get_shape().ndims == 4:  # [batch_size, w, h, c]
             if is_mean:
                 mse = tf.reduce_mean(tf.reduce_mean(tf.squared_difference(output, target), [1, 2, 3]))
             else:
@@ -110,6 +110,7 @@ def mean_squared_error(output, target, is_mean=False, name="mean_squared_error")
             raise Exception("Unknow dimension")
         return mse
 
+
 def normalized_mean_square_error(output, target):
     """Return the TensorFlow expression of normalized mean-square-error of two distributions.
 
@@ -119,18 +120,19 @@ def normalized_mean_square_error(output, target):
     target : 2D, 3D or 4D tensor.
     """
     with tf.name_scope("mean_squared_error_loss"):
-        if output.get_shape().ndims == 2:   # [batch_size, n_feature]
+        if output.get_shape().ndims == 2:  # [batch_size, n_feature]
             nmse_a = tf.sqrt(tf.reduce_sum(tf.squared_difference(output, target), axis=1))
             nmse_b = tf.sqrt(tf.reduce_sum(tf.square(target), axis=1))
-        elif output.get_shape().ndims == 3:   # [batch_size, w, h]
-            nmse_a = tf.sqrt(tf.reduce_sum(tf.squared_difference(output, target), axis=[1,2]))
-            nmse_b = tf.sqrt(tf.reduce_sum(tf.square(target), axis=[1,2]))
-        elif output.get_shape().ndims == 4: # [batch_size, w, h, c]
-            nmse_a = tf.sqrt(tf.reduce_sum(tf.squared_difference(output, target), axis=[1,2,3]))
-            nmse_b = tf.sqrt(tf.reduce_sum(tf.square(target), axis=[1,2,3]))
+        elif output.get_shape().ndims == 3:  # [batch_size, w, h]
+            nmse_a = tf.sqrt(tf.reduce_sum(tf.squared_difference(output, target), axis=[1, 2]))
+            nmse_b = tf.sqrt(tf.reduce_sum(tf.square(target), axis=[1, 2]))
+        elif output.get_shape().ndims == 4:  # [batch_size, w, h, c]
+            nmse_a = tf.sqrt(tf.reduce_sum(tf.squared_difference(output, target), axis=[1, 2, 3]))
+            nmse_b = tf.sqrt(tf.reduce_sum(tf.square(target), axis=[1, 2, 3]))
         nmse = tf.reduce_mean(nmse_a / nmse_b)
     return nmse
 
+
 def absolute_difference_error(output, target, is_mean=False):
     """ Return the TensorFlow expression of absolute difference error (L1) of two batch of data.
 
@@ -141,17 +143,17 @@ def absolute_difference_error(output, target, is_mean=False):
     is_mean : boolean, if True, use ``tf.reduce_mean`` to compute the loss of one data, otherwise, use ``tf.reduce_sum`` (default).
     """
     with tf.name_scope("mean_squared_error_loss"):
-        if output.get_shape().ndims == 2:   # [batch_size, n_feature]
+        if output.get_shape().ndims == 2:  # [batch_size, n_feature]
             if is_mean:
                 loss = tf.reduce_mean(tf.reduce_mean(tf.abs(output - target), 1))
             else:
                 loss = tf.reduce_mean(tf.reduce_sum(tf.abs(output - target), 1))
-        elif output.get_shape().ndims == 3: # [batch_size, w, h]
+        elif output.get_shape().ndims == 3:  # [batch_size, w, h]
             if is_mean:
                 loss = tf.reduce_mean(tf.reduce_mean(tf.abs(output - target), [1, 2]))
             else:
                 loss = tf.reduce_mean(tf.reduce_sum(tf.abs(output - target), [1, 2]))
-        elif output.get_shape().ndims == 4: # [batch_size, w, h, c]
+        elif output.get_shape().ndims == 4:  # [batch_size, w, h, c]
             if is_mean:
                 loss = tf.reduce_mean(tf.reduce_mean(tf.abs(output - target), [1, 2, 3]))
             else:
@@ -161,7 +163,7 @@ def absolute_difference_error(output, target, is_mean=False):
         return loss
 
 
-def dice_coe(output, target, loss_type='jaccard', axis=[1,2,3], smooth=1e-5):
+def dice_coe(output, target, loss_type='jaccard', axis=[1, 2, 3], smooth=1e-5):
     """Soft dice (Sørensen or Jaccard) coefficient for comparing the similarity
     of two batch of data, usually be used for binary image segmentation
     i.e. labels are binary. The coefficient between 0 to 1, 1 means totally match.
@@ -212,7 +214,7 @@ def dice_coe(output, target, loss_type='jaccard', axis=[1,2,3], smooth=1e-5):
     return dice
 
 
-def dice_hard_coe(output, target, threshold=0.5, axis=[1,2,3], smooth=1e-5):
+def dice_hard_coe(output, target, threshold=0.5, axis=[1, 2, 3], smooth=1e-5):
     """Non-differentiable Sørensen–Dice coefficient for comparing the similarity
     of two batch of data, usually be used for binary image segmentation i.e. labels are binary.
     The coefficient between 0 to 1, 1 if totally match.
@@ -250,7 +252,7 @@ def dice_hard_coe(output, target, threshold=0.5, axis=[1,2,3], smooth=1e-5):
     return hard_dice
 
 
-def iou_coe(output, target, threshold=0.5, axis=[1,2,3], smooth=1e-5):
+def iou_coe(output, target, threshold=0.5, axis=[1, 2, 3], smooth=1e-5):
     """Non-differentiable Intersection over Union (IoU) for comparing the
     similarity of two batch of data, usually be used for evaluating binary image segmentation.
     The coefficient between 0 to 1, 1 means totally match.
@@ -274,15 +276,16 @@ def iou_coe(output, target, threshold=0.5, axis=[1,2,3], smooth=1e-5):
     """
     pre = tf.cast(output > threshold, dtype=tf.float32)
     truth = tf.cast(target > threshold, dtype=tf.float32)
-    inse = tf.reduce_sum(tf.multiply(pre, truth), axis=axis) # AND
-    union = tf.reduce_sum(tf.cast(tf.add(pre, truth)>= 1, dtype=tf.float32), axis=axis) # OR
+    inse = tf.reduce_sum(tf.multiply(pre, truth), axis=axis)  # AND
+    union = tf.reduce_sum(tf.cast(tf.add(pre, truth) >= 1, dtype=tf.float32), axis=axis)  # OR
     ## old axis=[0,1,2,3]
     # epsilon = 1e-5
     # batch_iou = inse / (union + epsilon)
     ## new haodong
     batch_iou = (inse + smooth) / (union + smooth)
     iou = tf.reduce_mean(batch_iou)
-    return iou#, pre, truth, inse, union
+    return iou  #, pre, truth, inse, union
+
 
 # ## test soft/hard dice and iou
 # import numpy as np
@@ -318,7 +321,7 @@ def iou_coe(output, target, threshold=0.5, axis=[1,2,3], smooth=1e-5):
 # exit()
 
 
-def cross_entropy_seq(logits, target_seqs, batch_size=None):#, batch_size=1, num_steps=None):
+def cross_entropy_seq(logits, target_seqs, batch_size=None):  #, batch_size=1, num_steps=None):
     """Returns the expression of cross-entropy of two sequences, implement
     softmax internally. Normally be used for Fixed Length RNN outputs.
 
@@ -343,12 +346,9 @@ def cross_entropy_seq(logits, target_seqs, batch_size=None):#, batch_size=1, num
     # except:
     #     sequence_loss_by_example_fn = tf.nn.seq2seq.sequence_loss_by_example
 
-    loss = sequence_loss_by_example_fn(
-        [logits],
-        [tf.reshape(target_seqs, [-1])],
-        [tf.ones_like(tf.reshape(target_seqs, [-1]), dtype=tf.float32)])
-        # [tf.ones([batch_size * num_steps])])
-    cost = tf.reduce_sum(loss) #/ batch_size
+    loss = sequence_loss_by_example_fn([logits], [tf.reshape(target_seqs, [-1])], [tf.ones_like(tf.reshape(target_seqs, [-1]), dtype=tf.float32)])
+    # disabled alternative for the weights argument above: [tf.ones([batch_size * num_steps])]
+    cost = tf.reduce_sum(loss)  #/ batch_size
     if batch_size is not None:
         cost = cost / batch_size
     return cost
@@ -374,15 +374,16 @@ def cross_entropy_seq_with_mask(logits, target_seqs, input_mask, return_details=
     --------
     - see Image Captioning Example.
     """
-    targets = tf.reshape(target_seqs, [-1])   # to one vector
-    weights = tf.to_float(tf.reshape(input_mask, [-1]))   # to one vector like targets
+    targets = tf.reshape(target_seqs, [-1])  # to one vector
+    weights = tf.to_float(tf.reshape(input_mask, [-1]))  # to one vector like targets
     losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=targets, name=name) * weights
     #losses = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=targets, name=name)) # for TF1.0 and others
 
     # try: ## TF1.0
-    loss = tf.divide(tf.reduce_sum(losses),   # loss from mask. reduce_sum before element-wise mul with mask !!
-                    tf.reduce_sum(weights),
-                    name="seq_loss_with_mask")
+    loss = tf.divide(
+        tf.reduce_sum(losses),  # masked loss: the mask is multiplied in element-wise above, i.e. before this reduce_sum
+        tf.reduce_sum(weights),
+        name="seq_loss_with_mask")
     # except: ## TF0.12
     #     loss = tf.div(tf.reduce_sum(losses),   # loss from mask. reduce_sum before element-wise mul with mask !!
     #                     tf.reduce_sum(weights),
@@ -413,7 +414,7 @@ def cosine_similarity(v1, v2):
 
 ## Regularization Functions
 def li_regularizer(scale, scope=None):
-  """li regularization removes the neurons of previous layer, `i` represents `inputs`.\n
+    """li regularization removes the neurons of previous layer, `i` represents `inputs`.\n
   Returns a function that can be used to apply group li regularization to weights.\n
   The implementation follows `TensorFlow contrib <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/layers/python/layers/regularizers.py>`_.
 
@@ -431,44 +432,37 @@ def li_regularizer(scale, scope=None):
   ------
   ValueError : if scale is outside of the range [0.0, 1.0] or if scale is not a float.
   """
-  import numbers
-  from tensorflow.python.framework import ops
-  from tensorflow.python.ops import standard_ops
-  # from tensorflow.python.platform import tf_logging as logging
-
-  if isinstance(scale, numbers.Integral):
-    raise ValueError('scale cannot be an integer: %s' % scale)
-  if isinstance(scale, numbers.Real):
-    if scale < 0.:
-      raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
-                       scale)
-    if scale >= 1.:
-      raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
-                       scale)
-    if scale == 0.:
-      logging.info('Scale of 0 disables regularizer.')
-      return lambda _, name=None: None
-
-  def li(weights, name=None):
-    """Applies li regularization to weights."""
-    with tf.name_scope('li_regularizer') as scope:
-        my_scale = ops.convert_to_tensor(scale,
-                                           dtype=weights.dtype.base_dtype,
-                                           name='scale')
-        # if tf.__version__ <= '0.12':
-        #     standard_ops_fn = standard_ops.mul
-        # else:
-        standard_ops_fn = standard_ops.multiply
-        return standard_ops_fn(
-          my_scale,
-          standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 1))),
-          name=scope)
-  return li
-
+    import numbers
+    from tensorflow.python.framework import ops
+    from tensorflow.python.ops import standard_ops
+    # from tensorflow.python.platform import tf_logging as logging
+
+    if isinstance(scale, numbers.Integral):
+        raise ValueError('scale cannot be an integer: %s' % scale)
+    if isinstance(scale, numbers.Real):
+        if scale < 0.:
+            raise ValueError('Setting a scale less than 0 on a regularizer: %g' % scale)
+        if scale >= 1.:
+            raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % scale)
+        if scale == 0.:
+            logging.info('Scale of 0 disables regularizer.')  # NOTE(review): the tf_logging import above is commented out; confirm `logging` is imported at module level, otherwise this raises NameError
+            return lambda _, name=None: None
+
+    def li(weights, name=None):
+        """Applies li regularization to weights."""
+        with tf.name_scope('li_regularizer') as scope:
+            my_scale = ops.convert_to_tensor(scale, dtype=weights.dtype.base_dtype, name='scale')
+            # if tf.__version__ <= '0.12':
+            #     standard_ops_fn = standard_ops.mul
+            # else:
+            standard_ops_fn = standard_ops.multiply
+            return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 1))), name=scope)
+
+    return li
 
 
 def lo_regularizer(scale, scope=None):
-  """lo regularization removes the neurons of current layer, `o` represents `outputs`\n
+    """lo regularization removes the neurons of current layer, `o` represents `outputs`\n
   Returns a function that can be used to apply group lo regularization to weights.\n
   The implementation follows `TensorFlow contrib <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/layers/python/layers/regularizers.py>`_.
 
@@ -486,42 +480,37 @@ def lo_regularizer(scale, scope=None):
   ------
   ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float.
   """
-  import numbers
-  from tensorflow.python.framework import ops
-  from tensorflow.python.ops import standard_ops
-  # from tensorflow.python.platform import tf_logging as logging
-
-  if isinstance(scale, numbers.Integral):
-    raise ValueError('scale cannot be an integer: %s' % scale)
-  if isinstance(scale, numbers.Real):
-    if scale < 0.:
-      raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
-                       scale)
-    if scale >= 1.:
-      raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
-                       scale)
-    if scale == 0.:
-      logging.info('Scale of 0 disables regularizer.')
-      return lambda _, name=None: None
-
-  def lo(weights, name='lo_regularizer'):
-    """Applies group column regularization to weights."""
-    with tf.name_scope(name) as scope:
-        my_scale = ops.convert_to_tensor(scale,
-                                       dtype=weights.dtype.base_dtype,
-                                       name='scale')
-        # if tf.__version__ <= '0.12':
-        #     standard_ops_fn = standard_ops.mul
-        # else:
-        standard_ops_fn = standard_ops.multiply
-        return standard_ops_fn(
-          my_scale,
-          standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 0))),
-          name=scope)
-  return lo
+    import numbers
+    from tensorflow.python.framework import ops
+    from tensorflow.python.ops import standard_ops
+    # from tensorflow.python.platform import tf_logging as logging
+
+    if isinstance(scale, numbers.Integral):
+        raise ValueError('scale cannot be an integer: %s' % scale)
+    if isinstance(scale, numbers.Real):
+        if scale < 0.:
+            raise ValueError('Setting a scale less than 0 on a regularizer: %g' % scale)
+        if scale >= 1.:
+            raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % scale)
+        if scale == 0.:
+            logging.info('Scale of 0 disables regularizer.')  # NOTE(review): the tf_logging import above is commented out; confirm `logging` is imported at module level, otherwise this raises NameError
+            return lambda _, name=None: None
+
+    def lo(weights, name='lo_regularizer'):
+        """Applies group column regularization to weights."""
+        with tf.name_scope(name) as scope:
+            my_scale = ops.convert_to_tensor(scale, dtype=weights.dtype.base_dtype, name='scale')
+            # if tf.__version__ <= '0.12':
+            #     standard_ops_fn = standard_ops.mul
+            # else:
+            standard_ops_fn = standard_ops.multiply
+            return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 0))), name=scope)
+
+    return lo
+
 
 def maxnorm_regularizer(scale=1.0, scope=None):
-  """Max-norm regularization returns a function that can be used
+    """Max-norm regularization returns a function that can be used
   to apply max-norm regularization to weights.
   About max-norm: `wiki <https://en.wikipedia.org/wiki/Matrix_norm#Max_norm>`_.\n
   The implementation follows `TensorFlow contrib <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/layers/python/layers/regularizers.py>`_.
@@ -540,38 +529,37 @@ def maxnorm_regularizer(scale=1.0, scope=None):
   --------
   ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float.
   """
-  import numbers
-  from tensorflow.python.framework import ops
-  from tensorflow.python.ops import standard_ops
-
-  if isinstance(scale, numbers.Integral):
-    raise ValueError('scale cannot be an integer: %s' % scale)
-  if isinstance(scale, numbers.Real):
-    if scale < 0.:
-      raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
-                       scale)
-    # if scale >= 1.:
-    #   raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
-    #                    scale)
-    if scale == 0.:
-      logging.info('Scale of 0 disables regularizer.')
-      return lambda _, name=None: None
-
-  def mn(weights, name='max_regularizer'):
-    """Applies max-norm regularization to weights."""
-    with tf.name_scope(name) as scope:
-          my_scale = ops.convert_to_tensor(scale,
-                                           dtype=weights.dtype.base_dtype,
-                                           name='scale')
-        #   if tf.__version__ <= '0.12':
-        #       standard_ops_fn = standard_ops.mul
-        #   else:
-          standard_ops_fn = standard_ops.multiply
-          return standard_ops_fn(my_scale, standard_ops.reduce_max(standard_ops.abs(weights)), name=scope)
-  return mn
+    import numbers
+    from tensorflow.python.framework import ops
+    from tensorflow.python.ops import standard_ops
+
+    if isinstance(scale, numbers.Integral):
+        raise ValueError('scale cannot be an integer: %s' % scale)
+    if isinstance(scale, numbers.Real):
+        if scale < 0.:
+            raise ValueError('Setting a scale less than 0 on a regularizer: %g' % scale)
+        # if scale >= 1.:
+        #   raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
+        #                    scale)
+        if scale == 0.:
+            logging.info('Scale of 0 disables regularizer.')  # NOTE(review): no `logging` import is visible in this function; confirm it is imported at module level
+            return lambda _, name=None: None
+
+    def mn(weights, name='max_regularizer'):
+        """Applies max-norm regularization to weights."""
+        with tf.name_scope(name) as scope:
+            my_scale = ops.convert_to_tensor(scale, dtype=weights.dtype.base_dtype, name='scale')
+            #   if tf.__version__ <= '0.12':
+            #       standard_ops_fn = standard_ops.mul
+            #   else:
+            standard_ops_fn = standard_ops.multiply
+            return standard_ops_fn(my_scale, standard_ops.reduce_max(standard_ops.abs(weights)), name=scope)
+
+    return mn
+
 
 def maxnorm_o_regularizer(scale, scope):
-  """Max-norm output regularization removes the neurons of current layer.\n
+    """Max-norm output regularization removes the neurons of current layer.\n
   Returns a function that can be used to apply max-norm regularization to each column of weight matrix.\n
   The implementation follows `TensorFlow contrib <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/layers/python/layers/regularizers.py>`_.
 
@@ -589,38 +577,37 @@ def maxnorm_o_regularizer(scale, scope):
   ---------
   ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float.
   """
-  import numbers
-  from tensorflow.python.framework import ops
-  from tensorflow.python.ops import standard_ops
-
-  if isinstance(scale, numbers.Integral):
-    raise ValueError('scale cannot be an integer: %s' % scale)
-  if isinstance(scale, numbers.Real):
-    if scale < 0.:
-      raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
-                       scale)
-    # if scale >= 1.:
-    #   raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
-    #                    scale)
-    if scale == 0.:
-      logging.info('Scale of 0 disables regularizer.')
-      return lambda _, name=None: None
-
-  def mn_o(weights, name='maxnorm_o_regularizer'):
-     """Applies max-norm regularization to weights."""
-     with tf.name_scope(name) as scope:
-          my_scale = ops.convert_to_tensor(scale,
-                                           dtype=weights.dtype.base_dtype,
-                                                   name='scale')
-          if tf.__version__ <= '0.12':
-             standard_ops_fn = standard_ops.mul
-          else:
-             standard_ops_fn = standard_ops.multiply
-          return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 0)), name=scope)
-  return mn_o
+    import numbers
+    from tensorflow.python.framework import ops
+    from tensorflow.python.ops import standard_ops
+
+    if isinstance(scale, numbers.Integral):
+        raise ValueError('scale cannot be an integer: %s' % scale)
+    if isinstance(scale, numbers.Real):
+        if scale < 0.:
+            raise ValueError('Setting a scale less than 0 on a regularizer: %g' % scale)
+        # if scale >= 1.:
+        #   raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
+        #                    scale)
+        if scale == 0.:
+            logging.info('Scale of 0 disables regularizer.')  # NOTE(review): no `logging` import is visible in this function; confirm it is imported at module level
+            return lambda _, name=None: None
+
+    def mn_o(weights, name='maxnorm_o_regularizer'):
+        """Applies max-norm regularization to weights."""
+        with tf.name_scope(name) as scope:
+            my_scale = ops.convert_to_tensor(scale, dtype=weights.dtype.base_dtype, name='scale')
+            if tf.__version__ <= '0.12':  # NOTE(review): lexicographic string comparison, not a numeric version check (e.g. '0.9' <= '0.12' is False)
+                standard_ops_fn = standard_ops.mul
+            else:
+                standard_ops_fn = standard_ops.multiply
+            return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 0)), name=scope)
+
+    return mn_o
+
 
 def maxnorm_i_regularizer(scale, scope=None):
-  """Max-norm input regularization removes the neurons of previous layer.\n
+    """Max-norm input regularization removes the neurons of previous layer.\n
   Returns a function that can be used to apply max-norm regularization to each row of weight matrix.\n
   The implementation follows `TensorFlow contrib <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/layers/python/layers/regularizers.py>`_.
 
@@ -638,38 +625,33 @@ def maxnorm_i_regularizer(scale, scope=None):
   ---------
   ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float.
   """
-  import numbers
-  from tensorflow.python.framework import ops
-  from tensorflow.python.ops import standard_ops
-
-  if isinstance(scale, numbers.Integral):
-    raise ValueError('scale cannot be an integer: %s' % scale)
-  if isinstance(scale, numbers.Real):
-    if scale < 0.:
-      raise ValueError('Setting a scale less than 0 on a regularizer: %g' %
-                       scale)
-    # if scale >= 1.:
-    #   raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
-    #                    scale)
-    if scale == 0.:
-      logging.info('Scale of 0 disables regularizer.')
-      return lambda _, name=None: None
-
-  def mn_i(weights, name='maxnorm_i_regularizer'):
-     """Applies max-norm regularization to weights."""
-     with tf.name_scope(name) as scope:
-          my_scale = ops.convert_to_tensor(scale,
-                                           dtype=weights.dtype.base_dtype,
-                                                   name='scale')
-          if tf.__version__ <= '0.12':
-             standard_ops_fn = standard_ops.mul
-          else:
-             standard_ops_fn = standard_ops.multiply
-          return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 1)), name=scope)
-  return mn_i
-
-
+    import numbers
+    from tensorflow.python.framework import ops
+    from tensorflow.python.ops import standard_ops
+
+    if isinstance(scale, numbers.Integral):
+        raise ValueError('scale cannot be an integer: %s' % scale)
+    if isinstance(scale, numbers.Real):
+        if scale < 0.:
+            raise ValueError('Setting a scale less than 0 on a regularizer: %g' % scale)
+        # if scale >= 1.:
+        #   raise ValueError('Setting a scale greater than 1 on a regularizer: %g' %
+        #                    scale)
+        if scale == 0.:
+            logging.info('Scale of 0 disables regularizer.')  # NOTE(review): no `logging` import is visible in this function; confirm it is imported at module level
+            return lambda _, name=None: None
+
+    def mn_i(weights, name='maxnorm_i_regularizer'):
+        """Applies max-norm regularization to weights."""
+        with tf.name_scope(name) as scope:
+            my_scale = ops.convert_to_tensor(scale, dtype=weights.dtype.base_dtype, name='scale')
+            if tf.__version__ <= '0.12':  # NOTE(review): lexicographic string comparison, not a numeric version check (e.g. '0.9' <= '0.12' is False)
+                standard_ops_fn = standard_ops.mul
+            else:
+                standard_ops_fn = standard_ops.multiply
+            return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 1)), name=scope)
 
+    return mn_i
 
 
 #
diff --git a/tensorlayer/db.py b/tensorlayer/db.py
index 6a432e6ee..b5bb524b3 100644
--- a/tensorlayer/db.py
+++ b/tensorlayer/db.py
@@ -6,7 +6,6 @@
 Latest Version
 """
 
-
 import inspect
 import math
 import pickle
@@ -23,15 +22,12 @@
 
 
 def AutoFill(func):
-    def func_wrapper(self,*args,**kwargs):
-        d=inspect.getcallargs(func,self,*args,**kwargs)
-        d['args'].update({"studyID":self.studyID})
-        return  func(**d)
-    return func_wrapper
-
-
-
+    def func_wrapper(self, *args, **kwargs):  # wraps a method so that its `args` dict always carries self.studyID
+        d = inspect.getcallargs(func, self, *args, **kwargs)  # map every parameter name (defaults included) to its bound value
+        d['args'].update({"studyID": self.studyID})  # needs a parameter named `args`; updates it in place, so a shared default such as find_one_params' args={} is mutated too
+        return func(**d)  # call through with every argument passed by keyword
 
+    return func_wrapper
 
 
 class TensorDB(object):
@@ -68,32 +64,24 @@ class TensorDB(object):
     1 : You may like to install MongoChef or Mongo Management Studo APP for
        visualizing or testing your MongoDB.
     """
-    def __init__(
-        self,
-        ip = 'localhost',
-        port = 27017,
-        db_name = 'db_name',
-        user_name = None,
-        password = 'password',
-        studyID=None
-    ):
+
+    def __init__(self, ip='localhost', port=27017, db_name='db_name', user_name=None, password='password', studyID=None):
         ## connect mongodb
         client = MongoClient(ip, port)
         self.db = client[db_name]
         if user_name != None:
             self.db.authenticate(user_name, password)
 
-
         if studyID is None:
-            self.studyID=str(uuid.uuid1())
+            self.studyID = str(uuid.uuid1())
         else:
-            self.studyID=studyID
+            self.studyID = studyID
 
         ## define file system (Buckets)
         self.datafs = gridfs.GridFS(self.db, collection="datafs")
         self.modelfs = gridfs.GridFS(self.db, collection="modelfs")
         self.paramsfs = gridfs.GridFS(self.db, collection="paramsfs")
-        self.archfs=gridfs.GridFS(self.db,collection="ModelArchitecture")
+        self.archfs = gridfs.GridFS(self.db, collection="ModelArchitecture")
         ##
         print("[TensorDB] Connect SUCCESS {}:{} {} {} {}".format(ip, port, db_name, user_name, studyID))
 
@@ -102,16 +90,16 @@ def __init__(
         self.db_name = db_name
         self.user_name = user_name
 
-    def __autofill(self,args):
-        return args.update({'studyID':self.studyID})
+    def __autofill(self, args):
+        return args.update({'studyID': self.studyID})
 
-    def __serialization(self,ps):
+    def __serialization(self, ps):
         return pickle.dumps(ps, protocol=2)
 
-    def __deserialization(self,ps):
+    def __deserialization(self, ps):
         return pickle.loads(ps)
 
-    def save_params(self, params=[], args={}):#, file_name='parameters'):
+    def save_params(self, params=[], args={}):  #, file_name='parameters'):
         """ Save parameters into MongoDB Buckets, and save the file ID into Params Collections.
 
         Parameters
@@ -125,15 +113,15 @@ def save_params(self, params=[], args={}):#, file_name='parameters'):
         """
         self.__autofill(args)
         s = time.time()
-        f_id = self.paramsfs.put(self.__serialization(params))#, file_name=file_name)
+        f_id = self.paramsfs.put(self.__serialization(params))  #, file_name=file_name)
         args.update({'f_id': f_id, 'time': datetime.utcnow()})
         self.db.Params.insert_one(args)
         # print("[TensorDB] Save params: {} SUCCESS, took: {}s".format(file_name, round(time.time()-s, 2)))
-        print("[TensorDB] Save params: SUCCESS, took: {}s".format(round(time.time()-s, 2)))
+        print("[TensorDB] Save params: SUCCESS, took: {}s".format(round(time.time() - s, 2)))
         return f_id
 
     @AutoFill
-    def find_one_params(self, args={},sort=None):
+    def find_one_params(self, args={}, sort=None):
         """ Find one parameter from MongoDB Buckets.
 
         Parameters
@@ -148,7 +136,7 @@ def find_one_params(self, args={},sort=None):
 
         s = time.time()
         # print(args)
-        d = self.db.Params.find_one(filter=args,sort=sort)
+        d = self.db.Params.find_one(filter=args, sort=sort)
 
         if d is not None:
             f_id = d['f_id']
@@ -157,7 +145,7 @@ def find_one_params(self, args={},sort=None):
             return False, False
         try:
             params = self.__deserialization(self.paramsfs.get(f_id).read())
-            print("[TensorDB] Find one params SUCCESS, {} took: {}s".format(args, round(time.time()-s, 2)))
+            print("[TensorDB] Find one params SUCCESS, {} took: {}s".format(args, round(time.time() - s, 2)))
             return params, f_id
         except:
             return False, False
@@ -182,14 +170,14 @@ def find_all_params(self, args={}):
         if pc is not None:
             f_id_list = pc.distinct('f_id')
             params = []
-            for f_id in f_id_list: # you may have multiple Buckets files
+            for f_id in f_id_list:  # you may have multiple Buckets files
                 tmp = self.paramsfs.get(f_id).read()
                 params.append(self.__deserialization(tmp))
         else:
             print("[TensorDB] FAIL! Cannot find any: {}".format(args))
             return False
 
-        print("[TensorDB] Find all params SUCCESS, took: {}s".format(round(time.time()-s, 2)))
+        print("[TensorDB] Find all params SUCCESS, took: {}s".format(round(time.time() - s, 2)))
         return params
 
     @AutoFill
@@ -217,7 +205,7 @@ def _print_dict(self, args):
         string = ''
         for key, value in args.items():
             if key is not '_id':
-                string += str(key) + ": "+ str(value) + " / "
+                string += str(key) + ": " + str(value) + " / "
         return string
 
     ## =========================== LOG =================================== ##
@@ -267,7 +255,7 @@ def valid_log(self, args={}):
         _result = self.db.ValidLog.insert_one(args)
         # _log = "".join(str(key) + ": " + str(value) for key, value in args.items())
         _log = self._print_dict(args)
-        print("[TensorDB] ValidLog: " +_log)
+        print("[TensorDB] ValidLog: " + _log)
         return _result
 
     @AutoFill
@@ -297,7 +285,7 @@ def test_log(self, args={}):
         _result = self.db.TestLog.insert_one(args)
         # _log = "".join(str(key) + str(value) for key, value in args.items())
         _log = self._print_dict(args)
-        print("[TensorDB] TestLog: " +_log)
+        print("[TensorDB] TestLog: " + _log)
         return _result
 
     @AutoFill
@@ -314,14 +302,14 @@ def del_test_log(self, args={}):
 
     ## =========================== Network Architecture ================== ##
     @AutoFill
-    def save_model_architecture(self,s,args={}):
+    def save_model_architecture(self, s, args={}):
         self.__autofill(args)
-        fid=self.archfs.put(s,filename="modelarchitecture")
-        args.update({"fid":fid})
+        fid = self.archfs.put(s, filename="modelarchitecture")
+        args.update({"fid": fid})
         self.db.march.insert_one(args)
 
     @AutoFill
-    def load_model_architecture(self,args={}):
+    def load_model_architecture(self, args={}):
 
         d = self.db.march.find_one(args)
         if d is not None:
@@ -331,7 +319,7 @@ def load_model_architecture(self,args={}):
             # "print find"
         else:
             print("[TensorDB] FAIL! Cannot find: {}".format(args))
-            print ("no idtem")
+            print("no idtem")
             return False, False
         try:
             archs = self.archfs.get(fid).read()
@@ -385,7 +373,6 @@ def find_one_job(self, args={}):
         dictionary : contains all meta data and script.
         """
 
-
         temp = self.db.Job.find_one(args)
 
         if temp is not None:
@@ -400,34 +387,34 @@ def find_one_job(self, args={}):
 
         return temp
 
-    def push_job(self,margs, wargs,dargs,epoch):
+    def push_job(self, margs, wargs, dargs, epoch):
 
-        ms,mid=self.load_model_architecture(margs)
-        weight,wid=self.find_one_params(wargs)
-        args={"weight":wid,"model":mid,"dargs":dargs,"epoch":epoch,"time":datetime.utcnow(),"Running":False}
+        ms, mid = self.load_model_architecture(margs)
+        weight, wid = self.find_one_params(wargs)
+        args = {"weight": wid, "model": mid, "dargs": dargs, "epoch": epoch, "time": datetime.utcnow(), "Running": False}
         self.__autofill(args)
         self.db.JOBS.insert_one(args)
 
     def peek_job(self):
-        args={'Running':False}
+        args = {'Running': False}
         self.__autofill(args)
-        m=self.db.JOBS.find_one(args)
+        m = self.db.JOBS.find_one(args)
         print(m)
         if m is None:
             return False
 
-        s=self.paramsfs.get(m['weight']).read()
-        w=self.__deserialization(s)
+        s = self.paramsfs.get(m['weight']).read()
+        w = self.__deserialization(s)
 
-        ach=self.archfs.get(m['model']).read()
+        ach = self.archfs.get(m['model']).read()
 
-        return m['_id'], ach,w,m["dargs"],m['epoch']
+        return m['_id'], ach, w, m["dargs"], m['epoch']
 
-    def run_job(self,jid):
-        self.db.JOBS.find_one_and_update({'_id':jid},{'$set': {'Running': True,"Since":datetime.utcnow()}})
+    def run_job(self, jid):
+        self.db.JOBS.find_one_and_update({'_id': jid}, {'$set': {'Running': True, "Since": datetime.utcnow()}})
 
-    def del_job(self,jid):
-        self.db.JOBS.find_one_and_update({'_id':jid},{'$set': {'Running': True,"Finished":datetime.utcnow()}})
+    def del_job(self, jid):
+        self.db.JOBS.find_one_and_update({'_id': jid}, {'$set': {'Running': True, "Finished": datetime.utcnow()}})
 
     def __str__(self):
         _s = "[TensorDB] Info:\n"
@@ -502,49 +489,50 @@ def __str__(self):
     #     return data
 
 
-
 class DBLogger:
     """ """
-    def __init__(self,db,model):
-        self.db=db
-        self.model=model
 
-    def on_train_begin(self,logs={}):
+    def __init__(self, db, model):
+        self.db = db
+        self.model = model
+
+    def on_train_begin(self, logs={}):
         print("start")
 
-    def on_train_end(self,logs={}):
+    def on_train_end(self, logs={}):
         print("end")
 
-    def on_epoch_begin(self,epoch,logs={}):
-        self.epoch=epoch
-        self.et=time.time()
+    def on_epoch_begin(self, epoch, logs={}):
+        self.epoch = epoch
+        self.et = time.time()
         return
 
     def on_epoch_end(self, epoch, logs={}):
-        self.et=time.time()-self.et
+        self.et = time.time() - self.et
         print("ending")
         print(epoch)
-        logs['epoch']=epoch
-        logs['time']=datetime.utcnow()
-        logs['stepTime']=self.et
-        logs['acc']=np.asscalar(logs['acc'])
+        logs['epoch'] = epoch
+        logs['time'] = datetime.utcnow()
+        logs['stepTime'] = self.et
+        logs['acc'] = np.asscalar(logs['acc'])
         print(logs)
 
-        w=self.model.Params
-        fid=self.db.save_params(w,logs)
-        logs.update({'params':fid})
+        w = self.model.Params
+        fid = self.db.save_params(w, logs)
+        logs.update({'params': fid})
         self.db.valid_log(logs)
-    def on_batch_begin(self, batch,logs={}):
-        self.t=time.time()
+
+    def on_batch_begin(self, batch, logs={}):
+        self.t = time.time()
         self.losses = []
-        self.batch=batch
+        self.batch = batch
 
     def on_batch_end(self, batch, logs={}):
-        self.t2=time.time()-self.t
-        logs['acc']=np.asscalar(logs['acc'])
+        self.t2 = time.time() - self.t
+        logs['acc'] = np.asscalar(logs['acc'])
         #logs['loss']=np.asscalar(logs['loss'])
-        logs['step_time']=self.t2
-        logs['time']=datetime.utcnow()
-        logs['epoch']=self.epoch
-        logs['batch']=self.batch
+        logs['step_time'] = self.t2
+        logs['time'] = datetime.utcnow()
+        logs['epoch'] = self.epoch
+        logs['batch'] = self.batch
         self.db.train_log(logs)
diff --git a/tensorlayer/distributed.py b/tensorlayer/distributed.py
index cf638b039..c02098d83 100644
--- a/tensorlayer/distributed.py
+++ b/tensorlayer/distributed.py
@@ -41,8 +41,7 @@ class TaskSpecDef(object):
     - `ML-engine trainer considerations <https://cloud.google.com/ml-engine/docs/trainer-considerations#use_tf_config>`_
     """
 
-    def __init__(self, type='master', index=0, trial=None, ps_hosts=None, worker_hosts=None,
-                 master=None):
+    def __init__(self, type='master', index=0, trial=None, ps_hosts=None, worker_hosts=None, master=None):
         self.type = type
         self._index = int(index)
         self._cluster_spec = None
@@ -61,17 +60,14 @@ def __init__(self, type='master', index=0, trial=None, ps_hosts=None, worker_hos
             self.num_ps = len(self.ps_hosts)
             self.worker_hosts = worker_hosts if isinstance(worker_hosts, list) else worker_hosts.split(',')
             if master is not None and len(master) > 0:
-                self._cluster_spec = tf.train.ClusterSpec({'ps'    : self.ps_hosts,
-                                                           'worker': self.worker_hosts,
-                                                           'master': master})
+                self._cluster_spec = tf.train.ClusterSpec({'ps': self.ps_hosts, 'worker': self.worker_hosts, 'master': master})
                 # master is a worker too
                 self.num_workers = len(self.worker_hosts) + 1
                 if self.type == 'worker':
                     self.shard_index = self._index + 1
                 self._master = self.type == 'master'
             else:
-                self._cluster_spec = tf.train.ClusterSpec({'ps'    : self.ps_hosts,
-                                                           'worker': self.worker_hosts})
+                self._cluster_spec = tf.train.ClusterSpec({'ps': self.ps_hosts, 'worker': self.worker_hosts})
                 self.num_workers = len(self.worker_hosts)
                 if self.type == 'worker':
                     self.shard_index = self._index
@@ -97,16 +93,12 @@ def device_fn(self):
         """Returns the function with the specification to create the graph in this server"""
         current_device = '/job:{}/task:{}'.format(self.type, self._index)
         ps_devices = '/job:ps'
-        return tf.train.replica_device_setter(ps_device=ps_devices,
-                                              worker_device=current_device,
-                                              cluster=self._cluster_spec)
+        return tf.train.replica_device_setter(ps_device=ps_devices, worker_device=current_device, cluster=self._cluster_spec)
 
     def create_server(self):
         if self._server is None and self.ps_hosts and self.worker_hosts and not self.is_evaluator():
             # create server and join if it is a parameter server
-            self._server = tf.train.Server(self._cluster_spec,
-                                           job_name=self.type,
-                                           task_index=self._index)
+            self._server = tf.train.Server(self._cluster_spec, job_name=self.type, task_index=self._index)
             if self.is_ps():
                 self._server.join()
 
@@ -127,12 +119,7 @@ def user_last_worker_as_evaluator(self):
          """
         if self.num_workers <= 1:
             raise Exception('You need more than one worker instance to use one as evaluator')
-        return TaskSpecDef(type=self.type,
-                           index=self._index,
-                           trial=self.trial,
-                           ps_hosts=self.ps_hosts,
-                           worker_hosts=self.worker_hosts[:-1],
-                           master=self.master)
+        return TaskSpecDef(type=self.type, index=self._index, trial=self.trial, ps_hosts=self.ps_hosts, worker_hosts=self.worker_hosts[:-1], master=self.master)
 
 
 def TaskSpec():
@@ -150,20 +137,22 @@ def TaskSpec():
         env = json.loads(os.environ.get('TF_CONFIG', '{}'))
         task_data = env.get('task', None) or {'type': 'master', 'index': 0}
         cluster_data = env.get('cluster', None) or {'ps': None, 'worker': None, 'master': None}
-        return TaskSpecDef(type=task_data['type'],
-                           index=task_data['index'],
-                           trial=task_data['trial'] if 'trial' in task_data else None,
-                           ps_hosts=cluster_data['ps'],
-                           worker_hosts=cluster_data['worker'],
-                           master=cluster_data['master'] if 'master' in cluster_data else None)
+        return TaskSpecDef(
+            type=task_data['type'],
+            index=task_data['index'],
+            trial=task_data['trial'] if 'trial' in task_data else None,
+            ps_hosts=cluster_data['ps'],
+            worker_hosts=cluster_data['worker'],
+            master=cluster_data['master'] if 'master' in cluster_data else None)
 
     # JOB_NAME, TASK_INDEX, PS_HOSTS, WORKER_HOSTS and MASTER_HOST are used in TensorPort
     if 'JOB_NAME' in os.environ:
-        return TaskSpecDef(type=os.environ['JOB_NAME'],
-                        index=os.environ['TASK_INDEX'],
-                        ps_hosts=os.environ.get('PS_HOSTS', None),
-                        worker_hosts=os.environ.get('WORKER_HOSTS', None),
-                        master=os.environ.get('MASTER_HOST', None))
+        return TaskSpecDef(
+            type=os.environ['JOB_NAME'],
+            index=os.environ['TASK_INDEX'],
+            ps_hosts=os.environ.get('PS_HOSTS', None),
+            worker_hosts=os.environ.get('WORKER_HOSTS', None),
+            master=os.environ.get('MASTER_HOST', None))
     return None
 
 
@@ -252,19 +241,19 @@ def DistributedSession(task_spec=None,
     """
     target = task_spec.target() if task_spec is not None else None
     is_chief = task_spec.is_master() if task_spec is not None else True
-    return tf.train.MonitoredTrainingSession(master=target,
-                                             is_chief=is_chief,
-                                             checkpoint_dir=checkpoint_dir,
-                                             scaffold=scaffold,
-                                             save_checkpoint_secs=save_checkpoint_secs,
-                                             save_summaries_steps=save_summaries_steps,
-                                             save_summaries_secs=save_summaries_secs,
-                                             log_step_count_steps=log_step_count_steps,
-                                             stop_grace_period_secs=stop_grace_period_secs,
-                                             config=config,
-                                             hooks=hooks,
-                                             chief_only_hooks=chief_only_hooks)
-
+    return tf.train.MonitoredTrainingSession(
+        master=target,
+        is_chief=is_chief,
+        checkpoint_dir=checkpoint_dir,
+        scaffold=scaffold,
+        save_checkpoint_secs=save_checkpoint_secs,
+        save_summaries_steps=save_summaries_steps,
+        save_summaries_secs=save_summaries_secs,
+        log_step_count_steps=log_step_count_steps,
+        stop_grace_period_secs=stop_grace_period_secs,
+        config=config,
+        hooks=hooks,
+        chief_only_hooks=chief_only_hooks)
 
 
 class StopAtTimeHook(session_run_hook.SessionRunHook):
diff --git a/tensorlayer/files.py b/tensorlayer/files.py
index 834e8cfbb..82d41a965 100644
--- a/tensorlayer/files.py
+++ b/tensorlayer/files.py
@@ -1,7 +1,6 @@
 #! /usr/bin/python
 # -*- coding: utf-8 -*-
 
-
 import gzip
 import os
 import pickle
@@ -19,7 +18,7 @@
 
 
 ## Load dataset functions
-def load_mnist_dataset(shape=(-1,784), path="data"):
+def load_mnist_dataset(shape=(-1, 784), path="data"):
     """Automatically download MNIST dataset
     and return the training, validation and test set with 50000, 10000 and 10000
     digit images respectively.
@@ -37,6 +36,7 @@ def load_mnist_dataset(shape=(-1,784), path="data"):
     >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 28, 28, 1))
     """
     path = os.path.join(path, 'mnist')
+
     # We first define functions for loading MNIST images and labels.
     # For convenience, they also download the requested files if needed.
     def load_mnist_images(path, filename):
@@ -83,6 +83,7 @@ def load_mnist_labels(path, filename):
     y_test = np.asarray(y_test, dtype=np.int32)
     return X_train, y_train, X_val, y_val, X_test, y_test
 
+
 def load_cifar10_dataset(shape=(-1, 32, 32, 3), path='data', plotable=False, second=3):
     """The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with
     6000 images per class. There are 50000 training images and 10000 test images.
@@ -135,7 +136,7 @@ def unpickle(file):
     #Unpickle file and fill in data
     X_train = None
     y_train = []
-    for i in range(1,6):
+    for i in range(1, 6):
         data_dic = unpickle(os.path.join(path, 'cifar-10-batches-py/', "data_batch_{}".format(i)))
         if i == 1:
             X_train = data_dic['data']
@@ -143,7 +144,7 @@ def unpickle(file):
             X_train = np.vstack((X_train, data_dic['data']))
         y_train += data_dic['labels']
 
-    test_data_dic = unpickle(os.path.join(path,  'cifar-10-batches-py/', "test_batch"))
+    test_data_dic = unpickle(os.path.join(path, 'cifar-10-batches-py/', "test_batch"))
     X_test = test_data_dic['data']
     y_test = np.array(test_data_dic['labels'])
 
@@ -166,32 +167,32 @@ def unpickle(file):
         import matplotlib.pyplot as plt
         fig = plt.figure(1)
 
-        print('Shape of a training image: X_train[0]',X_train[0].shape)
+        print('Shape of a training image: X_train[0]', X_train[0].shape)
 
-        plt.ion()       # interactive mode
+        plt.ion()  # interactive mode
         count = 1
         for row in range(10):
             for col in range(10):
                 a = fig.add_subplot(10, 10, count)
                 if shape == (-1, 3, 32, 32):
                     # plt.imshow(X_train[count-1], interpolation='nearest')
-                    plt.imshow(np.transpose(X_train[count-1], (1, 2, 0)), interpolation='nearest')
+                    plt.imshow(np.transpose(X_train[count - 1], (1, 2, 0)), interpolation='nearest')
                     # plt.imshow(np.transpose(X_train[count-1], (2, 1, 0)), interpolation='nearest')
                 elif shape == (-1, 32, 32, 3):
-                    plt.imshow(X_train[count-1], interpolation='nearest')
+                    plt.imshow(X_train[count - 1], interpolation='nearest')
                     # plt.imshow(np.transpose(X_train[count-1], (1, 0, 2)), interpolation='nearest')
                 else:
                     raise Exception("Do not support the given 'shape' to plot the image examples")
-                plt.gca().xaxis.set_major_locator(plt.NullLocator())    # 不显示刻度(tick)
+                plt.gca().xaxis.set_major_locator(plt.NullLocator())  # 不显示刻度(tick)
                 plt.gca().yaxis.set_major_locator(plt.NullLocator())
                 count = count + 1
-        plt.draw()      # interactive mode
-        plt.pause(3)   # interactive mode
+        plt.draw()  # interactive mode
+        plt.pause(3)  # interactive mode
 
-        print("X_train:",X_train.shape)
-        print("y_train:",y_train.shape)
-        print("X_test:",X_test.shape)
-        print("y_test:",y_test.shape)
+        print("X_train:", X_train.shape)
+        print("y_train:", y_train.shape)
+        print("X_test:", X_test.shape)
+        print("y_test:", y_test.shape)
 
     X_train = np.asarray(X_train, dtype=np.float32)
     X_test = np.asarray(X_test, dtype=np.float32)
@@ -200,6 +201,7 @@ def unpickle(file):
 
     return X_train, y_train, X_test, y_test
 
+
 def load_ptb_dataset(path='data'):
     """Penn TreeBank (PTB) dataset is used in many LANGUAGE MODELING papers,
     including "Empirical Evaluation and Combination of Advanced Language
@@ -252,6 +254,7 @@ def load_ptb_dataset(path='data'):
     # exit()
     return train_data, valid_data, test_data, vocabulary
 
+
 def load_matt_mahoney_text8_dataset(path='data'):
     """Download a text file from Matt Mahoney's website
     if not present, and make sure it's the right size.
@@ -287,9 +290,8 @@ def load_matt_mahoney_text8_dataset(path='data'):
             word_list[idx] = word_list[idx].decode()
     return word_list
 
-def load_imdb_dataset(path='data', nb_words=None, skip_top=0,
-              maxlen=None, test_split=0.2, seed=113,
-              start_char=1, oov_char=2, index_from=3):
+
+def load_imdb_dataset(path='data', nb_words=None, skip_top=0, maxlen=None, test_split=0.2, seed=113, start_char=1, oov_char=2, index_from=3):
     """Load IMDB dataset
 
     Parameters
@@ -344,9 +346,7 @@ def load_imdb_dataset(path='data', nb_words=None, skip_top=0,
         X = new_X
         labels = new_labels
     if not X:
-        raise Exception('After filtering for sequences shorter than maxlen=' +
-                        str(maxlen) + ', no sequence was kept. '
-                        'Increase maxlen.')
+        raise Exception('After filtering for sequences shorter than maxlen=' + str(maxlen) + ', no sequence was kept. ' 'Increase maxlen.')
     if not nb_words:
         nb_words = max([max(x) for x in X])
 
@@ -372,6 +372,7 @@ def load_imdb_dataset(path='data', nb_words=None, skip_top=0,
 
     return X_train, y_train, X_test, y_test
 
+
 def load_nietzsche_dataset(path='data'):
     """Load Nietzsche dataset.
     Returns a string.
@@ -399,6 +400,7 @@ def load_nietzsche_dataset(path='data'):
         words = f.read()
         return words
 
+
 def load_wmt_en_fr_dataset(path='data'):
     """It will download English-to-French translation data from the WMT'15
     Website (10^9-French-English corpus), and the 2013 news test from
@@ -449,12 +451,12 @@ def get_wmt_enfr_dev_set(path):
         if not (gfile.Exists(dev_path + ".fr") and gfile.Exists(dev_path + ".en")):
             print("Extracting tgz file %s" % dev_file)
             with tarfile.open(dev_file, "r:gz") as dev_tar:
-              fr_dev_file = dev_tar.getmember("dev/" + dev_name + ".fr")
-              en_dev_file = dev_tar.getmember("dev/" + dev_name + ".en")
-              fr_dev_file.name = dev_name + ".fr"  # Extract without "dev/" prefix.
-              en_dev_file.name = dev_name + ".en"
-              dev_tar.extract(fr_dev_file, path)
-              dev_tar.extract(en_dev_file, path)
+                fr_dev_file = dev_tar.getmember("dev/" + dev_name + ".fr")
+                en_dev_file = dev_tar.getmember("dev/" + dev_name + ".en")
+                fr_dev_file.name = dev_name + ".fr"  # Extract without "dev/" prefix.
+                en_dev_file.name = dev_name + ".en"
+                dev_tar.extract(fr_dev_file, path)
+                dev_tar.extract(en_dev_file, path)
         return dev_path
 
     print("Load or Download WMT English-to-French translation > {}".format(path))
@@ -464,6 +466,7 @@ def get_wmt_enfr_dev_set(path):
 
     return train_path, dev_path
 
+
 def load_flickr25k_dataset(tag='sky', path="data", n_threads=50, printable=False):
     """Returns a list of images by a given tag from Flick25k dataset,
     it will download Flickr25k from `the official website <http://press.liacs.nl/mirflickr/mirdownload.html>`_
@@ -492,18 +495,18 @@ def load_flickr25k_dataset(tag='sky', path="data", n_threads=50, printable=False
     filename = 'mirflickr25k.zip'
     url = 'http://press.liacs.nl/mirflickr/mirflickr25k/'
     ## download dataset
-    if folder_exists(path+"/mirflickr") is False:
+    if folder_exists(path + "/mirflickr") is False:
         print("[*] Flickr25k is nonexistent in {}".format(path))
         maybe_download_and_extract(filename, path, url, extract=True)
-        del_file(path+'/'+filename)
+        del_file(path + '/' + filename)
     ## return images by the given tag.
     # 1. image path list
-    folder_imgs = path+"/mirflickr"
+    folder_imgs = path + "/mirflickr"
     path_imgs = load_file_list(path=folder_imgs, regx='\\.jpg', printable=False)
     path_imgs.sort(key=natural_keys)
     # print(path_imgs[0:10])
     # 2. tag path list
-    folder_tags = path+"/mirflickr/meta/tags"
+    folder_tags = path + "/mirflickr/meta/tags"
     path_tags = load_file_list(path=folder_tags, regx='\\.txt', printable=False)
     path_tags.sort(key=natural_keys)
     # print(path_tags[0:10])
@@ -514,7 +517,7 @@ def load_flickr25k_dataset(tag='sky', path="data", n_threads=50, printable=False
         print("[Flickr25k] reading images with tag: {}".format(tag))
     images_list = []
     for idx in range(0, len(path_tags)):
-        tags = read_file(folder_tags+'/'+path_tags[idx]).split('\n')
+        tags = read_file(folder_tags + '/' + path_tags[idx]).split('\n')
         # print(idx+1, tags)
         if tag is None or tag in tags:
             images_list.append(path_imgs[idx])
@@ -522,6 +525,7 @@ def load_flickr25k_dataset(tag='sky', path="data", n_threads=50, printable=False
     images = visualize.read_images(images_list, folder_imgs, n_threads=n_threads, printable=printable)
     return images
 
+
 def load_flickr1M_dataset(tag='sky', size=10, path="data", n_threads=50, printable=False):
     """Returns a list of images by a given tag from Flickr1M dataset,
     it will download Flickr1M from `the official website <http://press.liacs.nl/mirflickr/mirdownload.html>`_
@@ -548,29 +552,29 @@ def load_flickr1M_dataset(tag='sky', size=10, path="data", n_threads=50, printab
     >>> images = tl.files.load_flickr1M_dataset(tag='zebra')
     """
     path = os.path.join(path, 'flickr1M')
-    print("[Flickr1M] using {}% of images = {}".format(size*10, size*100000))
-    images_zip = ['images0.zip', 'images1.zip', 'images2.zip', 'images3.zip',
-             'images4.zip',  'images5.zip', 'images6.zip', 'images7.zip',
-             'images8.zip',  'images9.zip']
+    print("[Flickr1M] using {}% of images = {}".format(size * 10, size * 100000))
+    images_zip = [
+        'images0.zip', 'images1.zip', 'images2.zip', 'images3.zip', 'images4.zip', 'images5.zip', 'images6.zip', 'images7.zip', 'images8.zip', 'images9.zip'
+    ]
     tag_zip = 'tags.zip'
     url = 'http://press.liacs.nl/mirflickr/mirflickr1m/'
     ## download dataset
     for image_zip in images_zip[0:size]:
         image_folder = image_zip.split(".")[0]
         # print(path+"/"+image_folder)
-        if folder_exists(path+"/"+image_folder) is False:
+        if folder_exists(path + "/" + image_folder) is False:
             # print(image_zip)
             print("[Flickr1M] {} is missing in {}".format(image_folder, path))
             maybe_download_and_extract(image_zip, path, url, extract=True)
-            del_file(path+'/'+image_zip)
-            os.system("mv {} {}".format(path+'/images',path+'/'+image_folder))
+            del_file(path + '/' + image_zip)
+            os.system("mv {} {}".format(path + '/images', path + '/' + image_folder))
         else:
             print("[Flickr1M] {} exists in {}".format(image_folder, path))
     ## download tag
-    if folder_exists(path+"/tags") is False:
+    if folder_exists(path + "/tags") is False:
         print("[Flickr1M] tag files is nonexistent in {}".format(path))
         maybe_download_and_extract(tag_zip, path, url, extract=True)
-        del_file(path+'/'+tag_zip)
+        del_file(path + '/' + tag_zip)
     else:
         print("[Flickr1M] tags exists in {}".format(path))
 
@@ -578,26 +582,26 @@ def load_flickr1M_dataset(tag='sky', size=10, path="data", n_threads=50, printab
     images_list = []
     images_folder_list = []
     for i in range(0, size):
-        images_folder_list += load_folder_list(path=path+'/images%d'%i)
-    images_folder_list.sort(key=lambda s : int(s.split('/')[-1]))   # folder/images/ddd
+        images_folder_list += load_folder_list(path=path + '/images%d' % i)
+    images_folder_list.sort(key=lambda s: int(s.split('/')[-1]))  # folder/images/ddd
     # print(images_folder_list)
     # exit()
-    for folder in images_folder_list[0:size*10]:
+    for folder in images_folder_list[0:size * 10]:
         tmp = load_file_list(path=folder, regx='\\.jpg', printable=False)
-        tmp.sort(key=lambda s : int(s.split('.')[-2]))  # ddd.jpg
+        tmp.sort(key=lambda s: int(s.split('.')[-2]))  # ddd.jpg
         # print(tmp[0::570])
-        images_list.extend([folder+'/'+x for x in tmp])
+        images_list.extend([folder + '/' + x for x in tmp])
     # print('IM', len(images_list), images_list[0::6000])
     ## 2. tag path list
     tag_list = []
-    tag_folder_list = load_folder_list(path+"/tags")
-    tag_folder_list.sort(key=lambda s : int(s.split('/')[-1]))  # folder/images/ddd
+    tag_folder_list = load_folder_list(path + "/tags")
+    tag_folder_list.sort(key=lambda s: int(s.split('/')[-1]))  # folder/images/ddd
 
-    for folder in tag_folder_list[0:size*10]:
+    for folder in tag_folder_list[0:size * 10]:
         # print(folder)
         tmp = load_file_list(path=folder, regx='\\.txt', printable=False)
-        tmp.sort(key=lambda s : int(s.split('.')[-2])) # ddd.txt
-        tmp = [folder+'/'+s for s in tmp]
+        tmp.sort(key=lambda s: int(s.split('.')[-2]))  # ddd.txt
+        tmp = [folder + '/' + s for s in tmp]
         tag_list += tmp
     # print('T', len(tag_list), tag_list[0::6000])
     # exit()
@@ -613,6 +617,7 @@ def load_flickr1M_dataset(tag='sky', size=10, path="data", n_threads=50, printab
     images = visualize.read_images(select_images_list, '', n_threads=n_threads, printable=printable)
     return images
 
+
 def load_cyclegan_dataset(filename='summer2winter_yosemite', path='data'):
     """Load image data from CycleGAN's database, see `this link <https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/>`_.
 
@@ -632,18 +637,19 @@ def load_cyclegan_dataset(filename='summer2winter_yosemite', path='data'):
 
     if folder_exists(os.path.join(path, filename)) is False:
         print("[*] {} is nonexistent in {}".format(filename, path))
-        maybe_download_and_extract(filename+'.zip', path, url, extract=True)
-        del_file(os.path.join(path, filename+'.zip'))
+        maybe_download_and_extract(filename + '.zip', path, url, extract=True)
+        del_file(os.path.join(path, filename + '.zip'))
 
     def load_image_from_folder(path):
         path_imgs = load_file_list(path=path, regx='\\.jpg', printable=False)
         return visualize.read_images(path_imgs, path=path, n_threads=10, printable=False)
+
     im_train_A = load_image_from_folder(os.path.join(path, filename, "trainA"))
     im_train_B = load_image_from_folder(os.path.join(path, filename, "trainB"))
     im_test_A = load_image_from_folder(os.path.join(path, filename, "testA"))
     im_test_B = load_image_from_folder(os.path.join(path, filename, "testB"))
 
-    def if_2d_to_3d(images):         # [h, w] --> [h, w, 3]
+    def if_2d_to_3d(images):  # [h, w] --> [h, w, 3]
         for i in range(len(images)):
             if len(images[i].shape) == 2:
                 images[i] = images[i][:, :, np.newaxis]
@@ -657,6 +663,7 @@ def if_2d_to_3d(images):         # [h, w] --> [h, w, 3]
 
     return im_train_A, im_train_B, im_test_A, im_test_B
 
+
 def download_file_from_google_drive(id, destination):
     """ Download file from Google Drive, see ``load_celebA_dataset`` for example.
 
@@ -667,34 +674,37 @@ def download_file_from_google_drive(id, destination):
     """
     from tqdm import tqdm
     import requests
-    def save_response_content(response, destination, chunk_size=32*1024):
+
+    def save_response_content(response, destination, chunk_size=32 * 1024):
         total_size = int(response.headers.get('content-length', 0))
         with open(destination, "wb") as f:
-            for chunk in tqdm(response.iter_content(chunk_size), total=total_size,
-                    unit='B', unit_scale=True, desc=destination):
-                if chunk: # filter out keep-alive new chunks
+            for chunk in tqdm(response.iter_content(chunk_size), total=total_size, unit='B', unit_scale=True, desc=destination):
+                if chunk:  # filter out keep-alive new chunks
                     f.write(chunk)
+
     def get_confirm_token(response):
         for key, value in response.cookies.items():
             if key.startswith('download_warning'):
                 return value
         return None
+
     URL = "https://docs.google.com/uc?export=download"
     session = requests.Session()
 
-    response = session.get(URL, params={ 'id': id }, stream=True)
+    response = session.get(URL, params={'id': id}, stream=True)
     token = get_confirm_token(response)
 
     if token:
-        params = { 'id' : id, 'confirm' : token }
+        params = {'id': id, 'confirm': token}
         response = session.get(URL, params=params, stream=True)
     save_response_content(response, destination)
 
+
 def load_celebA_dataset(dirpath='data'):
     """ Automatically download celebA dataset, and return a list of image path. """
     import zipfile, os
     data_dir = 'celebA'
-    filename, drive_id  = "img_align_celeba.zip", "0B7EVK8r0v71pZjFTYXZWM3FlRnM"
+    filename, drive_id = "img_align_celeba.zip", "0B7EVK8r0v71pZjFTYXZWM3FlRnM"
     save_path = os.path.join(dirpath, filename)
     image_path = os.path.join(dirpath, data_dir)
     if os.path.exists(image_path):
@@ -711,9 +721,10 @@ def load_celebA_dataset(dirpath='data'):
 
     data_files = load_file_list(path=image_path, regx='\\.jpg', printable=False)
     for i in range(len(data_files)):
-        data_files[i] =  os.path.join(image_path, data_files[i])
+        data_files[i] = os.path.join(image_path, data_files[i])
     return data_files
 
+
 def load_voc_dataset(path='data', dataset='2012', contain_classes_in_person=False):
     """ Pascal VOC 2007/2012 Dataset has 20 objects : aeroplane, bicycle, bird, boat, bottle, bus, car, cat, chair, cow, diningtable, dog, horse, motorbike, person, pottedplant, sheep, sofa, train, tvmonitor and additional 3 classes : head, hand, foot for person.
 
@@ -781,10 +792,10 @@ def load_voc_dataset(path='data', dataset='2012', contain_classes_in_person=Fals
     - `Pascal VOC2007 Website <http://host.robots.ox.ac.uk/pascal/VOC/voc2007/>`_.
     - `TensorFlow/Models/object-detection <https://github.com/zsdonghao/object-detection/blob/master/g3doc/preparing_inputs.md>`_.
     """
-    path= os.path.join(path, 'VOC')
+    path = os.path.join(path, 'VOC')
 
     def _recursive_parse_xml_to_dict(xml):
-      """Recursively parses XML contents to python dict.
+        """Recursively parses XML contents to python dict.
       We assume that `object` tags are the only ones that can appear
       multiple times at the same level of a tree.
 
@@ -794,38 +805,40 @@ def _recursive_parse_xml_to_dict(xml):
       Returns:
         Python dictionary holding XML contents.
       """
-      if not xml:
-      # if xml is not None:
-        return {xml.tag: xml.text}
-      result = {}
-      for child in xml:
-        child_result = _recursive_parse_xml_to_dict(child)
-        if child.tag != 'object':
-          result[child.tag] = child_result[child.tag]
-        else:
-          if child.tag not in result:
-            result[child.tag] = []
-          result[child.tag].append(child_result[child.tag])
-      return {xml.tag: result}
+        if not xml:
+            # if xml is not None:
+            return {xml.tag: xml.text}
+        result = {}
+        for child in xml:
+            child_result = _recursive_parse_xml_to_dict(child)
+            if child.tag != 'object':
+                result[child.tag] = child_result[child.tag]
+            else:
+                if child.tag not in result:
+                    result[child.tag] = []
+                result[child.tag].append(child_result[child.tag])
+        return {xml.tag: result}
 
-    from lxml import etree # pip install lxml
+    from lxml import etree  # pip install lxml
     import xml.etree.ElementTree as ET
 
     ##
     if dataset == "2012":
         url = "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/"
         tar_filename = "VOCtrainval_11-May-2012.tar"
-        extracted_filename = "VOC2012"#"VOCdevkit/VOC2012"
+        extracted_filename = "VOC2012"  #"VOCdevkit/VOC2012"
         print("    [============= VOC 2012 =============]")
     elif dataset == "2012test":
-        extracted_filename = "VOC2012test"#"VOCdevkit/VOC2012"
+        extracted_filename = "VOC2012test"  #"VOCdevkit/VOC2012"
         print("    [============= VOC 2012 Test Set =============]")
         print("    \nAuthor: 2012test only have person annotation, so 2007test is highly recommended for testing !\n")
         import time
         time.sleep(3)
         if os.path.isdir(os.path.join(path, extracted_filename)) is False:
             print("For VOC 2012 Test data - online registration required")
-            print(" Please download VOC2012test.tar from:  \n register: http://host.robots.ox.ac.uk:8080 \n voc2012 : http://host.robots.ox.ac.uk:8080/eval/challenges/voc2012/ \ndownload: http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2012test.tar")
+            print(
+                " Please download VOC2012test.tar from:  \n register: http://host.robots.ox.ac.uk:8080 \n voc2012 : http://host.robots.ox.ac.uk:8080/eval/challenges/voc2012/ \ndownload: http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2012test.tar"
+            )
             print(" unzip VOC2012test.tar,rename the folder to VOC2012test and put it into %s" % path)
             exit()
         # # http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2012test.tar
@@ -870,15 +883,16 @@ def _recursive_parse_xml_to_dict(xml):
                     os.system("mv {}/VOCdevkit/VOC2007 {}/VOC2007test".format(path, path))
             del_folder(os.path.join(path, 'VOCdevkit'))
     ##======== object classes(labels)  NOTE: YOU CAN CUSTOMIZE THIS LIST
-    classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car",
-            "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike",
-            "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
+    classes = [
+        "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
+        "pottedplant", "sheep", "sofa", "train", "tvmonitor"
+    ]
     if contain_classes_in_person:
         classes_in_person = ["head", "hand", "foot"]
     else:
         classes_in_person = []
 
-    classes += classes_in_person    # use extra 3 classes for person
+    classes += classes_in_person  # use extra 3 classes for person
 
     classes_dict = utils.list_string_to_dict(classes)
     print("[VOC] object classes {}".format(classes_dict))
@@ -888,26 +902,26 @@ def _recursive_parse_xml_to_dict(xml):
     folder_imgs = os.path.join(path, extracted_filename, "JPEGImages")
     imgs_file_list = load_file_list(path=folder_imgs, regx='\\.jpg', printable=False)
     print("[VOC] {} images found".format(len(imgs_file_list)))
-    imgs_file_list.sort(key=lambda s : int(s.replace('.',' ').replace('_', '').split(' ')[-2])) # 2007_000027.jpg --> 2007000027
+    imgs_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2]))  # 2007_000027.jpg --> 2007000027
     imgs_file_list = [os.path.join(folder_imgs, s) for s in imgs_file_list]
-        # print('IM',imgs_file_list[0::3333], imgs_file_list[-1])
+    # print('IM',imgs_file_list[0::3333], imgs_file_list[-1])
     if dataset != "2012test":
         ##======== 2. semantic segmentation maps path list
         # folder_semseg = path+"/"+extracted_filename+"/SegmentationClass/"
         folder_semseg = os.path.join(path, extracted_filename, "SegmentationClass")
         imgs_semseg_file_list = load_file_list(path=folder_semseg, regx='\\.png', printable=False)
         print("[VOC] {} maps for semantic segmentation found".format(len(imgs_semseg_file_list)))
-        imgs_semseg_file_list.sort(key=lambda s : int(s.replace('.',' ').replace('_', '').split(' ')[-2])) # 2007_000032.png --> 2007000032
+        imgs_semseg_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2]))  # 2007_000032.png --> 2007000032
         imgs_semseg_file_list = [os.path.join(folder_semseg, s) for s in imgs_semseg_file_list]
-            # print('Semantic Seg IM',imgs_semseg_file_list[0::333], imgs_semseg_file_list[-1])
+        # print('Semantic Seg IM',imgs_semseg_file_list[0::333], imgs_semseg_file_list[-1])
         ##======== 3. instance segmentation maps path list
         # folder_insseg = path+"/"+extracted_filename+"/SegmentationObject/"
         folder_insseg = os.path.join(path, extracted_filename, "SegmentationObject")
         imgs_insseg_file_list = load_file_list(path=folder_insseg, regx='\\.png', printable=False)
         print("[VOC] {} maps for instance segmentation found".format(len(imgs_semseg_file_list)))
-        imgs_insseg_file_list.sort(key=lambda s : int(s.replace('.',' ').replace('_', '').split(' ')[-2])) # 2007_000032.png --> 2007000032
+        imgs_insseg_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2]))  # 2007_000032.png --> 2007000032
         imgs_insseg_file_list = [os.path.join(folder_insseg, s) for s in imgs_insseg_file_list]
-            # print('Instance Seg IM',imgs_insseg_file_list[0::333], imgs_insseg_file_list[-1])
+        # print('Instance Seg IM',imgs_insseg_file_list[0::333], imgs_insseg_file_list[-1])
     else:
         imgs_semseg_file_list = []
         imgs_insseg_file_list = []
@@ -916,11 +930,11 @@ def _recursive_parse_xml_to_dict(xml):
     folder_ann = os.path.join(path, extracted_filename, "Annotations")
     imgs_ann_file_list = load_file_list(path=folder_ann, regx='\\.xml', printable=False)
     print("[VOC] {} XML annotation files for bounding box and object class found".format(len(imgs_ann_file_list)))
-    imgs_ann_file_list.sort(key=lambda s : int(s.replace('.',' ').replace('_', '').split(' ')[-2])) # 2007_000027.xml --> 2007000027
+    imgs_ann_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2]))  # 2007_000027.xml --> 2007000027
     imgs_ann_file_list = [os.path.join(folder_ann, s) for s in imgs_ann_file_list]
-        # print('ANN',imgs_ann_file_list[0::3333], imgs_ann_file_list[-1])
+    # print('ANN',imgs_ann_file_list[0::3333], imgs_ann_file_list[-1])
 
-    if dataset == "2012test": # remove unused images in JPEG folder
+    if dataset == "2012test":  # remove unused images in JPEG folder
         imgs_file_list_new = []
         for ann in imgs_ann_file_list:
             ann = os.path.split(ann)[-1].split('.')[0]
@@ -933,17 +947,17 @@ def _recursive_parse_xml_to_dict(xml):
 
     ##======== parse XML annotations
     def convert(size, box):
-        dw = 1./size[0]
-        dh = 1./size[1]
-        x = (box[0] + box[1])/2.0
-        y = (box[2] + box[3])/2.0
+        dw = 1. / size[0]
+        dh = 1. / size[1]
+        x = (box[0] + box[1]) / 2.0
+        y = (box[2] + box[3]) / 2.0
         w = box[1] - box[0]
         h = box[3] - box[2]
-        x = x*dw
-        w = w*dw
-        y = y*dh
-        h = h*dh
-        return (x,y,w,h)
+        x = x * dw
+        w = w * dw
+        y = y * dh
+        h = h * dh
+        return (x, y, w, h)
 
     def convert_annotation(file_name):
         """ Given VOC2012 XML Annotations, returns number of objects and info. """
@@ -971,7 +985,7 @@ def convert_annotation(file_name):
             cls_id = classes.index(cls)
             xmlbox = obj.find('bndbox')
             b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
-            bb = convert((w,h), b)
+            bb = convert((w, h), b)
             # out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
             out_file += str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n'
             n_objs += 1
@@ -983,7 +997,7 @@ def convert_annotation(file_name):
                     cls_id = classes.index(cls)
                     xmlbox = part.find('bndbox')
                     b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
-                    bb = convert((w,h), b)
+                    bb = convert((w, h), b)
                     # out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
                     out_file += str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n'
                     n_objs += 1
@@ -992,7 +1006,7 @@ def convert_annotation(file_name):
 
     print("[VOC] Parsing xml annotations files")
     n_objs_list = []
-    objs_info_list = [] # Darknet Format list of string
+    objs_info_list = []  # Darknet Format list of string
     objs_info_dicts = {}
     for idx, ann_file in enumerate(imgs_ann_file_list):
         # print(ann_file)
@@ -1010,7 +1024,6 @@ def convert_annotation(file_name):
         n_objs_list, objs_info_list, objs_info_dicts
 
 
-
 ## Load and save network list npz
 def save_npz(save_list=[], name='model.npz', sess=None):
     """Input parameters and the file name, save parameters into .npz file. Use tl.utils.load_npz() to restore.
@@ -1063,6 +1076,7 @@ def save_npz(save_list=[], name='model.npz', sess=None):
     # np.savez(name, **rename_dict)
     # print('Model is saved to: %s' % name)
 
+
 def load_npz(path='', name='model.npz'):
     """Load the parameters of a Model saved by tl.files.save_npz().
 
@@ -1095,7 +1109,7 @@ def load_npz(path='', name='model.npz'):
     #     print('Loading %s, %s' % (key, str(val.shape)))
     # return params
     ## if save_npz save params into a list
-    d = np.load( path+name )
+    d = np.load(path + name)
     # for val in sorted( d.items() ):
     #     params = val
     #     return params
@@ -1104,6 +1118,7 @@ def load_npz(path='', name='model.npz'):
     # exit()
     # return d.items()[0][1]['params']
 
+
 def assign_params(sess, params, network):
     """Assign the given parameters to the TensorLayer network.
 
@@ -1141,6 +1156,7 @@ def assign_params(sess, params, network):
         sess.run(ops)
     return ops
 
+
 def load_and_assign_npz(sess=None, name=None, network=None):
     """Load model from npz and assign to a network.
 
@@ -1172,6 +1188,7 @@ def load_and_assign_npz(sess=None, name=None, network=None):
         print("[*] Load {} SUCCESS!".format(name))
         return network
 
+
 ## Load and save network dict npz
 def save_npz_dict(save_list=[], name='model.npz', sess=None):
     """Input parameters and the file name, save parameters as a dictionary into .npz file.
@@ -1196,6 +1213,7 @@ def save_npz_dict(save_list=[], name='model.npz', sess=None):
     del save_var_dict
     print("[*] Model saved in npz_dict %s" % name)
 
+
 def load_and_assign_npz_dict(name='model.npz', sess=None):
     """Restore the parameters saved by ``tl.files.save_npz_dict()``.
 
@@ -1232,6 +1250,7 @@ def load_and_assign_npz_dict(name='model.npz', sess=None):
     sess.run(ops)
     print("[*] Model restored from npz_dict %s" % name)
 
+
 # def save_npz_dict(save_list=[], name='model.npz', sess=None):
 #     """Input parameters and the file name, save parameters as a dictionary into .npz file. Use tl.utils.load_npz_dict() to restore.
 #
@@ -1286,7 +1305,6 @@ def load_and_assign_npz_dict(name='model.npz', sess=None):
 #     return saved_list_var
 
 
-
 ## Load and save network ckpt
 def save_ckpt(sess=None, mode_name='model.ckpt', save_dir='checkpoint', var_list=[], global_step=None, printable=False):
     """Save parameters into ckpt file.
@@ -1318,6 +1336,7 @@ def save_ckpt(sess=None, mode_name='model.ckpt', save_dir='checkpoint', var_list
     saver = tf.train.Saver(var_list)
     saver.save(sess, ckpt_file, global_step=global_step)
 
+
 def load_ckpt(sess=None, mode_name='model.ckpt', save_dir='checkpoint', var_list=[], is_latest=True, printable=False):
     """Load parameters from ckpt file.
 
@@ -1366,7 +1385,6 @@ def load_ckpt(sess=None, mode_name='model.ckpt', save_dir='checkpoint', var_list
         print("[*] load ckpt fail ...")
 
 
-
 ## Load and save variables
 def save_any_to_npy(save_dict={}, name='file.npy'):
     """Save variables to .npy file.
@@ -1380,6 +1398,7 @@ def save_any_to_npy(save_dict={}, name='file.npy'):
     """
     np.save(name, save_dict)
 
+
 def load_npy_to_any(path='', name='file.npy'):
     """Load .npy file.
 
@@ -1400,25 +1419,27 @@ def load_npy_to_any(path='', name='file.npy'):
             exit()
 
 
-
-
 ## Folder functions
 def file_exists(filepath):
     """ Check whether a file exists by given file path. """
     return os.path.isfile(filepath)
 
+
 def folder_exists(folderpath):
     """ Check whether a folder exists by given folder path. """
     return os.path.isdir(folderpath)
 
+
 def del_file(filepath):
     """ Delete a file by given file path. """
     os.remove(filepath)
 
+
 def del_folder(folderpath):
     """ Delete a folder by given folder path. """
     os.rmdir(folderpath)
 
+
 def read_file(filepath):
     """ Read a file and return a string.
 
@@ -1429,6 +1450,7 @@ def read_file(filepath):
     with open(filepath, 'r') as afile:
         return afile.read()
 
+
 def load_file_list(path=None, regx='\.npz', printable=True):
     """Return a file list in a folder by given a path and regular expression.
 
@@ -1457,6 +1479,7 @@ def load_file_list(path=None, regx='\.npz', printable=True):
         print('Number of files = %d' % len(return_list))
     return return_list
 
+
 def load_folder_list(path=""):
     """Return a folder list in a folder by given a folder path.
 
@@ -1465,7 +1488,8 @@ def load_folder_list(path=""):
     path : a string or None
         A folder path.
     """
-    return [os.path.join(path,o) for o in os.listdir(path) if os.path.isdir(os.path.join(path,o))]
+    return [os.path.join(path, o) for o in os.listdir(path) if os.path.isdir(os.path.join(path, o))]
+
 
 def exists_or_mkdir(path, verbose=True):
     """Check a folder by given name, if not exist, create the folder and return False,
@@ -1496,6 +1520,7 @@ def exists_or_mkdir(path, verbose=True):
             print("[!] %s exists ..." % path)
         return True
 
+
 def maybe_download_and_extract(filename, working_directory, url_source, extract=False, expected_bytes=None):
     """Checks if file exists in working_directory otherwise tries to dowload the file,
     and optionally also tries to extract the file if format is ".zip" or ".tar"
@@ -1528,19 +1553,21 @@ def maybe_download_and_extract(filename, working_directory, url_source, extract=
                                             url_source = 'http://sceneparsing.csail.mit.edu/data/',
                                             extract=True)
     """
+
     # We first define a download function, supporting both Python 2 and 3.
     def _download(filename, working_directory, url_source):
         def _dlProgress(count, blockSize, totalSize):
-            if(totalSize != 0):
+            if (totalSize != 0):
                 percent = float(count * blockSize) / float(totalSize) * 100.0
                 sys.stdout.write("\r" "Downloading " + filename + "...%d%%" % percent)
                 sys.stdout.flush()
+
         if sys.version_info[0] == 2:
             from urllib import urlretrieve
         else:
             from urllib.request import urlretrieve
         filepath = os.path.join(working_directory, filename)
-        urlretrieve(url_source+filename, filepath, reporthook=_dlProgress)
+        urlretrieve(url_source + filename, filepath, reporthook=_dlProgress)
 
     exists_or_mkdir(working_directory, verbose=False)
     filepath = os.path.join(working_directory, filename)
@@ -1549,10 +1576,10 @@ def _dlProgress(count, blockSize, totalSize):
         _download(filename, working_directory, url_source)
         print()
         statinfo = os.stat(filepath)
-        print('Succesfully downloaded %s %s bytes.' % (filename, statinfo.st_size))#, 'bytes.')
-        if(not(expected_bytes is None) and (expected_bytes != statinfo.st_size)):
+        print('Succesfully downloaded %s %s bytes.' % (filename, statinfo.st_size))  #, 'bytes.')
+        if (not (expected_bytes is None) and (expected_bytes != statinfo.st_size)):
             raise Exception('Failed to verify ' + filename + '. Can you get to it with a browser?')
-        if(extract):
+        if (extract):
             if tarfile.is_tarfile(filepath):
                 print('Trying to extract tar file')
                 tarfile.open(filepath, 'r').extractall(working_directory)
@@ -1585,9 +1612,12 @@ def natural_keys(text):
     http://nedbatchelder.com/blog/200712/human_sorting.html
     (See Toothy's implementation in the comments)
     """
+
     def atoi(text):
         return int(text) if text.isdigit() else text
-    return [ atoi(c) for c in re.split('(\d+)', text) ]
+
+    return [atoi(c) for c in re.split('(\d+)', text)]
+
 
 # Visualizing npz files
 def npz_to_W_pdf(path=None, regx='w1pre_[0-9]+\.(npz)'):
@@ -1608,5 +1638,5 @@ def npz_to_W_pdf(path=None, regx='w1pre_[0-9]+\.(npz)'):
     file_list = load_file_list(path=path, regx=regx)
     for f in file_list:
         W = load_npz(path, f)[0]
-        print("%s --> %s" % (f, f.split('.')[0]+'.pdf'))
+        print("%s --> %s" % (f, f.split('.')[0] + '.pdf'))
         visualize.W(W, second=10, saveable=True, name=f.split('.')[0], fig_idx=2012)
diff --git a/tensorlayer/iterate.py b/tensorlayer/iterate.py
index f7b6a5e7b..50473c974 100644
--- a/tensorlayer/iterate.py
+++ b/tensorlayer/iterate.py
@@ -1,8 +1,6 @@
 #! /usr/bin/python
 # -*- coding: utf-8 -*-
 
-
-
 import numpy as np
 from six.moves import xrange
 
@@ -55,6 +53,7 @@ def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False):
             excerpt = slice(start_idx, start_idx + batch_size)
         yield inputs[excerpt], targets[excerpt]
 
+
 def seq_minibatches(inputs, targets, batch_size, seq_length, stride=1):
     """Generate a generator that return a batch of sequence inputs and targets.
     If ``batch_size = 100, seq_length = 5``, one return will have ``500`` rows (examples).
@@ -102,19 +101,18 @@ def seq_minibatches(inputs, targets, batch_size, seq_length, stride=1):
     assert len(inputs) == len(targets)
     n_loads = (batch_size * stride) + (seq_length - stride)
     for start_idx in range(0, len(inputs) - n_loads + 1, (batch_size * stride)):
-        seq_inputs = np.zeros((batch_size, seq_length) + inputs.shape[1:],
-                              dtype=inputs.dtype)
-        seq_targets = np.zeros((batch_size, seq_length) + targets.shape[1:],
-                               dtype=targets.dtype)
+        seq_inputs = np.zeros((batch_size, seq_length) + inputs.shape[1:], dtype=inputs.dtype)
+        seq_targets = np.zeros((batch_size, seq_length) + targets.shape[1:], dtype=targets.dtype)
         for b_idx in xrange(batch_size):
             start_seq_idx = start_idx + (b_idx * stride)
             end_seq_idx = start_seq_idx + seq_length
             seq_inputs[b_idx] = inputs[start_seq_idx:end_seq_idx]
             seq_targets[b_idx] = targets[start_seq_idx:end_seq_idx]
-        flatten_inputs = seq_inputs.reshape((-1,) + inputs.shape[1:])
-        flatten_targets = seq_targets.reshape((-1,) + targets.shape[1:])
+        flatten_inputs = seq_inputs.reshape((-1, ) + inputs.shape[1:])
+        flatten_targets = seq_targets.reshape((-1, ) + targets.shape[1:])
         yield flatten_inputs, flatten_targets
 
+
 def seq_minibatches2(inputs, targets, batch_size, num_steps):
     """Generate a generator that iterates on two list of words. Yields (Returns) the source contexts and
     the target context by the given batch_size and num_steps (sequence_length),
@@ -184,8 +182,7 @@ def seq_minibatches2(inputs, targets, batch_size, num_steps):
     data_len = len(inputs)
     batch_len = data_len // batch_size
     # data = np.zeros([batch_size, batch_len])
-    data = np.zeros((batch_size, batch_len) + inputs.shape[1:],
-                          dtype=inputs.dtype)
+    data = np.zeros((batch_size, batch_len) + inputs.shape[1:], dtype=inputs.dtype)
     data2 = np.zeros([batch_size, batch_len])
 
     for i in range(batch_size):
@@ -198,8 +195,8 @@ def seq_minibatches2(inputs, targets, batch_size, num_steps):
         raise ValueError("epoch_size == 0, decrease batch_size or num_steps")
 
     for i in range(epoch_size):
-        x = data[:, i*num_steps:(i+1)*num_steps]
-        x2 = data2[:, i*num_steps:(i+1)*num_steps]
+        x = data[:, i * num_steps:(i + 1) * num_steps]
+        x2 = data2[:, i * num_steps:(i + 1) * num_steps]
         yield (x, x2)
 
 
@@ -275,12 +272,11 @@ def ptb_iterator(raw_data, batch_size, num_steps):
         raise ValueError("epoch_size == 0, decrease batch_size or num_steps")
 
     for i in range(epoch_size):
-        x = data[:, i*num_steps:(i+1)*num_steps]
-        y = data[:, i*num_steps+1:(i+1)*num_steps+1]
+        x = data[:, i * num_steps:(i + 1) * num_steps]
+        y = data[:, i * num_steps + 1:(i + 1) * num_steps + 1]
         yield (x, y)
 
 
-
 # def minibatches_for_sequence2D(inputs, targets, batch_size, sequence_length, stride=1):
 #     """
 #     Input a group of example in 2D numpy.array and their labels.
diff --git a/tensorlayer/layers.py b/tensorlayer/layers.py
index 0bf5e1c22..e80fd1fc7 100644
--- a/tensorlayer/layers.py
+++ b/tensorlayer/layers.py
@@ -20,7 +20,7 @@
 
 # set_keep = locals()
 set_keep = globals()
-set_keep['_layers_name_list'] =[]
+set_keep['_layers_name_list'] = []
 set_keep['name_reuse'] = False
 
 D_TYPE = tf.float32
@@ -30,6 +30,7 @@
 except:  # For TF11 and before
     TF_GRAPHKEYS_VARIABLES = tf.GraphKeys.VARIABLES
 
+
 ## Variable Operation
 def flatten_reshape(variable, name=''):
     """Reshapes high-dimension input to a vector.
@@ -61,6 +62,7 @@ def flatten_reshape(variable, name=''):
         dim *= d
     return tf.reshape(variable, shape=[-1, dim], name=name)
 
+
 def clear_layers_name():
     """Clear all layer names in set_keep['_layers_name_list'],
     enable layer name reuse.
@@ -75,7 +77,8 @@ def clear_layers_name():
     >>> network2 = tl.layers.DenseLayer(network2, n_units=800, name='relu1')
     ...
     """
-    set_keep['_layers_name_list'] =[]
+    set_keep['_layers_name_list'] = []
+
 
 def set_name_reuse(enable=True):
     """Enable or disable reuse layer name. By default, each layer must has unique
@@ -114,6 +117,7 @@ def set_name_reuse(enable=True):
     """
     set_keep['name_reuse'] = enable
 
+
 def initialize_rnn_state(state, feed_dict=None):
     """Returns the initialized RNN state.
     The inputs are LSTMStateTuple or State of RNNCells and an optional feed_dict.
@@ -124,7 +128,7 @@ def initialize_rnn_state(state, feed_dict=None):
     feed_dict : None or a dictionary for initializing the state values (optional).
         If None, returns the zero state.
     """
-    try: # TF1.0
+    try:  # TF1.0
         LSTMStateTuple = tf.contrib.rnn.LSTMStateTuple
     except:
         LSTMStateTuple = tf.nn.rnn_cell.LSTMStateTuple
@@ -137,6 +141,7 @@ def initialize_rnn_state(state, feed_dict=None):
         new_state = state.eval(feed_dict=feed_dict)
         return new_state
 
+
 def print_all_variables(train_only=False):
     """Print all trainable and non-trainable variables
     without tl.layers.initialize_global_variables(sess)
@@ -151,14 +156,15 @@ def print_all_variables(train_only=False):
         t_vars = tf.trainable_variables()
         print("  [*] printing trainable variables")
     else:
-        try: # TF1.0+
+        try:  # TF1.0+
             t_vars = tf.global_variables()
-        except: # TF0.12
+        except:  # TF0.12
             t_vars = tf.all_variables()
         print("  [*] printing global variables")
     for idx, v in enumerate(t_vars):
         print("  var {:3}: {:15}   {}".format(idx, str(v.get_shape()), v.name))
 
+
 def get_variables_with_name(name, train_only=True, printable=False):
     """Get variable list by a given name scope.
 
@@ -171,9 +177,9 @@ def get_variables_with_name(name, train_only=True, printable=False):
     if train_only:
         t_vars = tf.trainable_variables()
     else:
-        try: # TF1.0+
+        try:  # TF1.0+
             t_vars = tf.global_variables()
-        except: # TF0.12
+        except:  # TF0.12
             t_vars = tf.all_variables()
 
     d_vars = [var for var in t_vars if name in var.name]
@@ -182,6 +188,7 @@ def get_variables_with_name(name, train_only=True, printable=False):
             print("  got {:3}: {:15}   {}".format(idx, v.name, str(v.get_shape())))
     return d_vars
 
+
 def get_layers_with_name(network=None, name="", printable=False):
     """Get layer list in a network by a given name scope.
 
@@ -203,6 +210,7 @@ def get_layers_with_name(network=None, name="", printable=False):
                 i = i + 1
     return layers
 
+
 def list_remove_repeat(l=None):
     """Remove the repeated items in a list, and return the processed list.
     You may need it to create merged layer like Concat, Elementwise and etc.
@@ -221,6 +229,7 @@ def list_remove_repeat(l=None):
     [l2.append(i) for i in l if not i in l2]
     return l2
 
+
 def merge_networks(layers=[]):
     """Merge all parameters, layers and dropout probabilities to a :class:`Layer`.
 
@@ -254,6 +263,7 @@ def merge_networks(layers=[]):
 
     return layer
 
+
 def initialize_global_variables(sess=None):
     """Excute ``sess.run(tf.global_variables_initializer())`` for TF 0.12+ or
     ``sess.run(tf.initialize_all_variables())`` for TF 0.11.
@@ -285,11 +295,8 @@ class Layer(object):
     name : a string or None
         An optional name to attach to this layer.
     """
-    def __init__(
-        self,
-        inputs = None,
-        name ='layer'
-    ):
+
+    def __init__(self, inputs=None, name='layer'):
         self.inputs = inputs
         scope_name = tf.get_variable_scope().name
         if scope_name:
@@ -309,7 +316,8 @@ def print_params(self, details=True, session=None):
                 try:
                     # print("  param {:3}: {:15} (mean: {:<18}, median: {:<18}, std: {:<18})   {}".format(i, str(p.eval().shape), p.eval().mean(), np.median(p.eval()), p.eval().std(), p.name))
                     val = p.eval(session=session)
-                    print("  param {:3}: {:20} {:15}    {} (mean: {:<18}, median: {:<18}, std: {:<18})   ".format(i, p.name, str(val.shape), p.dtype.name, val.mean(), np.median(val), val.std()))
+                    print("  param {:3}: {:20} {:15}    {} (mean: {:<18}, median: {:<18}, std: {:<18})   ".format(
+                        i, p.name, str(val.shape), p.dtype.name, val.mean(), np.median(val), val.std()))
                 except Exception as e:
                     print(str(e))
                     raise Exception("Hint: print params details after tl.layers.initialize_global_variables(sess) or use network.print_params(False).")
@@ -345,6 +353,7 @@ def __str__(self):
         # self.print_layers()
         return "  Last layer is: %s" % self.__class__.__name__
 
+
 ## Input layer
 class InputLayer(Layer):
     """
@@ -357,11 +366,8 @@ class InputLayer(Layer):
     name : a string or None
         An optional name to attach to this layer.
     """
-    def __init__(
-        self,
-        inputs = None,
-        name ='input_layer'
-    ):
+
+    def __init__(self, inputs=None, name='input_layer'):
         Layer.__init__(self, inputs=inputs, name=name)
         print("  [TL] InputLayer  %s: %s" % (self.name, inputs.get_shape()))
         self.outputs = inputs
@@ -369,6 +375,7 @@ def __init__(
         self.all_params = []
         self.all_drop = {}
 
+
 ## OneHot layer
 class OneHotInputLayer(Layer):
     """
@@ -388,16 +395,8 @@ class OneHotInputLayer(Layer):
     axis : default, None
     dtype : default, None
     """
-    def __init__(
-        self,
-        inputs = None,
-        depth = None,
-        on_value = None,
-        off_value = None,
-        axis = None,
-        dtype=None,
-        name ='input_layer'
-    ):
+
+    def __init__(self, inputs=None, depth=None, on_value=None, off_value=None, axis=None, dtype=None, name='input_layer'):
         Layer.__init__(self, inputs=inputs, name=name)
         assert depth != None, "depth is not given"
         print("  [TL]:Instantiate OneHotInputLayer  %s: %s" % (self.name, inputs.get_shape()))
@@ -406,6 +405,7 @@ def __init__(
         self.all_params = []
         self.all_drop = {}
 
+
 ## Word Embedding Input layer
 class Word2vecEmbeddingInputlayer(Layer):
     """
@@ -490,21 +490,22 @@ class Word2vecEmbeddingInputlayer(Layer):
     ----------
     - `tensorflow/examples/tutorials/word2vec/word2vec_basic.py <https://github.com/tensorflow/tensorflow/blob/r0.7/tensorflow/examples/tutorials/word2vec/word2vec_basic.py>`_
     """
+
     def __init__(
-        self,
-        inputs = None,
-        train_labels = None,
-        vocabulary_size = 80000,
-        embedding_size = 200,
-        num_sampled = 64,
-        nce_loss_args = {},
-        E_init = tf.random_uniform_initializer(minval=-1.0, maxval=1.0),
-        E_init_args = {},
-        nce_W_init = tf.truncated_normal_initializer(stddev=0.03),
-        nce_W_init_args = {},
-        nce_b_init = tf.constant_initializer(value=0.0),
-        nce_b_init_args = {},
-        name ='word2vec_layer',
+            self,
+            inputs=None,
+            train_labels=None,
+            vocabulary_size=80000,
+            embedding_size=200,
+            num_sampled=64,
+            nce_loss_args={},
+            E_init=tf.random_uniform_initializer(minval=-1.0, maxval=1.0),
+            E_init_args={},
+            nce_W_init=tf.truncated_normal_initializer(stddev=0.03),
+            nce_W_init_args={},
+            nce_b_init=tf.constant_initializer(value=0.0),
+            nce_b_init_args={},
+            name='word2vec_layer',
     ):
         Layer.__init__(self, name=name)
         self.inputs = inputs
@@ -517,31 +518,23 @@ def __init__(
         # embed is the outputs of the hidden layer (embedding layer), it is a
         # row vector with 'embedding_size' values.
         with tf.variable_scope(name) as vs:
-            embeddings = tf.get_variable(name='embeddings',
-                                    shape=(vocabulary_size, embedding_size),
-                                    initializer=E_init,
-                                    dtype=D_TYPE,
-                                    **E_init_args)
+            embeddings = tf.get_variable(name='embeddings', shape=(vocabulary_size, embedding_size), initializer=E_init, dtype=D_TYPE, **E_init_args)
             embed = tf.nn.embedding_lookup(embeddings, self.inputs)
             # Construct the variables for the NCE loss (i.e. negative sampling)
-            nce_weights = tf.get_variable(name='nce_weights',
-                                    shape=(vocabulary_size, embedding_size),
-                                    initializer=nce_W_init,
-                                    dtype=D_TYPE,
-                                    **nce_W_init_args)
-            nce_biases = tf.get_variable(name='nce_biases',
-                                    shape=(vocabulary_size),
-                                    initializer=nce_b_init,
-                                    dtype=D_TYPE,
-                                    **nce_b_init_args)
+            nce_weights = tf.get_variable(name='nce_weights', shape=(vocabulary_size, embedding_size), initializer=nce_W_init, dtype=D_TYPE, **nce_W_init_args)
+            nce_biases = tf.get_variable(name='nce_biases', shape=(vocabulary_size), initializer=nce_b_init, dtype=D_TYPE, **nce_b_init_args)
 
         # Compute the average NCE loss for the batch.
         # tf.nce_loss automatically draws a new sample of the negative labels
         # each time we evaluate the loss.
         self.nce_cost = tf.reduce_mean(
-                tf.nn.nce_loss(weights=nce_weights, biases=nce_biases,
-                inputs=embed, labels=train_labels,
-                num_sampled=num_sampled, num_classes=vocabulary_size,
+            tf.nn.nce_loss(
+                weights=nce_weights,
+                biases=nce_biases,
+                inputs=embed,
+                labels=train_labels,
+                num_sampled=num_sampled,
+                num_classes=vocabulary_size,
                 **nce_loss_args))
 
         self.outputs = embed
@@ -551,6 +544,7 @@ def __init__(
         self.all_params = [embeddings, nce_weights, nce_biases]
         self.all_drop = {}
 
+
 class EmbeddingInputlayer(Layer):
     """
     The :class:`EmbeddingInputlayer` class is a fully connected layer,
@@ -629,25 +623,22 @@ class EmbeddingInputlayer(Layer):
     ... (4, 200)
 
     """
+
     def __init__(
-        self,
-        inputs = None,
-        vocabulary_size = 80000,
-        embedding_size = 200,
-        E_init = tf.random_uniform_initializer(-0.1, 0.1),
-        E_init_args = {},
-        name ='embedding_layer',
+            self,
+            inputs=None,
+            vocabulary_size=80000,
+            embedding_size=200,
+            E_init=tf.random_uniform_initializer(-0.1, 0.1),
+            E_init_args={},
+            name='embedding_layer',
     ):
         Layer.__init__(self, name=name)
         self.inputs = inputs
         print("  [TL] EmbeddingInputlayer %s: (%d, %d)" % (self.name, vocabulary_size, embedding_size))
 
         with tf.variable_scope(name) as vs:
-            embeddings = tf.get_variable(name='embeddings',
-                                    shape=(vocabulary_size, embedding_size),
-                                    initializer=E_init,
-                                    dtype=D_TYPE,
-                                    **E_init_args)
+            embeddings = tf.get_variable(name='embeddings', shape=(vocabulary_size, embedding_size), initializer=E_init, dtype=D_TYPE, **E_init_args)
             embed = tf.nn.embedding_lookup(embeddings, self.inputs)
 
         self.outputs = embed
@@ -675,8 +666,12 @@ class AverageEmbeddingInputlayer(Layer):
     - [1] Iyyer, M., Manjunatha, V., Boyd-Graber, J., & Daum’e III, H. (2015). Deep Unordered Composition Rivals Syntactic Methods for Text Classification. In Association for Computational Linguistics.
     - [2] Joulin, A., Grave, E., Bojanowski, P., & Mikolov, T. (2016). `Bag of Tricks for Efficient Text Classification. <http://arxiv.org/abs/1607.01759>`_
     """
+
     def __init__(
-            self, inputs, vocabulary_size, embedding_size,
+            self,
+            inputs,
+            vocabulary_size,
+            embedding_size,
             pad_value=0,
             name='average_embedding_layer',
             embeddings_initializer=tf.random_uniform_initializer(-0.1, 0.1),
@@ -688,8 +683,7 @@ def __init__(
         #     embeddings_kwargs = {}
 
         if inputs.get_shape().ndims != 2:
-            raise ValueError(
-                'inputs must be of size batch_size * batch_sentence_length')
+            raise ValueError('inputs must be of size batch_size * batch_sentence_length')
 
         self.inputs = inputs
 
@@ -702,10 +696,11 @@ def __init__(
                 dtype=D_TYPE,
                 **(embeddings_kwargs or {})
                 # **embeddings_kwargs
-            ) # **(embeddings_kwargs or {}),
+            )  # **(embeddings_kwargs or {}),
 
             word_embeddings = tf.nn.embedding_lookup(
-                self.embeddings, self.inputs,
+                self.embeddings,
+                self.inputs,
                 name='word_embeddings',
             )
             # Zero out embeddings of pad value
@@ -730,14 +725,14 @@ def __init__(
             sentence_embeddings = tf.divide(
                 sum_word_embeddings,
                 sentence_lengths + 1e-8,  # Add epsilon to avoid dividing by 0
-                name='sentence_embeddings'
-            )
+                name='sentence_embeddings')
 
         self.outputs = sentence_embeddings
         self.all_layers = [self.outputs]
         self.all_params = [self.embeddings]
         self.all_drop = {}
 
+
 ## Dense layer
 class DenseLayer(Layer):
     """
@@ -784,16 +779,17 @@ class DenseLayer(Layer):
     If the input to this layer has more than two axes, it need to flatten the
     input by using :class:`FlattenLayer` in this case.
     """
+
     def __init__(
-        self,
-        layer = None,
-        n_units = 100,
-        act = tf.identity,
-        W_init = tf.truncated_normal_initializer(stddev=0.1),
-        b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {},
-        b_init_args = {},
-        name ='dense_layer',
+            self,
+            layer=None,
+            n_units=100,
+            act=tf.identity,
+            W_init=tf.truncated_normal_initializer(stddev=0.1),
+            b_init=tf.constant_initializer(value=0.0),
+            W_init_args={},
+            b_init_args={},
+            name='dense_layer',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
@@ -804,12 +800,12 @@ def __init__(
         self.n_units = n_units
         print("  [TL] DenseLayer  %s: %d %s" % (self.name, self.n_units, act.__name__))
         with tf.variable_scope(name) as vs:
-            W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, dtype=D_TYPE, **W_init_args )
+            W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, dtype=D_TYPE, **W_init_args)
             if b_init is not None:
                 try:
-                    b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, dtype=D_TYPE, **b_init_args )
-                except: # If initializer is a constant, do not specify shape.
-                    b = tf.get_variable(name='b', initializer=b_init, dtype=D_TYPE, **b_init_args )
+                    b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, dtype=D_TYPE, **b_init_args)
+                except:  # If initializer is a constant, do not specify shape.
+                    b = tf.get_variable(name='b', initializer=b_init, dtype=D_TYPE, **b_init_args)
                 self.outputs = act(tf.matmul(self.inputs, W) + b)
             else:
                 self.outputs = act(tf.matmul(self.inputs, W))
@@ -819,11 +815,12 @@ def __init__(
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
+        self.all_layers.extend([self.outputs])
         if b_init is not None:
-            self.all_params.extend( [W, b] )
+            self.all_params.extend([W, b])
         else:
-            self.all_params.extend( [W] )
+            self.all_params.extend([W])
+
 
 class ReconLayer(DenseLayer):
     """
@@ -869,13 +866,14 @@ class ReconLayer(DenseLayer):
     - For sigmoid layer, the implementation can be `UFLDL <http://deeplearning.stanford.edu/wiki/index.php/UFLDL_Tutorial>`_
     - For rectifying layer, the implementation can be `Glorot (2011). Deep Sparse Rectifier Neural Networks <http://doi.org/10.1.1.208.6449>`_
     """
+
     def __init__(
-        self,
-        layer = None,
-        x_recon = None,
-        name = 'recon_layer',
-        n_units = 784,
-        act = tf.nn.softplus,
+            self,
+            layer=None,
+            x_recon=None,
+            name='recon_layer',
+            n_units=784,
+            act=tf.nn.softplus,
     ):
         DenseLayer.__init__(self, layer=layer, n_units=n_units, act=act, name=name)
         print("     [TL] %s is a ReconLayer" % self.name)
@@ -897,35 +895,36 @@ def __init__(
         print("     learning_rate: %f" % learning_rate)
 
         # Mean-square-error i.e. quadratic-cost
-        mse = tf.reduce_sum(tf.squared_difference(y, x_recon),  1)
-        mse = tf.reduce_mean(mse)            # in theano: mse = ((y - x) ** 2 ).sum(axis=1).mean()
-            # mse = tf.reduce_mean(tf.reduce_sum(tf.square(tf.sub(y, x_recon)),  1))
-            # mse = tf.reduce_mean(tf.squared_difference(y, x_recon)) # <haodong>: Error
-            # mse = tf.sqrt(tf.reduce_mean(tf.square(y - x_recon)))   # <haodong>: Error
+        mse = tf.reduce_sum(tf.squared_difference(y, x_recon), 1)
+        mse = tf.reduce_mean(mse)  # in theano: mse = ((y - x) ** 2 ).sum(axis=1).mean()
+        # mse = tf.reduce_mean(tf.reduce_sum(tf.square(tf.sub(y, x_recon)),  1))
+        # mse = tf.reduce_mean(tf.squared_difference(y, x_recon)) # <haodong>: Error
+        # mse = tf.sqrt(tf.reduce_mean(tf.square(y - x_recon)))   # <haodong>: Error
         # Cross-entropy
-            # ce = cost.cross_entropy(y, x_recon)                                               # <haodong>: list , list , Error (only be used for softmax output)
-            # ce = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, x_recon))          # <haodong>: list , list , Error (only be used for softmax output)
-            # ce = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y, x_recon))   # <haodong>: list , index , Error (only be used for softmax output)
+        # ce = cost.cross_entropy(y, x_recon)                                               # <haodong>: list , list , Error (only be used for softmax output)
+        # ce = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, x_recon))          # <haodong>: list , list , Error (only be used for softmax output)
+        # ce = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y, x_recon))   # <haodong>: list , index , Error (only be used for softmax output)
         L2_w = tf.contrib.layers.l2_regularizer(lambda_l2_w)(self.train_params[0]) \
                 + tf.contrib.layers.l2_regularizer(lambda_l2_w)(self.train_params[2])           # faster than the code below
-            # L2_w = lambda_l2_w * tf.reduce_mean(tf.square(self.train_params[0])) + lambda_l2_w * tf.reduce_mean( tf.square(self.train_params[2]))
+        # L2_w = lambda_l2_w * tf.reduce_mean(tf.square(self.train_params[0])) + lambda_l2_w * tf.reduce_mean( tf.square(self.train_params[2]))
         # DropNeuro
-        P_o = cost.lo_regularizer(0.03)(self.train_params[0])   # + cost.lo_regularizer(0.5)(self.train_params[2])    # <haodong>: if add lo on decoder, no neuron will be broken
+        P_o = cost.lo_regularizer(0.03)(
+            self.train_params[0])  # + cost.lo_regularizer(0.5)(self.train_params[2])    # <haodong>: if add lo on decoder, no neuron will be broken
         P_i = cost.li_regularizer(0.03)(self.train_params[0])  # + cost.li_regularizer(0.001)(self.train_params[2])
 
         # L1 of activation outputs
         activation_out = self.all_layers[-2]
-        L1_a = 0.001 * tf.reduce_mean(activation_out)   # <haodong>:  theano: T.mean( self.a[i] )         # some neuron are broken, white and black
-            # L1_a = 0.001 * tf.reduce_mean( tf.reduce_sum(activation_out, 0) )         # <haodong>: some neuron are broken, white and black
-            # L1_a = 0.001 * 100 * tf.reduce_mean( tf.reduce_sum(activation_out, 1) )   # <haodong>: some neuron are broken, white and black
+        L1_a = 0.001 * tf.reduce_mean(activation_out)  # <haodong>:  theano: T.mean( self.a[i] )         # some neuron are broken, white and black
+        # L1_a = 0.001 * tf.reduce_mean( tf.reduce_sum(activation_out, 0) )         # <haodong>: some neuron are broken, white and black
+        # L1_a = 0.001 * 100 * tf.reduce_mean( tf.reduce_sum(activation_out, 1) )   # <haodong>: some neuron are broken, white and black
         # KL Divergence
         beta = 4
         rho = 0.15
-        p_hat = tf.reduce_mean(activation_out, 0)   # theano: p_hat = T.mean( self.a[i], axis=0 )
-        try: ## TF1.0
-            KLD = beta * tf.reduce_sum( rho * tf.log(tf.divide(rho, p_hat)) + (1- rho) * tf.log((1- rho)/ (tf.subtract(float(1), p_hat))) )
-        except: ## TF0.12
-            KLD = beta * tf.reduce_sum( rho * tf.log(tf.div(rho, p_hat)) + (1- rho) * tf.log((1- rho)/ (tf.sub(float(1), p_hat))) )
+        p_hat = tf.reduce_mean(activation_out, 0)  # theano: p_hat = T.mean( self.a[i], axis=0 )
+        try:  ## TF1.0
+            KLD = beta * tf.reduce_sum(rho * tf.log(tf.divide(rho, p_hat)) + (1 - rho) * tf.log((1 - rho) / (tf.subtract(float(1), p_hat))))
+        except:  ## TF0.12
+            KLD = beta * tf.reduce_sum(rho * tf.log(tf.div(rho, p_hat)) + (1 - rho) * tf.log((1 - rho) / (tf.sub(float(1), p_hat))))
             # KLD = beta * tf.reduce_sum( rho * tf.log(rho/ p_hat) + (1- rho) * tf.log((1- rho)/(1- p_hat)) )
             # theano: L1_a = l1_a[i] * T.sum( rho[i] * T.log(rho[i]/ p_hat) + (1- rho[i]) * T.log((1- rho[i])/(1- p_hat)) )
         # Total cost
@@ -953,12 +952,12 @@ def __init__(
         else:
             raise Exception("Don't support the given reconstruct activation function")
 
-        self.train_op = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999,
-                                        epsilon=1e-08, use_locking=False).minimize(self.cost, var_list=self.train_params)
-                # self.train_op = tf.train.GradientDescentOptimizer(1.0).minimize(self.cost, var_list=self.train_params)
+        self.train_op = tf.train.AdamOptimizer(
+            learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-08, use_locking=False).minimize(
+                self.cost, var_list=self.train_params)
+        # self.train_op = tf.train.GradientDescentOptimizer(1.0).minimize(self.cost, var_list=self.train_params)
 
-    def pretrain(self, sess, x, X_train, X_val, denoise_name=None, n_epoch=100, batch_size=128, print_freq=10,
-                  save=True, save_name='w1pre_'):
+    def pretrain(self, sess, x, X_train, X_val, denoise_name=None, n_epoch=100, batch_size=128, print_freq=10, save=True, save_name='w1pre_'):
         # ====================================================
         #
         # You need to modify the cost function in __init__() so as to
@@ -976,7 +975,7 @@ def pretrain(self, sess, x, X_train, X_val, denoise_name=None, n_epoch=100, batc
         for epoch in range(n_epoch):
             start_time = time.time()
             for X_train_a, _ in iterate.minibatches(X_train, X_train, batch_size, shuffle=True):
-                dp_dict = utils.dict_to_one( self.all_drop )
+                dp_dict = utils.dict_to_one(self.all_drop)
                 if denoise_name:
                     dp_dict[set_keep[denoise_name]] = dp_denoise
                 feed_dict = {x: X_train_a}
@@ -987,28 +986,30 @@ def pretrain(self, sess, x, X_train, X_val, denoise_name=None, n_epoch=100, batc
                 print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time))
                 train_loss, n_batch = 0, 0
                 for X_train_a, _ in iterate.minibatches(X_train, X_train, batch_size, shuffle=True):
-                    dp_dict = utils.dict_to_one( self.all_drop )
+                    dp_dict = utils.dict_to_one(self.all_drop)
                     feed_dict = {x: X_train_a}
                     feed_dict.update(dp_dict)
                     err = sess.run(self.cost, feed_dict=feed_dict)
                     train_loss += err
                     n_batch += 1
-                print("   train loss: %f" % (train_loss/ n_batch))
+                print("   train loss: %f" % (train_loss / n_batch))
                 val_loss, n_batch = 0, 0
                 for X_val_a, _ in iterate.minibatches(X_val, X_val, batch_size, shuffle=True):
-                    dp_dict = utils.dict_to_one( self.all_drop )
+                    dp_dict = utils.dict_to_one(self.all_drop)
                     feed_dict = {x: X_val_a}
                     feed_dict.update(dp_dict)
                     err = sess.run(self.cost, feed_dict=feed_dict)
                     val_loss += err
                     n_batch += 1
-                print("   val loss: %f" % (val_loss/ n_batch))
+                print("   val loss: %f" % (val_loss / n_batch))
                 if save:
                     try:
-                        visualize.W(self.train_params[0].eval(), second=10, saveable=True, shape=[28,28], name=save_name+str(epoch+1), fig_idx=2012)
-                        files.save_npz([self.all_params[0]] , name=save_name+str(epoch+1)+'.npz')
+                        visualize.W(self.train_params[0].eval(), second=10, saveable=True, shape=[28, 28], name=save_name + str(epoch + 1), fig_idx=2012)
+                        files.save_npz([self.all_params[0]], name=save_name + str(epoch + 1) + '.npz')
                     except:
-                        raise Exception("You should change the visualize.W() in ReconLayer.pretrain(), if you want to save the feature images for different dataset")
+                        raise Exception(
+                            "You should change the visualize.W() in ReconLayer.pretrain(), if you want to save the feature images for different dataset")
+
 
 ## Noise layer
 class DropoutLayer(Layer):
@@ -1058,14 +1059,15 @@ class DropoutLayer(Layer):
     In many simple cases, user may find it is better to use one inference instead of two inferences for training and testing seperately, :class:`DropoutLayer`
     allows you to control the dropout rate via `feed_dict`. However, you can fix the keeping probability by setting `is_fix` to True.
     """
+
     def __init__(
-        self,
-        layer = None,
-        keep = 0.5,
-        is_fix = False,
-        is_train = True,
-        seed = None,
-        name = 'dropout_layer',
+            self,
+            layer=None,
+            keep=0.5,
+            is_fix=False,
+            is_train=True,
+            seed=None,
+            name='dropout_layer',
     ):
         Layer.__init__(self, name=name)
         if is_train is False:
@@ -1084,14 +1086,14 @@ def __init__(
                 self.outputs = tf.nn.dropout(self.inputs, keep, seed=seed, name=name)
             else:
                 set_keep[name] = tf.placeholder(tf.float32)
-                self.outputs = tf.nn.dropout(self.inputs, set_keep[name], seed=seed, name=name) # 1.2
+                self.outputs = tf.nn.dropout(self.inputs, set_keep[name], seed=seed, name=name)  # 1.2
 
             self.all_layers = list(layer.all_layers)
             self.all_params = list(layer.all_params)
             self.all_drop = dict(layer.all_drop)
             if is_fix is False:
-                self.all_drop.update( {set_keep[name]: keep} )
-            self.all_layers.extend( [self.outputs] )
+                self.all_drop.update({set_keep[name]: keep})
+            self.all_layers.extend([self.outputs])
 
         # print(set_keep[name])
         #   Tensor("Placeholder_2:0", dtype=float32)
@@ -1110,6 +1112,7 @@ def __init__(
         # value will be checked for compatibility with the placeholder.
         # If the key is a SparseTensor, the value should be a SparseTensorValue.
 
+
 class GaussianNoiseLayer(Layer):
     """
     The :class:`GaussianNoiseLayer` class is noise layer that adding noise with
@@ -1128,14 +1131,15 @@ class GaussianNoiseLayer(Layer):
     name : a string or None
         An optional name to attach to this layer.
     """
+
     def __init__(
-        self,
-        layer = None,
-        mean = 0.0,
-        stddev = 1.0,
-        is_train = True,
-        seed = None,
-        name = 'gaussian_noise_layer',
+            self,
+            layer=None,
+            mean=0.0,
+            stddev=1.0,
+            is_train=True,
+            seed=None,
+            name='gaussian_noise_layer',
     ):
         Layer.__init__(self, name=name)
         if is_train is False:
@@ -1149,12 +1153,13 @@ def __init__(
             print("  [TL] GaussianNoiseLayer %s: mean:%f stddev:%f" % (self.name, mean, stddev))
             with tf.variable_scope(name) as vs:
                 # noise = np.random.normal(0.0 , sigma , tf.to_int64(self.inputs).get_shape())
-                noise = tf.random_normal(shape = self.inputs.get_shape(), mean=mean, stddev=stddev, seed=seed)
+                noise = tf.random_normal(shape=self.inputs.get_shape(), mean=mean, stddev=stddev, seed=seed)
                 self.outputs = self.inputs + noise
             self.all_layers = list(layer.all_layers)
             self.all_params = list(layer.all_params)
             self.all_drop = dict(layer.all_drop)
 
+
 class DropconnectDenseLayer(Layer):
     """
     The :class:`DropconnectDenseLayer` class is ``DenseLayer`` with DropConnect
@@ -1196,17 +1201,18 @@ class DropconnectDenseLayer(Layer):
     ----------
     - `Wan, L. (2013). Regularization of neural networks using dropconnect <http://machinelearning.wustl.edu/mlpapers/papers/icml2013_wan13>`_
     """
+
     def __init__(
-        self,
-        layer = None,
-        keep = 0.5,
-        n_units = 100,
-        act = tf.identity,
-        W_init = tf.truncated_normal_initializer(stddev=0.1),
-        b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {},
-        b_init_args = {},
-        name ='dropconnect_layer',
+            self,
+            layer=None,
+            keep=0.5,
+            n_units=100,
+            act=tf.identity,
+            W_init=tf.truncated_normal_initializer(stddev=0.1),
+            b_init=tf.constant_initializer(value=0.0),
+            W_init_args={},
+            b_init_args={},
+            name='dropconnect_layer',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
@@ -1217,23 +1223,25 @@ def __init__(
         print("  [TL] DropconnectDenseLayer %s: %d %s" % (self.name, self.n_units, act.__name__))
 
         with tf.variable_scope(name) as vs:
-            W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, dtype=D_TYPE, **W_init_args )
-            b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, dtype=D_TYPE, **b_init_args )
-            self.outputs = act(tf.matmul(self.inputs, W) + b)#, name=name)    # 1.2
+            W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, dtype=D_TYPE, **W_init_args)
+            b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, dtype=D_TYPE, **b_init_args)
+            self.outputs = act(tf.matmul(self.inputs, W) + b)  #, name=name)    # 1.2
 
         set_keep[name] = tf.placeholder(tf.float32)
-        W_dropcon = tf.nn.dropout(W,  set_keep[name])
+        W_dropcon = tf.nn.dropout(W, set_keep[name])
         self.outputs = act(tf.matmul(self.inputs, W_dropcon) + b)
 
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_drop.update( {set_keep[name]: keep} )
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( [W, b] )
+        self.all_drop.update({set_keep[name]: keep})
+        self.all_layers.extend([self.outputs])
+        self.all_params.extend([W, b])
+
 
 ## Convolutional layer (Pro)
 
+
 class Conv1dLayer(Layer):
     """
     The :class:`Conv1dLayer` class is a 1D CNN layer, see `tf.nn.convolution <https://www.tensorflow.org/api_docs/python/tf/nn/convolution>`_.
@@ -1264,40 +1272,34 @@ class Conv1dLayer(Layer):
     name : a string or None
         An optional name to attach to this layer.
     """
+
     def __init__(
-        self,
-        layer = None,
-        act = tf.identity,
-        shape = [5, 1, 5],
-        stride = 1,
-        dilation_rate = 1,
-        padding='SAME',
-        use_cudnn_on_gpu=None,
-        data_format='NWC',
-        W_init = tf.truncated_normal_initializer(stddev=0.02),
-        b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {},
-        b_init_args = {},
-        name ='cnn_layer',
+            self,
+            layer=None,
+            act=tf.identity,
+            shape=[5, 1, 5],
+            stride=1,
+            dilation_rate=1,
+            padding='SAME',
+            use_cudnn_on_gpu=None,
+            data_format='NWC',
+            W_init=tf.truncated_normal_initializer(stddev=0.02),
+            b_init=tf.constant_initializer(value=0.0),
+            W_init_args={},
+            b_init_args={},
+            name='cnn_layer',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
-        print("  [TL] Conv1dLayer %s: shape:%s stride:%s pad:%s act:%s" %
-                            (self.name, str(shape), str(stride), padding, act.__name__))
+        print("  [TL] Conv1dLayer %s: shape:%s stride:%s pad:%s act:%s" % (self.name, str(shape), str(stride), padding, act.__name__))
         if act is None:
             act = tf.identity
         with tf.variable_scope(name) as vs:
-            W = tf.get_variable(name='W_conv1d', shape=shape, initializer=W_init, dtype=D_TYPE, **W_init_args )
+            W = tf.get_variable(name='W_conv1d', shape=shape, initializer=W_init, dtype=D_TYPE, **W_init_args)
             self.outputs = tf.nn.convolution(
-                self.inputs,
-                W,
-                strides=(stride,),
-                padding=padding,
-                dilation_rate=(dilation_rate,),
-                data_format=data_format
-            ) #1.2
+                self.inputs, W, strides=(stride, ), padding=padding, dilation_rate=(dilation_rate, ), data_format=data_format)  #1.2
             if b_init:
-                b = tf.get_variable(name='b_conv1d', shape=(shape[-1]), initializer=b_init, dtype=D_TYPE, **b_init_args )
+                b = tf.get_variable(name='b_conv1d', shape=(shape[-1]), initializer=b_init, dtype=D_TYPE, **b_init_args)
                 self.outputs = self.outputs + b
 
             self.outputs = act(self.outputs)
@@ -1305,11 +1307,12 @@ def __init__(
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
+        self.all_layers.extend([self.outputs])
         if b_init:
-            self.all_params.extend( [W, b] )
+            self.all_params.extend([W, b])
         else:
-            self.all_params.extend( [W] )
+            self.all_params.extend([W])
+
 
 class Conv2dLayer(Layer):
     """
@@ -1374,42 +1377,44 @@ class Conv2dLayer(Layer):
     ...                       strides=[1, 1, 1, 1],
     ...                       padding='SAME') + b )
     """
+
     def __init__(
-        self,
-        layer = None,
-        act = tf.identity,
-        shape = [5, 5, 1, 100],
-        strides=[1, 1, 1, 1],
-        padding='SAME',
-        W_init = tf.truncated_normal_initializer(stddev=0.02),
-        b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {},
-        b_init_args = {},
-        use_cudnn_on_gpu = None,
-        data_format = None,
-        name ='cnn_layer',
+            self,
+            layer=None,
+            act=tf.identity,
+            shape=[5, 5, 1, 100],
+            strides=[1, 1, 1, 1],
+            padding='SAME',
+            W_init=tf.truncated_normal_initializer(stddev=0.02),
+            b_init=tf.constant_initializer(value=0.0),
+            W_init_args={},
+            b_init_args={},
+            use_cudnn_on_gpu=None,
+            data_format=None,
+            name='cnn_layer',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
-        print("  [TL] Conv2dLayer %s: shape:%s strides:%s pad:%s act:%s" %
-                            (self.name, str(shape), str(strides), padding, act.__name__))
+        print("  [TL] Conv2dLayer %s: shape:%s strides:%s pad:%s act:%s" % (self.name, str(shape), str(strides), padding, act.__name__))
 
         with tf.variable_scope(name) as vs:
-            W = tf.get_variable(name='W_conv2d', shape=shape, initializer=W_init, dtype=D_TYPE, **W_init_args )
+            W = tf.get_variable(name='W_conv2d', shape=shape, initializer=W_init, dtype=D_TYPE, **W_init_args)
             if b_init:
-                b = tf.get_variable(name='b_conv2d', shape=(shape[-1]), initializer=b_init, dtype=D_TYPE, **b_init_args )
-                self.outputs = act( tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format) + b )
+                b = tf.get_variable(name='b_conv2d', shape=(shape[-1]), initializer=b_init, dtype=D_TYPE, **b_init_args)
+                self.outputs = act(
+                    tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format) + b)
             else:
-                self.outputs = act( tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format))
+                self.outputs = act(tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format))
 
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
+        self.all_layers.extend([self.outputs])
         if b_init:
-            self.all_params.extend( [W, b] )
+            self.all_params.extend([W, b])
         else:
-            self.all_params.extend( [W] )
+            self.all_params.extend([W])
+
 
 class DeConv2dLayer(Layer):
     """
@@ -1481,41 +1486,43 @@ class DeConv2dLayer(Layer):
     ...         shape=[3,3,512,1024], strides=[1,2,2,1], output_shape=[batch_size,64,64,512],
     ...         padding='SAME', W_init=w_init, b_init=b_init, name='devcon1_1')
     """
+
     def __init__(
-        self,
-        layer = None,
-        act = tf.identity,
-        shape = [3, 3, 128, 256],
-        output_shape = [1, 256, 256, 128],
-        strides = [1, 2, 2, 1],
-        padding = 'SAME',
-        W_init = tf.truncated_normal_initializer(stddev=0.02),
-        b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {},
-        b_init_args = {},
-        name ='decnn2d_layer',
+            self,
+            layer=None,
+            act=tf.identity,
+            shape=[3, 3, 128, 256],
+            output_shape=[1, 256, 256, 128],
+            strides=[1, 2, 2, 1],
+            padding='SAME',
+            W_init=tf.truncated_normal_initializer(stddev=0.02),
+            b_init=tf.constant_initializer(value=0.0),
+            W_init_args={},
+            b_init_args={},
+            name='decnn2d_layer',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
-        print("  [TL] DeConv2dLayer %s: shape:%s out_shape:%s strides:%s pad:%s act:%s" %
-                            (self.name, str(shape), str(output_shape), str(strides), padding, act.__name__))
+        print("  [TL] DeConv2dLayer %s: shape:%s out_shape:%s strides:%s pad:%s act:%s" % (self.name, str(shape), str(output_shape), str(strides), padding,
+                                                                                           act.__name__))
         # print("  DeConv2dLayer: Untested")
         with tf.variable_scope(name) as vs:
-            W = tf.get_variable(name='W_deconv2d', shape=shape, initializer=W_init, dtype=D_TYPE, **W_init_args )
+            W = tf.get_variable(name='W_deconv2d', shape=shape, initializer=W_init, dtype=D_TYPE, **W_init_args)
             if b_init:
-                b = tf.get_variable(name='b_deconv2d', shape=(shape[-2]), initializer=b_init, dtype=D_TYPE, **b_init_args )
-                self.outputs = act( tf.nn.conv2d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + b )
+                b = tf.get_variable(name='b_deconv2d', shape=(shape[-2]), initializer=b_init, dtype=D_TYPE, **b_init_args)
+                self.outputs = act(tf.nn.conv2d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + b)
             else:
-                self.outputs = act( tf.nn.conv2d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding))
+                self.outputs = act(tf.nn.conv2d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding))
 
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
+        self.all_layers.extend([self.outputs])
         if b_init:
-            self.all_params.extend( [W, b] )
+            self.all_params.extend([W, b])
         else:
-            self.all_params.extend( [W] )
+            self.all_params.extend([W])
+
 
 class Conv3dLayer(Layer):
     """
@@ -1544,18 +1551,19 @@ class Conv3dLayer(Layer):
     name : a string or None
         An optional name to attach to this layer.
     """
+
     def __init__(
-        self,
-        layer = None,
-        act = tf.identity,
-        shape = [2, 2, 2, 64, 128],
-        strides=[1, 2, 2, 2, 1],
-        padding='SAME',
-        W_init = tf.truncated_normal_initializer(stddev=0.02),
-        b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {},
-        b_init_args = {},
-        name ='cnn3d_layer',
+            self,
+            layer=None,
+            act=tf.identity,
+            shape=[2, 2, 2, 64, 128],
+            strides=[1, 2, 2, 2, 1],
+            padding='SAME',
+            W_init=tf.truncated_normal_initializer(stddev=0.02),
+            b_init=tf.constant_initializer(value=0.0),
+            W_init_args={},
+            b_init_args={},
+            name='cnn3d_layer',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
@@ -1564,17 +1572,18 @@ def __init__(
         with tf.variable_scope(name) as vs:
             # W = tf.Variable(W_init(shape=shape, **W_init_args), name='W_conv')
             # b = tf.Variable(b_init(shape=[shape[-1]], **b_init_args), name='b_conv')
-            W = tf.get_variable(name='W_conv3d', shape=shape, initializer=W_init, dtype=D_TYPE, **W_init_args )
-            b = tf.get_variable(name='b_conv3d', shape=(shape[-1]), initializer=b_init, dtype=D_TYPE, **b_init_args )
-            self.outputs = act( tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None) + b )
+            W = tf.get_variable(name='W_conv3d', shape=shape, initializer=W_init, dtype=D_TYPE, **W_init_args)
+            b = tf.get_variable(name='b_conv3d', shape=(shape[-1]), initializer=b_init, dtype=D_TYPE, **b_init_args)
+            self.outputs = act(tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None) + b)
 
         # self.outputs = act( tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None) + b )
 
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( [W, b] )
+        self.all_layers.extend([self.outputs])
+        self.all_params.extend([W, b])
+
 
 class DeConv3dLayer(Layer):
     """The :class:`DeConv3dLayer` class is deconvolutional 3D layer, see `tf.nn.conv3d_transpose <https://www.tensorflow.org/versions/master/api_docs/python/nn.html#conv3d_transpose>`_.
@@ -1604,36 +1613,38 @@ class DeConv3dLayer(Layer):
     name : a string or None
         An optional name to attach to this layer.
     """
+
     def __init__(
-        self,
-        layer = None,
-        act = tf.identity,
-        shape = [2, 2, 2, 128, 256],
-        output_shape = [1, 12, 32, 32, 128],
-        strides = [1, 2, 2, 2, 1],
-        padding = 'SAME',
-        W_init = tf.truncated_normal_initializer(stddev=0.02),
-        b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {},
-        b_init_args = {},
-        name ='decnn3d_layer',
+            self,
+            layer=None,
+            act=tf.identity,
+            shape=[2, 2, 2, 128, 256],
+            output_shape=[1, 12, 32, 32, 128],
+            strides=[1, 2, 2, 2, 1],
+            padding='SAME',
+            W_init=tf.truncated_normal_initializer(stddev=0.02),
+            b_init=tf.constant_initializer(value=0.0),
+            W_init_args={},
+            b_init_args={},
+            name='decnn3d_layer',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
-        print("  [TL] DeConv3dLayer %s: shape:%s out_shape:%s strides:%s pad:%s act:%s" %
-                            (self.name, str(shape), str(output_shape), str(strides), padding, act.__name__))
+        print("  [TL] DeConv3dLayer %s: shape:%s out_shape:%s strides:%s pad:%s act:%s" % (self.name, str(shape), str(output_shape), str(strides), padding,
+                                                                                           act.__name__))
 
         with tf.variable_scope(name) as vs:
-            W = tf.get_variable(name='W_deconv3d', shape=shape, initializer=W_init, dtype=D_TYPE, **W_init_args )
-            b = tf.get_variable(name='b_deconv3d', shape=(shape[-2]), initializer=b_init, dtype=D_TYPE, **b_init_args )
+            W = tf.get_variable(name='W_deconv3d', shape=shape, initializer=W_init, dtype=D_TYPE, **W_init_args)
+            b = tf.get_variable(name='b_deconv3d', shape=(shape[-2]), initializer=b_init, dtype=D_TYPE, **b_init_args)
 
-            self.outputs = act( tf.nn.conv3d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + b )
+            self.outputs = act(tf.nn.conv3d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + b)
 
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( [W, b] )
+        self.all_layers.extend([self.outputs])
+        self.all_params.extend([W, b])
+
 
 class UpSampling2dLayer(Layer):
     """The :class:`UpSampling2dLayer` class is upSampling 2d layer, see `tf.image.resize_images <https://www.tensorflow.org/versions/master/api_docs/python/image/resizing#resize_images>`_.
@@ -1653,14 +1664,15 @@ class UpSampling2dLayer(Layer):
     name : a string or None
         An optional name to attach to this layer.
     """
+
     def __init__(
-        self,
-        layer = None,
-        size = [],
-        is_scale = True,
-        method = 0,
-        align_corners = False,
-        name ='upsample2d_layer',
+            self,
+            layer=None,
+            size=[],
+            is_scale=True,
+            method=0,
+            align_corners=False,
+            name='upsample2d_layer',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
@@ -1676,18 +1688,18 @@ def __init__(
                 size = [int(size_h), int(size_w)]
         else:
             raise Exception("Donot support shape %s" % self.inputs.get_shape())
-        print("  [TL] UpSampling2dLayer %s: is_scale:%s size:%s method:%d align_corners:%s" %
-                                (name, is_scale, size, method, align_corners))
+        print("  [TL] UpSampling2dLayer %s: is_scale:%s size:%s method:%d align_corners:%s" % (name, is_scale, size, method, align_corners))
         with tf.variable_scope(name) as vs:
             try:
                 self.outputs = tf.image.resize_images(self.inputs, size=size, method=method, align_corners=align_corners)
-            except: # for TF 0.10
+            except:  # for TF 0.10
                 self.outputs = tf.image.resize_images(self.inputs, new_height=size[0], new_width=size[1], method=method, align_corners=align_corners)
 
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
+        self.all_layers.extend([self.outputs])
+
 
 class DownSampling2dLayer(Layer):
     """The :class:`DownSampling2dLayer` class is downSampling 2d layer, see `tf.image.resize_images <https://www.tensorflow.org/versions/master/api_docs/python/image/resizing#resize_images>`_.
@@ -1707,14 +1719,15 @@ class DownSampling2dLayer(Layer):
     name : a string or None
         An optional name to attach to this layer.
     """
+
     def __init__(
-        self,
-        layer = None,
-        size = [],
-        is_scale = True,
-        method = 0,
-        align_corners = False,
-        name ='downsample2d_layer',
+            self,
+            layer=None,
+            size=[],
+            is_scale=True,
+            method=0,
+            align_corners=False,
+            name='downsample2d_layer',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
@@ -1730,18 +1743,17 @@ def __init__(
                 size = [int(size_h), int(size_w)]
         else:
             raise Exception("Donot support shape %s" % self.inputs.get_shape())
-        print("  [TL] DownSampling2dLayer %s: is_scale:%s size:%s method:%d, align_corners:%s" %
-                                (name, is_scale, size, method, align_corners))
+        print("  [TL] DownSampling2dLayer %s: is_scale:%s size:%s method:%d, align_corners:%s" % (name, is_scale, size, method, align_corners))
         with tf.variable_scope(name) as vs:
             try:
                 self.outputs = tf.image.resize_images(self.inputs, size=size, method=method, align_corners=align_corners)
-            except: # for TF 0.10
+            except:  # for TF 0.10
                 self.outputs = tf.image.resize_images(self.inputs, new_height=size[0], new_width=size[1], method=method, align_corners=align_corners)
 
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
+        self.all_layers.extend([self.outputs])
 
 
 # ## 2D deformable convolutional layer
@@ -1751,13 +1763,14 @@ def _to_bc_h_w(x, x_shape):
     x = tf.reshape(x, (-1, x_shape[1], x_shape[2]))
     return x
 
+
 def _to_b_h_w_n_c(x, x_shape):
     """(b*c, h, w, n) -> (b, h, w, n, c)"""
-    x = tf.reshape(
-        x, (-1, x_shape[4], x_shape[1], x_shape[2], x_shape[3]))
+    x = tf.reshape(x, (-1, x_shape[4], x_shape[1], x_shape[2], x_shape[3]))
     x = tf.transpose(x, [0, 2, 3, 4, 1])
     return x
 
+
 def tf_repeat(a, repeats):
     """TensorFlow version of np.repeat for 1D"""
     # https://github.com/tensorflow/tensorflow/issues/8521
@@ -1768,6 +1781,7 @@ def tf_repeat(a, repeats):
     a = tf_flatten(a)
     return a
 
+
 def tf_batch_map_coordinates(inputs, coords):
     """Batch version of tf_map_coordinates
 
@@ -1811,6 +1825,7 @@ def tf_batch_map_coordinates(inputs, coords):
 
     return mapped_vals
 
+
 def tf_batch_map_offsets(inputs, offsets, grid_offset):
     """Batch map offsets into input
 
@@ -1827,7 +1842,7 @@ def tf_batch_map_offsets(inputs, offsets, grid_offset):
 
     input_shape = inputs.get_shape()
     batch_size = tf.shape(inputs)[0]
-    kernel_n = int(int(offsets.get_shape()[3])/2)
+    kernel_n = int(int(offsets.get_shape()[3]) / 2)
     input_h = input_shape[1]
     input_w = input_shape[2]
     channel = input_shape[3]
@@ -1844,8 +1859,12 @@ def tf_batch_map_offsets(inputs, offsets, grid_offset):
     coords = tf.tile(coords, [batch_size, 1, 1, 1, 1]) + offsets  # grid_offset --> (b, h, w, n, 2)
 
     # clip out of bound
-    coords = tf.stack([tf.clip_by_value(coords[:, :, :, :, 0], 0.0, tf.cast(input_h - 1, 'float32')),
-                       tf.clip_by_value(coords[:, :, :, :, 1], 0.0, tf.cast(input_w - 1, 'float32'))], axis=-1)
+    coords = tf.stack(
+        [
+            tf.clip_by_value(coords[:, :, :, :, 0], 0.0, tf.cast(input_h - 1, 'float32')),
+            tf.clip_by_value(coords[:, :, :, :, 1], 0.0, tf.cast(input_w - 1, 'float32'))
+        ],
+        axis=-1)
     coords = tf.tile(coords, [channel, 1, 1, 1, 1])
 
     mapped_vals = tf_batch_map_coordinates(inputs, coords)
@@ -1854,6 +1873,7 @@ def tf_batch_map_offsets(inputs, offsets, grid_offset):
 
     return mapped_vals
 
+
 class DeformableConv2dLayer(Layer):
     """The :class:`DeformableConv2dLayer` class is a
     `Deformable Convolutional Networks <https://arxiv.org/abs/1703.06211>`_ .
@@ -1898,18 +1918,17 @@ class DeformableConv2dLayer(Layer):
     - The padding is fixed as 'SAME'.
     - The current implementation is memory-inefficient, please use carefully.
     """
-    def __init__(
-            self,
-            layer=None,
-            act=tf.identity,
-            offset_layer=None,
-            shape=[3, 3, 1, 100],
-            name='deformable_conv_2d_layer',
-            W_init=tf.truncated_normal_initializer(stddev=0.02),
-            b_init=tf.constant_initializer(value=0.0),
-            W_init_args={},
-            b_init_args={}
-    ):
+
+    def __init__(self,
+                 layer=None,
+                 act=tf.identity,
+                 offset_layer=None,
+                 shape=[3, 3, 1, 100],
+                 name='deformable_conv_2d_layer',
+                 W_init=tf.truncated_normal_initializer(stddev=0.02),
+                 b_init=tf.constant_initializer(value=0.0),
+                 W_init_args={},
+                 b_init_args={}):
         if tf.__version__ < "1.4":
             raise Exception("Deformable CNN layer requires tensrflow 1.4 or higher version")
 
@@ -1917,8 +1936,7 @@ def __init__(
         self.inputs = layer.outputs
         self.offset_layer = offset_layer
 
-        print("  [TL] DeformableConv2dLayer %s: shape:%s, act:%s" %
-              (self.name, str(shape), act.__name__))
+        print("  [TL] DeformableConv2dLayer %s: shape:%s, act:%s" % (self.name, str(shape), act.__name__))
 
         with tf.variable_scope(name) as vs:
 
@@ -1929,17 +1947,16 @@ def __init__(
             input_h = int(self.inputs.get_shape()[1])
             input_w = int(self.inputs.get_shape()[2])
             kernel_n = shape[0] * shape[1]
-            initial_offsets = tf.stack(tf.meshgrid(tf.range(shape[0]),
-                                                   tf.range(shape[1]),
-                                                   indexing='ij'))  # initial_offsets --> (kh, kw, 2)
+            initial_offsets = tf.stack(tf.meshgrid(tf.range(shape[0]), tf.range(shape[1]), indexing='ij'))  # initial_offsets --> (kh, kw, 2)
             initial_offsets = tf.reshape(initial_offsets, (-1, 2))  # initial_offsets --> (n, 2)
             initial_offsets = tf.expand_dims(initial_offsets, 0)  # initial_offsets --> (1, n, 2)
             initial_offsets = tf.expand_dims(initial_offsets, 0)  # initial_offsets --> (1, 1, n, 2)
             initial_offsets = tf.tile(initial_offsets, [input_h, input_w, 1, 1])  # initial_offsets --> (h, w, n, 2)
             initial_offsets = tf.cast(initial_offsets, 'float32')
             grid = tf.meshgrid(
-                tf.range(- int((shape[0] - 1)/2.0), int(input_h - int((shape[0] - 1)/2.0)), 1),
-                tf.range(- int((shape[1] - 1)/2.0), int(input_w - int((shape[1] - 1)/2.0)), 1), indexing='ij')
+                tf.range(-int((shape[0] - 1) / 2.0), int(input_h - int((shape[0] - 1) / 2.0)), 1),
+                tf.range(-int((shape[1] - 1) / 2.0), int(input_w - int((shape[1] - 1) / 2.0)), 1),
+                indexing='ij')
 
             grid = tf.stack(grid, axis=-1)
             grid = tf.cast(grid, 'float32')  # grid --> (h, w, 2)
@@ -1949,13 +1966,11 @@ def __init__(
 
             input_deform = tf_batch_map_offsets(self.inputs, offset, grid_offset)
 
-            W = tf.get_variable(name='W_conv2d', shape=[1, 1, shape[0] * shape[1], shape[-2], shape[-1]],
-                              initializer=W_init, dtype=D_TYPE, **W_init_args)
-            b = tf.get_variable(name='b_conv2d', shape=(shape[-1]),
-                                initializer=b_init, dtype=D_TYPE, **b_init_args)
+            W = tf.get_variable(name='W_conv2d', shape=[1, 1, shape[0] * shape[1], shape[-2], shape[-1]], initializer=W_init, dtype=D_TYPE, **W_init_args)
+            b = tf.get_variable(name='b_conv2d', shape=(shape[-1]), initializer=b_init, dtype=D_TYPE, **b_init_args)
 
-            self.outputs = tf.reshape(act(
-                tf.nn.conv3d(input_deform, W, strides=[1, 1, 1, 1, 1], padding='VALID', name=None) + b),
+            self.outputs = tf.reshape(
+                act(tf.nn.conv3d(input_deform, W, strides=[1, 1, 1, 1, 1], padding='VALID', name=None) + b),
                 (tf.shape(self.inputs)[0], input_h, input_w, shape[-1]))
 
         ## fixed
@@ -1975,11 +1990,23 @@ def __init__(
         self.all_layers.extend([self.outputs])
         self.all_params.extend([W, b])
 
-def AtrousConv1dLayer(net, n_filter=32, filter_size=2, stride=1, dilation=1, act=None,
-        padding='SAME', use_cudnn_on_gpu=None,data_format='NWC',
-        W_init = tf.truncated_normal_initializer(stddev=0.02),
-        b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {}, b_init_args = {},name ='conv1d',):
+
+def AtrousConv1dLayer(
+        net,
+        n_filter=32,
+        filter_size=2,
+        stride=1,
+        dilation=1,
+        act=None,
+        padding='SAME',
+        use_cudnn_on_gpu=None,
+        data_format='NWC',
+        W_init=tf.truncated_normal_initializer(stddev=0.02),
+        b_init=tf.constant_initializer(value=0.0),
+        W_init_args={},
+        b_init_args={},
+        name='conv1d',
+):
     """Wrapper for :class:`AtrousConv1dLayer`, if you don't understand how to use :class:`Conv1dLayer`, this function may be easier.
 
     Parameters
@@ -1994,22 +2021,24 @@ def AtrousConv1dLayer(net, n_filter=32, filter_size=2, stride=1, dilation=1, act
     """
     if act is None:
         act = tf.identity
-    net = Conv1dLayer(layer = net,
-            act = act,
-            shape = [filter_size, int(net.outputs.get_shape()[-1]), n_filter],
-            stride = stride,
-            padding = padding,
-            dilation_rate = dilation,
-            use_cudnn_on_gpu = use_cudnn_on_gpu,
-            data_format = data_format,
-            W_init = W_init,
-            b_init = b_init,
-            W_init_args = W_init_args,
-            b_init_args = b_init_args,
-            name = name,
-        )
+    net = Conv1dLayer(
+        layer=net,
+        act=act,
+        shape=[filter_size, int(net.outputs.get_shape()[-1]), n_filter],
+        stride=stride,
+        padding=padding,
+        dilation_rate=dilation,
+        use_cudnn_on_gpu=use_cudnn_on_gpu,
+        data_format=data_format,
+        W_init=W_init,
+        b_init=b_init,
+        W_init_args=W_init_args,
+        b_init_args=b_init_args,
+        name=name,
+    )
     return net
 
+
 class AtrousConv2dLayer(Layer):
     """The :class:`AtrousConv2dLayer` class is Atrous convolution (a.k.a. convolution with holes or dilated convolution) 2D layer, see `tf.nn.atrous_conv2d <https://www.tensorflow.org/versions/master/api_docs/python/nn.html#atrous_conv2d>`_.
 
@@ -2028,31 +2057,29 @@ class AtrousConv2dLayer(Layer):
     b_init_args : dictionary. The arguments for the biases tf.get_variable().
     name : a string or None, an optional name to attach to this layer.
     """
-    def __init__(
-        self,
-        layer = None,
-        n_filter = 32,
-        filter_size = (3,3),
-        rate = 2,
-        act = None,
-        padding = 'SAME',
-        W_init = tf.truncated_normal_initializer(stddev=0.02),
-        b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {},
-        b_init_args = {},
-        name = 'atrou2d'
-    ):
+
+    def __init__(self,
+                 layer=None,
+                 n_filter=32,
+                 filter_size=(3, 3),
+                 rate=2,
+                 act=None,
+                 padding='SAME',
+                 W_init=tf.truncated_normal_initializer(stddev=0.02),
+                 b_init=tf.constant_initializer(value=0.0),
+                 W_init_args={},
+                 b_init_args={},
+                 name='atrou2d'):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
         if act is None:
             act = tf.identity
-        print("  [TL] AtrousConv2dLayer %s: n_filter:%d filter_size:%s rate:%d pad:%s act:%s" %
-                            (self.name, n_filter, filter_size, rate, padding, act.__name__))
+        print("  [TL] AtrousConv2dLayer %s: n_filter:%d filter_size:%s rate:%d pad:%s act:%s" % (self.name, n_filter, filter_size, rate, padding, act.__name__))
         with tf.variable_scope(name) as vs:
             shape = [filter_size[0], filter_size[1], int(self.inputs.get_shape()[-1]), n_filter]
-            filters = tf.get_variable(name='filter', shape=shape, initializer=W_init, dtype=D_TYPE, **W_init_args )
+            filters = tf.get_variable(name='filter', shape=shape, initializer=W_init, dtype=D_TYPE, **W_init_args)
             if b_init:
-                b = tf.get_variable(name='b', shape=(n_filter), initializer=b_init, dtype=D_TYPE, **b_init_args )
+                b = tf.get_variable(name='b', shape=(n_filter), initializer=b_init, dtype=D_TYPE, **b_init_args)
                 self.outputs = act(tf.nn.atrous_conv2d(self.inputs, filters, rate, padding) + b)
             else:
                 self.outputs = act(tf.nn.atrous_conv2d(self.inputs, filters, rate, padding))
@@ -2060,13 +2087,14 @@ def __init__(
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
+        self.all_layers.extend([self.outputs])
         if b_init:
-            self.all_params.extend( [filters, b] )
+            self.all_params.extend([filters, b])
         else:
-            self.all_params.extend( [filters] )
+            self.all_params.extend([filters])
+
 
-class SeparableConv2dLayer(Layer):# Untested
+class SeparableConv2dLayer(Layer):  # Untested
     """The :class:`SeparableConv2dLayer` class is 2-D convolution with separable filters, see `tf.layers.separable_conv2d <https://www.tensorflow.org/api_docs/python/tf/layers/separable_conv2d>`_.
 
     Parameters
@@ -2090,27 +2118,26 @@ class SeparableConv2dLayer(Layer):# Untested
     activity_regularizer : Regularizer function for the output.
     name : a string or None, an optional name to attach to this layer.
     """
-    def __init__(
-        self,
-        layer = None,
-        filters = None,
-        kernel_size=5,
-        strides=(1, 1),
-        padding='valid',
-        data_format='channels_last',
-        dilation_rate=(1, 1),
-        depth_multiplier=1,
-        act=None,
-        use_bias=True,
-        depthwise_initializer=None,
-        pointwise_initializer=None,
-        bias_initializer=tf.zeros_initializer,
-        depthwise_regularizer=None,
-        pointwise_regularizer=None,
-        bias_regularizer=None,
-        activity_regularizer=None,
-        name = 'atrou2d'
-    ):
+
+    def __init__(self,
+                 layer=None,
+                 filters=None,
+                 kernel_size=5,
+                 strides=(1, 1),
+                 padding='valid',
+                 data_format='channels_last',
+                 dilation_rate=(1, 1),
+                 depth_multiplier=1,
+                 act=None,
+                 use_bias=True,
+                 depthwise_initializer=None,
+                 pointwise_initializer=None,
+                 bias_initializer=tf.zeros_initializer,
+                 depthwise_regularizer=None,
+                 pointwise_regularizer=None,
+                 bias_regularizer=None,
+                 activity_regularizer=None,
+                 name='atrou2d'):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
         assert filters is not None
@@ -2121,24 +2148,37 @@ def __init__(
         bias_initializer = bias_initializer()
 
         print("  [TL] SeparableConv2dLayer %s: filters:%s kernel_size:%s strides:%s padding:%s dilation_rate:%s depth_multiplier:%s act:%s" %
-                            (self.name, str(filters), str(kernel_size), str(strides), padding, str(dilation_rate), str(depth_multiplier), act.__name__))
+              (self.name, str(filters), str(kernel_size), str(strides), padding, str(dilation_rate), str(depth_multiplier), act.__name__))
 
         with tf.variable_scope(name) as vs:
-            self.outputs = tf.layers.separable_conv2d(self.inputs, filters, kernel_size,
-                 strides=strides, padding=padding, data_format=data_format,
-                 dilation_rate=dilation_rate, depth_multiplier=depth_multiplier, activation=act,
-                 use_bias=use_bias, depthwise_initializer=depthwise_initializer, pointwise_initializer=pointwise_initializer,
-                 bias_initializer=bias_initializer, depthwise_regularizer=depthwise_regularizer,
-                 pointwise_regularizer=pointwise_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer,)
-                 #trainable=True, name=None, reuse=None)
+            self.outputs = tf.layers.separable_conv2d(
+                self.inputs,
+                filters,
+                kernel_size,
+                strides=strides,
+                padding=padding,
+                data_format=data_format,
+                dilation_rate=dilation_rate,
+                depth_multiplier=depth_multiplier,
+                activation=act,
+                use_bias=use_bias,
+                depthwise_initializer=depthwise_initializer,
+                pointwise_initializer=pointwise_initializer,
+                bias_initializer=bias_initializer,
+                depthwise_regularizer=depthwise_regularizer,
+                pointwise_regularizer=pointwise_regularizer,
+                bias_regularizer=bias_regularizer,
+                activity_regularizer=activity_regularizer,
+            )
+            #trainable=True, name=None, reuse=None)
 
             variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
 
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( variables )
+        self.all_layers.extend([self.outputs])
+        self.all_params.extend(variables)
 
 
 ## Initializers for Convuolutional Layers
@@ -2177,7 +2217,7 @@ def deconv2d_bilinear_upsampling_initializer(shape):
     """
     if shape[0] != shape[1]:
         raise Exception('deconv2d_bilinear_upsampling_initializer only supports symmetrical filter sizes')
-    if shape[3] < shape [2]:
+    if shape[3] < shape[2]:
         raise Exception('deconv2d_bilinear_upsampling_initializer behaviour is not defined for num_in_channels < num_out_channels ')
 
     filter_size = shape[0]
@@ -2200,15 +2240,27 @@ def deconv2d_bilinear_upsampling_initializer(shape):
         weights[:, :, i, i] = bilinear_kernel
 
     #assign numpy array to constant_initalizer and pass to get_variable
-    bilinear_weights_init = tf.constant_initializer(value=weights, dtype=D_TYPE) #dtype=tf.float32)
+    bilinear_weights_init = tf.constant_initializer(value=weights, dtype=D_TYPE)  #dtype=tf.float32)
     return bilinear_weights_init
 
+
 ## Convolutional layer (Simplified)
-def Conv1d(net, n_filter=32, filter_size=5, stride=1, dilation_rate=1, act=None,
-        padding='SAME', use_cudnn_on_gpu=None, data_format="NWC",
-        W_init = tf.truncated_normal_initializer(stddev=0.02),
-        b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {}, b_init_args = {}, name ='conv1d',):
+def Conv1d(
+        net,
+        n_filter=32,
+        filter_size=5,
+        stride=1,
+        dilation_rate=1,
+        act=None,
+        padding='SAME',
+        use_cudnn_on_gpu=None,
+        data_format="NWC",
+        W_init=tf.truncated_normal_initializer(stddev=0.02),
+        b_init=tf.constant_initializer(value=0.0),
+        W_init_args={},
+        b_init_args={},
+        name='conv1d',
+):
     """Wrapper for :class:`Conv1dLayer`, if you don't understand how to use :class:`Conv1dLayer`, this function may be easier.
 
     Parameters
@@ -2240,25 +2292,39 @@ def Conv1d(net, n_filter=32, filter_size=5, stride=1, dilation_rate=1, act=None,
     """
     if act is None:
         act = tf.identity
-    net = Conv1dLayer(layer = net,
-            act = act,
-            shape = [filter_size, int(net.outputs.get_shape()[-1]), n_filter],
-            stride = stride,
-            dilation_rate = dilation_rate,
-            padding = padding,
-            use_cudnn_on_gpu = use_cudnn_on_gpu,
-            data_format = data_format,
-            W_init = W_init,
-            b_init = b_init,
-            W_init_args = W_init_args,
-            b_init_args = b_init_args,
-            name = name,
-        )
+    net = Conv1dLayer(
+        layer=net,
+        act=act,
+        shape=[filter_size, int(net.outputs.get_shape()[-1]), n_filter],
+        stride=stride,
+        dilation_rate=dilation_rate,
+        padding=padding,
+        use_cudnn_on_gpu=use_cudnn_on_gpu,
+        data_format=data_format,
+        W_init=W_init,
+        b_init=b_init,
+        W_init_args=W_init_args,
+        b_init_args=b_init_args,
+        name=name,
+    )
     return net
 
-def Conv2d(net, n_filter=32, filter_size=(3, 3), strides=(1, 1), act = None,
-        padding='SAME', W_init = tf.truncated_normal_initializer(stddev=0.02), b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {}, b_init_args = {}, use_cudnn_on_gpu = None, data_format = None,name ='conv2d',):
+
+def Conv2d(
+        net,
+        n_filter=32,
+        filter_size=(3, 3),
+        strides=(1, 1),
+        act=None,
+        padding='SAME',
+        W_init=tf.truncated_normal_initializer(stddev=0.02),
+        b_init=tf.constant_initializer(value=0.0),
+        W_init_args={},
+        b_init_args={},
+        use_cudnn_on_gpu=None,
+        data_format=None,
+        name='conv2d',
+):
     """Wrapper for :class:`Conv2dLayer`, if you don't understand how to use :class:`Conv2dLayer`, this function may be easier.
 
     Parameters
@@ -2288,27 +2354,38 @@ def Conv2d(net, n_filter=32, filter_size=(3, 3), strides=(1, 1), act = None,
 
     try:
         pre_channel = int(net.outputs.get_shape()[-1])
-    except: # if pre_channel is ?, it happens when using Spatial Transformer Net
+    except:  # pre_channel is unknown (?), which happens when using Spatial Transformer Net
         pre_channel = 1
         print("[warnings] unknow input channels, set to 1")
-    net = Conv2dLayer(net,
-                       act = act,
-                       shape = [filter_size[0], filter_size[1], pre_channel, n_filter],  # 32 features for each 5x5 patch
-                       strides = [1, strides[0], strides[1], 1],
-                       padding = padding,
-                       W_init = W_init,
-                       W_init_args = W_init_args,
-                       b_init = b_init,
-                       b_init_args = b_init_args,
-                       use_cudnn_on_gpu = use_cudnn_on_gpu,
-                       data_format = data_format,
-                       name = name)
+    net = Conv2dLayer(
+        net,
+        act=act,
+        shape=[filter_size[0], filter_size[1], pre_channel, n_filter],  # n_filter filters of size filter_size over pre_channel input channels
+        strides=[1, strides[0], strides[1], 1],
+        padding=padding,
+        W_init=W_init,
+        W_init_args=W_init_args,
+        b_init=b_init,
+        b_init_args=b_init_args,
+        use_cudnn_on_gpu=use_cudnn_on_gpu,
+        data_format=data_format,
+        name=name)
     return net
 
-def DeConv2d(net, n_out_channel = 32, filter_size=(3, 3),
-        out_size = (30, 30), strides = (2, 2), padding = 'SAME', batch_size = None, act = None,
-        W_init = tf.truncated_normal_initializer(stddev=0.02), b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {}, b_init_args = {}, name ='decnn2d'):
+
+def DeConv2d(net,
+             n_out_channel=32,
+             filter_size=(3, 3),
+             out_size=(30, 30),
+             strides=(2, 2),
+             padding='SAME',
+             batch_size=None,
+             act=None,
+             W_init=tf.truncated_normal_initializer(stddev=0.02),
+             b_init=tf.constant_initializer(value=0.0),
+             W_init_args={},
+             b_init_args={},
+             name='decnn2d'):
     """Wrapper for :class:`DeConv2dLayer`, if you don't understand how to use :class:`DeConv2dLayer`, this function may be easier.
 
     Parameters
@@ -2326,27 +2403,29 @@ def DeConv2d(net, n_out_channel = 32, filter_size=(3, 3),
     if act is None:
         act = tf.identity
     if batch_size is None:
-    #     batch_size = tf.shape(net.outputs)[0]
+        #     batch_size = tf.shape(net.outputs)[0]
         fixed_batch_size = net.outputs.get_shape().with_rank_at_least(1)[0]
         if fixed_batch_size.value:
             batch_size = fixed_batch_size.value
         else:
             from tensorflow.python.ops import array_ops
             batch_size = array_ops.shape(net.outputs)[0]
-    net = DeConv2dLayer(layer = net,
-                    act = act,
-                    shape = [filter_size[0], filter_size[1], n_out_channel, int(net.outputs.get_shape()[-1])],
-                    output_shape = [batch_size, int(out_size[0]), int(out_size[1]), n_out_channel],
-                    strides = [1, strides[0], strides[1], 1],
-                    padding = padding,
-                    W_init = W_init,
-                    b_init = b_init,
-                    W_init_args = W_init_args,
-                    b_init_args = b_init_args,
-                    name = name)
+    net = DeConv2dLayer(
+        layer=net,
+        act=act,
+        shape=[filter_size[0], filter_size[1], n_out_channel, int(net.outputs.get_shape()[-1])],
+        output_shape=[batch_size, int(out_size[0]), int(out_size[1]), n_out_channel],
+        strides=[1, strides[0], strides[1], 1],
+        padding=padding,
+        W_init=W_init,
+        b_init=b_init,
+        W_init_args=W_init_args,
+        b_init_args=b_init_args,
+        name=name)
     return net
 
-def MaxPool1d(net, filter_size, strides, padding='valid', data_format='channels_last', name=None): #Untested
+
+def MaxPool1d(net, filter_size, strides, padding='valid', data_format='channels_last', name=None):  #Untested
     """Wrapper for `tf.layers.max_pooling1d <https://www.tensorflow.org/api_docs/python/tf/layers/max_pooling1d>`_ .
 
     Parameters
@@ -2362,16 +2441,16 @@ def MaxPool1d(net, filter_size, strides, padding='valid', data_format='channels_
     --------
     - A :class:`Layer` which the output tensor, of rank 3.
     """
-    print("  [TL] MaxPool1d %s: filter_size:%s strides:%s padding:%s" %
-                        (name, str(filter_size), str(strides), str(padding)))
+    print("  [TL] MaxPool1d %s: filter_size:%s strides:%s padding:%s" % (name, str(filter_size), str(strides), str(padding)))
     outputs = tf.layers.max_pooling1d(net.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name)
 
     net_new = copy.copy(net)
     net_new.outputs = outputs
-    net_new.all_layers.extend( [outputs] )
+    net_new.all_layers.extend([outputs])
     return net_new
 
-def MeanPool1d(net, filter_size, strides, padding='valid', data_format='channels_last', name=None): #Untested
+
+def MeanPool1d(net, filter_size, strides, padding='valid', data_format='channels_last', name=None):  #Untested
     """Wrapper for `tf.layers.average_pooling1d <https://www.tensorflow.org/api_docs/python/tf/layers/average_pooling1d>`_ .
 
     Parameters
@@ -2387,15 +2466,15 @@ def MeanPool1d(net, filter_size, strides, padding='valid', data_format='channels
     --------
     - A :class:`Layer` which the output tensor, of rank 3.
     """
-    print("  [TL] MeanPool1d %s: filter_size:%s strides:%s padding:%s" %
-                        (name, str(filter_size), str(strides), str(padding)))
+    print("  [TL] MeanPool1d %s: filter_size:%s strides:%s padding:%s" % (name, str(filter_size), str(strides), str(padding)))
     outputs = tf.layers.average_pooling1d(net.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name)
 
     net_new = copy.copy(net)
     net_new.outputs = outputs
-    net_new.all_layers.extend( [outputs] )
+    net_new.all_layers.extend([outputs])
     return net_new
 
+
 def MaxPool2d(net, filter_size=(2, 2), strides=None, padding='SAME', name='maxpool'):
     """Wrapper for :class:`PoolLayer`.
 
@@ -2409,13 +2488,10 @@ def MaxPool2d(net, filter_size=(2, 2), strides=None, padding='SAME', name='maxpo
     if strides is None:
         strides = filter_size
     assert len(strides) == 2, "len(strides) should be 2, MaxPool2d and PoolLayer are different."
-    net = PoolLayer(net, ksize=[1, filter_size[0], filter_size[1], 1],
-            strides=[1, strides[0], strides[1], 1],
-            padding=padding,
-            pool = tf.nn.max_pool,
-            name = name)
+    net = PoolLayer(net, ksize=[1, filter_size[0], filter_size[1], 1], strides=[1, strides[0], strides[1], 1], padding=padding, pool=tf.nn.max_pool, name=name)
     return net
 
+
 def MeanPool2d(net, filter_size=(2, 2), strides=None, padding='SAME', name='meanpool'):
     """Wrapper for :class:`PoolLayer`.
 
@@ -2429,14 +2505,11 @@ def MeanPool2d(net, filter_size=(2, 2), strides=None, padding='SAME', name='mean
     if strides is None:
         strides = filter_size
     assert len(strides) == 2, "len(strides) should be 2, MeanPool2d and PoolLayer are different."
-    net = PoolLayer(net, ksize=[1, filter_size[0], filter_size[1], 1],
-            strides=[1, strides[0], strides[1], 1],
-            padding=padding,
-            pool = tf.nn.avg_pool,
-            name = name)
+    net = PoolLayer(net, ksize=[1, filter_size[0], filter_size[1], 1], strides=[1, strides[0], strides[1], 1], padding=padding, pool=tf.nn.avg_pool, name=name)
     return net
 
-def MaxPool3d(net, filter_size, strides, padding='valid', data_format='channels_last', name=None): #Untested
+
+def MaxPool3d(net, filter_size, strides, padding='valid', data_format='channels_last', name=None):  #Untested
     """Wrapper for `tf.layers.max_pooling3d <https://www.tensorflow.org/api_docs/python/tf/layers/max_pooling3d>`_ .
 
     Parameters
@@ -2448,16 +2521,16 @@ def MaxPool3d(net, filter_size, strides, padding='valid', data_format='channels_
     data_format : A string. The ordering of the dimensions in the inputs. channels_last (default) and channels_first are supported. channels_last corresponds to inputs with shape (batch, depth, height, width, channels) while channels_first corresponds to inputs with shape (batch, channels, depth, height, width).
     name : A string, the name of the layer.
     """
-    print("  [TL] MaxPool3d %s: filter_size:%s strides:%s padding:%s" %
-                        (name, str(filter_size), str(strides), str(padding)))
+    print("  [TL] MaxPool3d %s: filter_size:%s strides:%s padding:%s" % (name, str(filter_size), str(strides), str(padding)))
     outputs = tf.layers.max_pooling3d(net.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name)
 
     net_new = copy.copy(net)
     net_new.outputs = outputs
-    net_new.all_layers.extend( [outputs] )
+    net_new.all_layers.extend([outputs])
     return net_new
 
-def MeanPool3d(net, filter_size, strides, padding='valid', data_format='channels_last', name=None): #Untested
+
+def MeanPool3d(net, filter_size, strides, padding='valid', data_format='channels_last', name=None):  #Untested
     """Wrapper for `tf.layers.average_pooling3d <https://www.tensorflow.org/api_docs/python/tf/layers/average_pooling3d>`_
 
     Parameters
@@ -2469,15 +2542,15 @@ def MeanPool3d(net, filter_size, strides, padding='valid', data_format='channels
     data_format : A string. The ordering of the dimensions in the inputs. channels_last (default) and channels_first are supported. channels_last corresponds to inputs with shape (batch, depth, height, width, channels) while channels_first corresponds to inputs with shape (batch, channels, depth, height, width).
     name : A string, the name of the layer.
     """
-    print("  [TL] MeanPool3d %s: filter_size:%s strides:%s padding:%s" %
-                        (name, str(filter_size), str(strides), str(padding)))
+    print("  [TL] MeanPool3d %s: filter_size:%s strides:%s padding:%s" % (name, str(filter_size), str(strides), str(padding)))
     outputs = tf.layers.average_pooling3d(net.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name)
 
     net_new = copy.copy(net)
     net_new.outputs = outputs
-    net_new.all_layers.extend( [outputs] )
+    net_new.all_layers.extend([outputs])
     return net_new
 
+
 class DepthwiseConv2d(Layer):
     """Separable/Depthwise Convolutional 2D, see `tf.nn.depthwise_conv2d <https://www.tensorflow.org/versions/master/api_docs/python/tf/nn/depthwise_conv2d>`_.
 
@@ -2519,20 +2592,21 @@ class DepthwiseConv2d(Layer):
     - tflearn's `grouped_conv_2d <https://github.com/tflearn/tflearn/blob/3e0c3298ff508394f3ef191bcd7d732eb8860b2e/tflearn/layers/conv.py>`_
     - keras's `separableconv2d <https://keras.io/layers/convolutional/#separableconv2d>`_
     """
+
     def __init__(
-        self,
-        layer = None,
-        # n_filter = 32,
-        channel_multiplier = 3,
-        shape = (3, 3),
-        strides = (1, 1),
-        act = None,
-        padding='SAME',
-        W_init = tf.truncated_normal_initializer(stddev=0.02),
-        b_init = tf.constant_initializer(value=0.0),
-        W_init_args = {},
-        b_init_args = {},
-        name ='depthwise_conv2d',
+            self,
+            layer=None,
+            # n_filter = 32,
+            channel_multiplier=3,
+            shape=(3, 3),
+            strides=(1, 1),
+            act=None,
+            padding='SAME',
+            W_init=tf.truncated_normal_initializer(stddev=0.02),
+            b_init=tf.constant_initializer(value=0.0),
+            W_init_args={},
+            b_init_args={},
+            name='depthwise_conv2d',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
@@ -2540,15 +2614,14 @@ def __init__(
         if act is None:
             act = tf.identity
 
-        print("  [TL] DepthwiseConv2d %s: shape:%s strides:%s pad:%s act:%s" %
-                            (self.name, str(shape), str(strides), padding, act.__name__))
+        print("  [TL] DepthwiseConv2d %s: shape:%s strides:%s pad:%s act:%s" % (self.name, str(shape), str(strides), padding, act.__name__))
 
         if act is None:
             act = tf.identity
 
         try:
             pre_channel = int(layer.outputs.get_shape()[-1])
-        except: # if pre_channel is ?, it happens when using Spatial Transformer Net
+        except:  # pre_channel is unknown (?), which happens when using Spatial Transformer Net
             pre_channel = 1
             print("[warnings] unknow input channels, set to 1")
 
@@ -2560,21 +2633,24 @@ def __init__(
         assert len(strides) == 4, "len(strides) should be 4."
 
         with tf.variable_scope(name) as vs:
-            W = tf.get_variable(name='W_sepconv2d', shape=shape, initializer=W_init, dtype=D_TYPE, **W_init_args ) # [filter_height, filter_width, in_channels, channel_multiplier]
+            W = tf.get_variable(
+                name='W_sepconv2d', shape=shape, initializer=W_init, dtype=D_TYPE,
+                **W_init_args)  # [filter_height, filter_width, in_channels, channel_multiplier]
             if b_init:
-                b = tf.get_variable(name='b_sepconv2d', shape=(pre_channel*channel_multiplier), initializer=b_init, dtype=D_TYPE, **b_init_args )
-                self.outputs = act( tf.nn.depthwise_conv2d(self.inputs, W, strides=strides, padding=padding) + b )
+                b = tf.get_variable(name='b_sepconv2d', shape=(pre_channel * channel_multiplier), initializer=b_init, dtype=D_TYPE, **b_init_args)
+                self.outputs = act(tf.nn.depthwise_conv2d(self.inputs, W, strides=strides, padding=padding) + b)
             else:
-                self.outputs = act( tf.nn.depthwise_conv2d(self.inputs, W, strides=strides, padding=padding) )
+                self.outputs = act(tf.nn.depthwise_conv2d(self.inputs, W, strides=strides, padding=padding))
 
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
+        self.all_layers.extend([self.outputs])
         if b_init:
-            self.all_params.extend( [W, b] )
+            self.all_params.extend([W, b])
         else:
-            self.all_params.extend( [W] )
+            self.all_params.extend([W])
+
 
 ## Super resolution
 def SubpixelConv2d(net, scale=2, n_out_channel=None, act=tf.identity, name='subpixel_conv2d'):
@@ -2635,7 +2711,7 @@ def SubpixelConv2d(net, scale=2, n_out_channel=None, act=tf.identity, name='subp
 
     def _PS(X, r, n_out_channel):
         if n_out_channel >= 1:
-            assert int(X.get_shape()[-1]) == (r ** 2) * n_out_channel, _err_log
+            assert int(X.get_shape()[-1]) == (r**2) * n_out_channel, _err_log
             '''
             bsize, a, b, c = X.get_shape().as_list()
             bsize = tf.shape(X)[0] # Handling Dimension(None) type for undefined batch dim
@@ -2643,7 +2719,7 @@ def _PS(X, r, n_out_channel):
             Xr=tf.concat(Xs,2) #b*h*(r*w)*r
             X=tf.reshape(Xr,(bsize,r*a,r*b,n_out_channel)) # b*(r*h)*(r*w)*c
             '''
-            X=tf.depth_to_space(X,r)
+            X = tf.depth_to_space(X, r)
         else:
             print(_err_log)
         return X
@@ -2651,8 +2727,8 @@ def _PS(X, r, n_out_channel):
     inputs = net.outputs
 
     if n_out_channel is None:
-        assert int(inputs.get_shape()[-1])/ (scale ** 2) % 1 == 0, _err_log
-        n_out_channel = int(int(inputs.get_shape()[-1])/ (scale ** 2))
+        assert int(inputs.get_shape()[-1]) / (scale**2) % 1 == 0, _err_log
+        n_out_channel = int(int(inputs.get_shape()[-1]) / (scale**2))
 
     print("  [TL] SubpixelConv2d  %s: scale: %d n_out_channel: %s act: %s" % (name, scale, n_out_channel, act.__name__))
 
@@ -2664,9 +2740,10 @@ def _PS(X, r, n_out_channel):
     net_new.all_layers = list(net.all_layers)
     net_new.all_params = list(net.all_params)
     net_new.all_drop = dict(net.all_drop)
-    net_new.all_layers.extend( [net_new.outputs] )
+    net_new.all_layers.extend([net_new.outputs])
     return net_new
 
+
 def SubpixelConv2d_old(net, scale=2, n_out_channel=None, act=tf.identity, name='subpixel_conv2d'):
     """It is a sub-pixel 2d upsampling layer, usually be used
     for Super-Resolution applications, `example code <https://github.com/zsdonghao/SRGAN/>`_.
@@ -2723,10 +2800,10 @@ def SubpixelConv2d_old(net, scale=2, n_out_channel=None, act=tf.identity, name='
 
     def _PS(X, r, n_out_channel):
         if n_out_channel > 1:
-            assert int(X.get_shape()[-1]) == (r ** 2) * n_out_channel, _err_log
-            X=tf.transpose(X,[0,2,1,3])
-            X=tf.depth_to_space(X,r)
-            X=tf.transpose(X,[0,2,1,3])
+            assert int(X.get_shape()[-1]) == (r**2) * n_out_channel, _err_log
+            X = tf.transpose(X, [0, 2, 1, 3])
+            X = tf.depth_to_space(X, r)
+            X = tf.transpose(X, [0, 2, 1, 3])
         else:
             print(_err_log)
         return X
@@ -2734,8 +2811,8 @@ def _PS(X, r, n_out_channel):
     inputs = net.outputs
 
     if n_out_channel is None:
-        assert int(inputs.get_shape()[-1])/ (scale ** 2) % 1 == 0, _err_log
-        n_out_channel = int(int(inputs.get_shape()[-1])/ (scale ** 2))
+        assert int(inputs.get_shape()[-1]) / (scale**2) % 1 == 0, _err_log
+        n_out_channel = int(int(inputs.get_shape()[-1]) / (scale**2))
 
     print("  [TL] SubpixelConv2d  %s: scale: %d n_out_channel: %s act: %s" % (name, scale, n_out_channel, act.__name__))
 
@@ -2747,7 +2824,7 @@ def _PS(X, r, n_out_channel):
     net_new.all_layers = list(net.all_layers)
     net_new.all_params = list(net.all_params)
     net_new.all_drop = dict(net.all_drop)
-    net_new.all_layers.extend( [net_new.outputs] )
+    net_new.all_layers.extend([net_new.outputs])
     return net_new
 
 
@@ -2776,10 +2853,11 @@ def SubpixelConv1d(net, scale=2, act=tf.identity, name='subpixel_conv1d'):
     -----------
     - `Audio Super Resolution Implementation <https://github.com/kuleshov/audio-super-res/blob/master/src/models/layers/subpixel.py>`_.
     """
+
     def _PS(I, r):
-        X = tf.transpose(I, [2,1,0]) # (r, w, b)
-        X = tf.batch_to_space_nd(X, [r], [[0,0]]) # (1, r*w, b)
-        X = tf.transpose(X, [2,1,0])
+        X = tf.transpose(I, [2, 1, 0])  # (r, w, b)
+        X = tf.batch_to_space_nd(X, [r], [[0, 0]])  # (1, r*w, b)
+        X = tf.transpose(X, [2, 1, 0])
         return X
 
     print("  [TL] SubpixelConv1d  %s: scale: %d act: %s" % (name, scale, act.__name__))
@@ -2792,9 +2870,10 @@ def _PS(I, r):
     net_new.all_layers = list(net.all_layers)
     net_new.all_params = list(net.all_params)
     net_new.all_drop = dict(net.all_drop)
-    net_new.all_layers.extend( [net_new.outputs] )
+    net_new.all_layers.extend([net_new.outputs])
     return net_new
 
+
 ## Spatial Transformer Nets
 def transformer(U, theta, out_size, name='SpatialTransformer2dAffine', **kwargs):
     """Spatial Transformer Layer for `2D Affine Transformation <https://en.wikipedia.org/wiki/Affine_transformation>`_
@@ -2827,8 +2906,9 @@ def transformer(U, theta, out_size, name='SpatialTransformer2dAffine', **kwargs)
 
     def _repeat(x, n_repeats):
         with tf.variable_scope('_repeat'):
-            rep = tf.transpose(
-                tf.expand_dims(tf.ones(shape=tf.stack([n_repeats, ])), 1), [1, 0])
+            rep = tf.transpose(tf.expand_dims(tf.ones(shape=tf.stack([
+                n_repeats,
+            ])), 1), [1, 0])
             rep = tf.cast(rep, 'int32')
             x = tf.matmul(tf.reshape(x, (-1, 1)), rep)
             return tf.reshape(x, [-1])
@@ -2852,8 +2932,8 @@ def _interpolate(im, x, y, out_size):
             max_x = tf.cast(tf.shape(im)[2] - 1, 'int32')
 
             # scale indices from [-1, 1] to [0, width/height]
-            x = (x + 1.0)*(width_f) / 2.0
-            y = (y + 1.0)*(height_f) / 2.0
+            x = (x + 1.0) * (width_f) / 2.0
+            y = (y + 1.0) * (height_f) / 2.0
 
             # do sampling
             x0 = tf.cast(tf.floor(x), 'int32')
@@ -2866,10 +2946,10 @@ def _interpolate(im, x, y, out_size):
             y0 = tf.clip_by_value(y0, zero, max_y)
             y1 = tf.clip_by_value(y1, zero, max_y)
             dim2 = width
-            dim1 = width*height
-            base = _repeat(tf.range(num_batch)*dim1, out_height*out_width)
-            base_y0 = base + y0*dim2
-            base_y1 = base + y1*dim2
+            dim1 = width * height
+            base = _repeat(tf.range(num_batch) * dim1, out_height * out_width)
+            base_y0 = base + y0 * dim2
+            base_y1 = base + y1 * dim2
             idx_a = base_y0 + x0
             idx_b = base_y1 + x0
             idx_c = base_y0 + x1
@@ -2889,11 +2969,11 @@ def _interpolate(im, x, y, out_size):
             x1_f = tf.cast(x1, 'float32')
             y0_f = tf.cast(y0, 'float32')
             y1_f = tf.cast(y1, 'float32')
-            wa = tf.expand_dims(((x1_f-x) * (y1_f-y)), 1)
-            wb = tf.expand_dims(((x1_f-x) * (y-y0_f)), 1)
-            wc = tf.expand_dims(((x-x0_f) * (y1_f-y)), 1)
-            wd = tf.expand_dims(((x-x0_f) * (y-y0_f)), 1)
-            output = tf.add_n([wa*Ia, wb*Ib, wc*Ic, wd*Id])
+            wa = tf.expand_dims(((x1_f - x) * (y1_f - y)), 1)
+            wb = tf.expand_dims(((x1_f - x) * (y - y0_f)), 1)
+            wc = tf.expand_dims(((x - x0_f) * (y1_f - y)), 1)
+            wd = tf.expand_dims(((x - x0_f) * (y - y0_f)), 1)
+            output = tf.add_n([wa * Ia, wb * Ib, wc * Ic, wd * Id])
             return output
 
     def _meshgrid(height, width):
@@ -2903,10 +2983,8 @@ def _meshgrid(height, width):
             #                         np.linspace(-1, 1, height))
             #  ones = np.ones(np.prod(x_t.shape))
             #  grid = np.vstack([x_t.flatten(), y_t.flatten(), ones])
-            x_t = tf.matmul(tf.ones(shape=tf.stack([height, 1])),
-                            tf.transpose(tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), [1, 0]))
-            y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1),
-                            tf.ones(shape=tf.stack([1, width])))
+            x_t = tf.matmul(tf.ones(shape=tf.stack([height, 1])), tf.transpose(tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), [1, 0]))
+            y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1), tf.ones(shape=tf.stack([1, width])))
 
             x_t_flat = tf.reshape(x_t, (1, -1))
             y_t_flat = tf.reshape(y_t, (1, -1))
@@ -2942,18 +3020,16 @@ def _transform(theta, input_dim, out_size):
             x_s_flat = tf.reshape(x_s, [-1])
             y_s_flat = tf.reshape(y_s, [-1])
 
-            input_transformed = _interpolate(
-                input_dim, x_s_flat, y_s_flat,
-                out_size)
+            input_transformed = _interpolate(input_dim, x_s_flat, y_s_flat, out_size)
 
-            output = tf.reshape(
-                input_transformed, tf.stack([num_batch, out_height, out_width, num_channels]))
+            output = tf.reshape(input_transformed, tf.stack([num_batch, out_height, out_width, num_channels]))
             return output
 
     with tf.variable_scope(name):
         output = _transform(theta, U, out_size)
         return output
 
+
 def batch_transformer(U, thetas, out_size, name='BatchSpatialTransformer2dAffine'):
     """Batch Spatial Transformer function for `2D Affine Transformation <https://en.wikipedia.org/wiki/Affine_transformation>`_.
 
@@ -2970,10 +3046,11 @@ def batch_transformer(U, thetas, out_size, name='BatchSpatialTransformer2dAffine
     """
     with tf.variable_scope(name):
         num_batch, num_transforms = map(int, thetas.get_shape().as_list()[:2])
-        indices = [[i]*num_transforms for i in xrange(num_batch)]
+        indices = [[i] * num_transforms for i in xrange(num_batch)]
         input_repeated = tf.gather(U, tf.reshape(indices, [-1]))
         return transformer(input_repeated, thetas, out_size)
 
+
 class SpatialTransformer2dAffineLayer(Layer):
     """The :class:`SpatialTransformer2dAffineLayer` class is a
     `Spatial Transformer Layer <https://arxiv.org/abs/1506.02025>`_ for
@@ -2992,23 +3069,23 @@ class SpatialTransformer2dAffineLayer(Layer):
     - `Spatial Transformer Networks <https://arxiv.org/abs/1506.02025>`_
     - `TensorFlow/Models <https://github.com/tensorflow/models/tree/master/transformer>`_
     """
+
     def __init__(
-        self,
-        layer = None,
-        theta_layer = None,
-        out_size = [40, 40],
-        name ='sapatial_trans_2d_affine',
+            self,
+            layer=None,
+            theta_layer=None,
+            out_size=[40, 40],
+            name='sapatial_trans_2d_affine',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
         self.theta_layer = theta_layer
-        print("  [TL] SpatialTransformer2dAffineLayer %s: in_size:%s out_size:%s" %
-                                (name, self.inputs.get_shape().as_list(), out_size))
+        print("  [TL] SpatialTransformer2dAffineLayer %s: in_size:%s out_size:%s" % (name, self.inputs.get_shape().as_list(), out_size))
 
         with tf.variable_scope(name) as vs:
             ## 1. make the localisation network to [batch, 6] via Flatten and Dense.
             if self.theta_layer.outputs.get_shape().ndims > 2:
-                 self.theta_layer.outputs = flatten_reshape(self.theta_layer.outputs, 'flatten')
+                self.theta_layer.outputs = flatten_reshape(self.theta_layer.outputs, 'flatten')
             ## 2. To initialize the network to the identity transform init.
             # 2.1 W
             n_in = int(self.theta_layer.outputs.get_shape()[-1])
@@ -3051,8 +3128,8 @@ def __init__(
         self.all_drop.update(theta_layer.all_drop)
 
         ## this layer
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( variables )
+        self.all_layers.extend([self.outputs])
+        self.all_params.extend(variables)
 
 
 # ## Normalization layer
@@ -3070,26 +3147,27 @@ class LocalResponseNormLayer(Layer):
     beta : An optional float. Defaults to 0.5. An exponent.
     name : A string or None, an optional name to attach to this layer.
     """
+
     def __init__(
-        self,
-        layer = None,
-        depth_radius = None,
-        bias = None,
-        alpha = None,
-        beta = None,
-        name ='lrn_layer',
+            self,
+            layer=None,
+            depth_radius=None,
+            bias=None,
+            alpha=None,
+            beta=None,
+            name='lrn_layer',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
-        print("  [TL] LocalResponseNormLayer %s: depth_radius: %d, bias: %f, alpha: %f, beta: %f" %
-                            (self.name, depth_radius, bias, alpha, beta))
+        print("  [TL] LocalResponseNormLayer %s: depth_radius: %d, bias: %f, alpha: %f, beta: %f" % (self.name, depth_radius, bias, alpha, beta))
         with tf.variable_scope(name) as vs:
             self.outputs = tf.nn.lrn(self.inputs, depth_radius=depth_radius, bias=bias, alpha=alpha, beta=beta)
 
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
+        self.all_layers.extend([self.outputs])
+
 
 class BatchNormLayer(Layer):
     """
@@ -3121,22 +3199,22 @@ class BatchNormLayer(Layer):
     - `Source <https://github.com/ry/tensorflow-resnet/blob/master/resnet.py>`_
     - `stackoverflow <http://stackoverflow.com/questions/38312668/how-does-one-do-inference-with-batch-normalization-with-tensor-flow>`_
     """
+
     def __init__(
-        self,
-        layer = None,
-        decay = 0.9,
-        epsilon = 0.00001,
-        act = tf.identity,
-        is_train = False,
-        beta_init = tf.zeros_initializer,
-        gamma_init = tf.random_normal_initializer(mean=1.0, stddev=0.002), # tf.ones_initializer,
-        # dtype = tf.float32,
-        name ='batchnorm_layer',
+            self,
+            layer=None,
+            decay=0.9,
+            epsilon=0.00001,
+            act=tf.identity,
+            is_train=False,
+            beta_init=tf.zeros_initializer,
+            gamma_init=tf.random_normal_initializer(mean=1.0, stddev=0.002),  # tf.ones_initializer,
+            # dtype = tf.float32,
+            name='batchnorm_layer',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
-        print("  [TL] BatchNormLayer %s: decay:%f epsilon:%f act:%s is_train:%s" %
-                            (self.name, decay, epsilon, act.__name__, is_train))
+        print("  [TL] BatchNormLayer %s: decay:%f epsilon:%f act:%s is_train:%s" % (self.name, decay, epsilon, act.__name__, is_train))
         x_shape = self.inputs.get_shape()
         params_shape = x_shape[-1:]
 
@@ -3149,47 +3227,41 @@ def __init__(
             ## 1. beta, gamma
             if tf.__version__ > '0.12.1' and beta_init == tf.zeros_initializer:
                 beta_init = beta_init()
-            beta = tf.get_variable('beta', shape=params_shape,
-                               initializer=beta_init,
-                               dtype=D_TYPE,
-                               trainable=is_train)#, restore=restore)
+            beta = tf.get_variable('beta', shape=params_shape, initializer=beta_init, dtype=D_TYPE, trainable=is_train)  #, restore=restore)
 
-            gamma = tf.get_variable('gamma', shape=params_shape,
-                                initializer=gamma_init,
-                                dtype=D_TYPE,
-                                trainable=is_train,
-                                )#restore=restore)
+            gamma = tf.get_variable(
+                'gamma',
+                shape=params_shape,
+                initializer=gamma_init,
+                dtype=D_TYPE,
+                trainable=is_train,
+            )  #restore=restore)
 
             ## 2.
             if tf.__version__ > '0.12.1':
                 moving_mean_init = tf.zeros_initializer()
             else:
                 moving_mean_init = tf.zeros_initializer
-            moving_mean = tf.get_variable('moving_mean',
-                                      params_shape,
-                                      initializer=moving_mean_init,
-                                      dtype=D_TYPE,
-                                      trainable=False)#   restore=restore)
-            moving_variance = tf.get_variable('moving_variance',
-                                          params_shape,
-                                          initializer=tf.constant_initializer(1.),
-                                          dtype=D_TYPE,
-                                          trainable=False,)#   restore=restore)
+            moving_mean = tf.get_variable('moving_mean', params_shape, initializer=moving_mean_init, dtype=D_TYPE, trainable=False)  #   restore=restore)
+            moving_variance = tf.get_variable(
+                'moving_variance',
+                params_shape,
+                initializer=tf.constant_initializer(1.),
+                dtype=D_TYPE,
+                trainable=False,
+            )  #   restore=restore)
 
             ## 3.
             # These ops will only be preformed when training.
             mean, variance = tf.nn.moments(self.inputs, axis)
-            try:    # TF12
-                update_moving_mean = moving_averages.assign_moving_average(
-                                moving_mean, mean, decay, zero_debias=False)     # if zero_debias=True, has bias
+            try:  # TF12
+                update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, decay, zero_debias=False)  # if zero_debias=True, has bias
                 update_moving_variance = moving_averages.assign_moving_average(
-                                moving_variance, variance, decay, zero_debias=False) # if zero_debias=True, has bias
+                    moving_variance, variance, decay, zero_debias=False)  # if zero_debias=True, has bias
                 # print("TF12 moving")
             except Exception as e:  # TF11
-                update_moving_mean = moving_averages.assign_moving_average(
-                                moving_mean, mean, decay)
-                update_moving_variance = moving_averages.assign_moving_average(
-                                moving_variance, variance, decay)
+                update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, decay)
+                update_moving_variance = moving_averages.assign_moving_average(moving_variance, variance, decay)
                 # print("TF11 moving")
 
             def mean_var_with_update():
@@ -3198,9 +3270,9 @@ def mean_var_with_update():
 
             if is_train:
                 mean, var = mean_var_with_update()
-                self.outputs = act( tf.nn.batch_normalization(self.inputs, mean, var, beta, gamma, epsilon) )
+                self.outputs = act(tf.nn.batch_normalization(self.inputs, mean, var, beta, gamma, epsilon))
             else:
-                self.outputs = act( tf.nn.batch_normalization(self.inputs, moving_mean, moving_variance, beta, gamma, epsilon) )
+                self.outputs = act(tf.nn.batch_normalization(self.inputs, moving_mean, moving_variance, beta, gamma, epsilon))
 
             variables = [beta, gamma, moving_mean, moving_variance]
 
@@ -3212,8 +3284,9 @@ def mean_var_with_update():
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( variables )
+        self.all_layers.extend([self.outputs])
+        self.all_params.extend(variables)
+
 
 # class BatchNormLayer_TF(Layer):   # Work well TF contrib https://github.com/tensorflow/tensorflow/blob/b826b79718e3e93148c3545e7aa3f90891744cc0/tensorflow/contrib/layers/python/layers/layers.py#L100
 #     """
@@ -3930,6 +4003,7 @@ def mean_var_with_update():
 #         self.all_layers.extend( [self.outputs] )
 #         self.all_params.extend( [beta, gamma] )
 
+
 class InstanceNormLayer(Layer):
     """The :class:`InstanceNormLayer` class is a for instance normalization.
 
@@ -3947,35 +4021,34 @@ class InstanceNormLayer(Layer):
     name : a string or None
         An optional name to attach to this layer.
     """
+
     def __init__(
-    self,
-    layer = None,
-    act = tf.identity,
-    epsilon = 1e-5,
-    scale_init = tf.truncated_normal_initializer(mean=1.0, stddev=0.02),
-    offset_init = tf.constant_initializer(0.0),
-    name ='instan_norm',
+            self,
+            layer=None,
+            act=tf.identity,
+            epsilon=1e-5,
+            scale_init=tf.truncated_normal_initializer(mean=1.0, stddev=0.02),
+            offset_init=tf.constant_initializer(0.0),
+            name='instan_norm',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
-        print("  [TL] InstanceNormLayer %s: epsilon:%f act:%s" %
-                            (self.name, epsilon, act.__name__))
+        print("  [TL] InstanceNormLayer %s: epsilon:%f act:%s" % (self.name, epsilon, act.__name__))
 
         with tf.variable_scope(name) as vs:
             mean, var = tf.nn.moments(self.inputs, [1, 2], keep_dims=True)
-            scale = tf.get_variable('scale',[self.inputs.get_shape()[-1]],
-                initializer=tf.truncated_normal_initializer(mean=1.0, stddev=0.02), dtype=D_TYPE)
-            offset = tf.get_variable('offset',[self.inputs.get_shape()[-1]],
-                initializer=tf.constant_initializer(0.0), dtype=D_TYPE)
-            self.outputs = scale * tf.div(self.inputs-mean, tf.sqrt(var+epsilon)) + offset
+            scale = tf.get_variable('scale', [self.inputs.get_shape()[-1]], initializer=tf.truncated_normal_initializer(mean=1.0, stddev=0.02), dtype=D_TYPE)
+            offset = tf.get_variable('offset', [self.inputs.get_shape()[-1]], initializer=tf.constant_initializer(0.0), dtype=D_TYPE)
+            self.outputs = scale * tf.div(self.inputs - mean, tf.sqrt(var + epsilon)) + offset
             self.outputs = act(self.outputs)
             variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
 
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( variables )
+        self.all_layers.extend([self.outputs])
+        self.all_params.extend(variables)
+
 
 class LayerNormLayer(Layer):
     """
@@ -3989,29 +4062,29 @@ class LayerNormLayer(Layer):
         The function that is applied to the layer activations.
     others : see  `tf.contrib.layers.layer_norm <https://www.tensorflow.org/api_docs/python/tf/contrib/layers/layer_norm>`_
     """
+
     def __init__(self,
-                layer=None,
-                center=True,
-                scale=True,
-                act=tf.identity,
-                reuse=None,
-                variables_collections=None,
-                outputs_collections=None,
-                trainable=True,
-                begin_norm_axis=1,
-                begin_params_axis=-1,
-                name='layernorm'
-                ):
+                 layer=None,
+                 center=True,
+                 scale=True,
+                 act=tf.identity,
+                 reuse=None,
+                 variables_collections=None,
+                 outputs_collections=None,
+                 trainable=True,
+                 begin_norm_axis=1,
+                 begin_params_axis=-1,
+                 name='layernorm'):
 
         if tf.__version__ < "1.3":
             raise Exception("Please use TF 1.3+")
 
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
-        print("  [TL] LayerNormLayer %s: act:%s" %
-                            (self.name, act.__name__))
+        print("  [TL] LayerNormLayer %s: act:%s" % (self.name, act.__name__))
         with tf.variable_scope(name) as vs:
-            self.outputs = tf.contrib.layers.layer_norm(self.inputs,
+            self.outputs = tf.contrib.layers.layer_norm(
+                self.inputs,
                 center=center,
                 scale=scale,
                 activation_fn=act,
@@ -4022,14 +4095,15 @@ def __init__(self,
                 begin_norm_axis=begin_norm_axis,
                 begin_params_axis=begin_params_axis,
                 scope='var',
-                )
+            )
             variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
 
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( variables )
+        self.all_layers.extend([self.outputs])
+        self.all_params.extend(variables)
+
 
 ## Pooling layer
 class PoolLayer(Layer):
@@ -4061,26 +4135,27 @@ class PoolLayer(Layer):
     --------
     - see :class:`Conv2dLayer`.
     """
+
     def __init__(
-        self,
-        layer = None,
-        ksize=[1, 2, 2, 1],
-        strides=[1, 2, 2, 1],
-        padding='SAME',
-        pool = tf.nn.max_pool,
-        name ='pool_layer',
+            self,
+            layer=None,
+            ksize=[1, 2, 2, 1],
+            strides=[1, 2, 2, 1],
+            padding='SAME',
+            pool=tf.nn.max_pool,
+            name='pool_layer',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
-        print("  [TL] PoolLayer   %s: ksize:%s strides:%s padding:%s pool:%s" %
-                            (self.name, str(ksize), str(strides), padding, pool.__name__))
+        print("  [TL] PoolLayer   %s: ksize:%s strides:%s padding:%s pool:%s" % (self.name, str(ksize), str(strides), padding, pool.__name__))
 
         self.outputs = pool(self.inputs, ksize=ksize, strides=strides, padding=padding, name=name)
 
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
+        self.all_layers.extend([self.outputs])
+
 
 ## Padding layer
 class PadLayer(Layer):
@@ -4097,25 +4172,26 @@ class PadLayer(Layer):
     name : a string or None
         An optional name to attach to this layer.
     """
+
     def __init__(
-        self,
-        layer = None,
-        paddings = None,
-        mode = 'CONSTANT',
-        name = 'pad_layer',
+            self,
+            layer=None,
+            paddings=None,
+            mode='CONSTANT',
+            name='pad_layer',
     ):
         Layer.__init__(self, name=name)
         assert paddings is not None, "paddings should be a Tensor of type int32. see https://www.tensorflow.org/api_docs/python/tf/pad"
         self.inputs = layer.outputs
-        print("  [TL] PadLayer   %s: paddings:%s mode:%s" %
-                            (self.name, list(paddings), mode))
+        print("  [TL] PadLayer   %s: paddings:%s mode:%s" % (self.name, list(paddings), mode))
 
         self.outputs = tf.pad(self.inputs, paddings=paddings, mode=mode, name=name)
 
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
+        self.all_layers.extend([self.outputs])
+
 
 ## Object Detection
 class ROIPoolingLayer(Layer):
@@ -4135,18 +4211,19 @@ class ROIPoolingLayer(Layer):
     - This implementation is from `Deepsense-AI <https://github.com/deepsense-ai/roi-pooling>`_ .
     - Please install it by the instruction `HERE <https://github.com/zsdonghao/tensorlayer/blob/master/tensorlayer/third_party/roi_pooling/README.md>`_.
     """
+
     def __init__(
-        self,
-        #inputs = None,
-        layer = None,
-        rois = None,
-        pool_height = 2,
-        pool_width = 2,
-        name = 'roipooling_layer',
+            self,
+            #inputs = None,
+            layer=None,
+            rois=None,
+            pool_height=2,
+            pool_width=2,
+            name='roipooling_layer',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
-        print ("  [TL] ROIPoolingLayer %s: (%d, %d)" % (self.name, pool_height, pool_width))
+        print("  [TL] ROIPoolingLayer %s: (%d, %d)" % (self.name, pool_height, pool_width))
         try:
             from tensorlayer.third_party.roi_pooling.roi_pooling.roi_pooling_ops import roi_pooling
         except Exception as e:
@@ -4157,7 +4234,7 @@ def __init__(
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
+        self.all_layers.extend([self.outputs])
 
 
 ## TimeDistributedLayer
@@ -4194,17 +4271,17 @@ class TimeDistributedLayer(Layer):
     ... param   1: (50,)              time_dense/dense/b:0
     ... num of params: 5050
     """
+
     def __init__(
-        self,
-        layer = None,
-        layer_class = None,
-        args = {},
-        name ='time_distributed',
+            self,
+            layer=None,
+            layer_class=None,
+            args={},
+            name='time_distributed',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
-        print("  [TL] TimeDistributedLayer %s: layer_class:%s args:%s" %
-                            (self.name, layer_class.__name__, args))
+        print("  [TL] TimeDistributedLayer %s: layer_class:%s args:%s" % (self.name, layer_class.__name__, args))
 
         if not args: args = dict()
         assert isinstance(args, dict), "'args' must be a dict."
@@ -4219,9 +4296,9 @@ def __init__(
 
         with ops.suppress_stdout():
             for i in range(0, timestep):
-                with tf.variable_scope(name, reuse=(set_keep['name_reuse'] if i==0 else True)) as vs:
-                    set_name_reuse((set_keep['name_reuse'] if i==0 else True))
-                    net = layer_class(InputLayer(x[i], name=args['name']+str(i)), **args)
+                with tf.variable_scope(name, reuse=(set_keep['name_reuse'] if i == 0 else True)) as vs:
+                    set_name_reuse((set_keep['name_reuse'] if i == 0 else True))
+                    net = layer_class(InputLayer(x[i], name=args['name'] + str(i)), **args)
                     # net = layer_class(InputLayer(x[i], name="input_"+args['name']), **args)
                     x[i] = net.outputs
                     variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
@@ -4231,9 +4308,8 @@ def __init__(
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( variables )
-
+        self.all_layers.extend([self.outputs])
+        self.all_params.extend(variables)
 
 
 ## Recurrent layer
@@ -4381,19 +4457,20 @@ class RNNLayer(Layer):
     - `tensorflow/python/ops/rnn_cell.py <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn_cell.py>`_
     - see TensorFlow tutorial ``ptb_word_lm.py``, TensorLayer tutorials ``tutorial_ptb_lstm*.py`` and ``tutorial_generate_text.py``
     """
+
     def __init__(
-        self,
-        layer = None,
-        cell_fn = None,#tf.nn.rnn_cell.BasicRNNCell,
-        cell_init_args = {},
-        n_hidden = 100,
-        initializer = tf.random_uniform_initializer(-0.1, 0.1),
-        n_steps = 5,
-        initial_state = None,
-        return_last = False,
-        # is_reshape = True,
-        return_seq_2d = False,
-        name = 'rnn_layer',
+            self,
+            layer=None,
+            cell_fn=None,  #tf.nn.rnn_cell.BasicRNNCell,
+            cell_init_args={},
+            n_hidden=100,
+            initializer=tf.random_uniform_initializer(-0.1, 0.1),
+            n_steps=5,
+            initial_state=None,
+            return_last=False,
+            # is_reshape = True,
+            return_seq_2d=False,
+            name='rnn_layer',
     ):
         Layer.__init__(self, name=name)
         if cell_fn is None:
@@ -4406,8 +4483,8 @@ def __init__(
 
         self.inputs = layer.outputs
 
-        print("  [TL] RNNLayer %s: n_hidden:%d n_steps:%d in_dim:%d in_shape:%s cell_fn:%s " % (self.name, n_hidden,
-            n_steps, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__))
+        print("  [TL] RNNLayer %s: n_hidden:%d n_steps:%d in_dim:%d in_shape:%s cell_fn:%s " % (self.name, n_hidden, n_steps, self.inputs.get_shape().ndims,
+                                                                                                self.inputs.get_shape(), cell_fn.__name__))
         # You can get the dimension by .get_shape() or ._shape, and check the
         # dimension by .with_rank() as follow.
         # self.inputs.get_shape().with_rank(2)
@@ -4419,7 +4496,6 @@ def __init__(
         except:
             raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps, n_features]")
 
-
         # is_reshape : boolean (deprecate)
         #     Reshape the inputs to 3 dimension tensor.\n
         #     If input is［batch_size, n_steps, n_features], we do not need to reshape it.\n
@@ -4476,18 +4552,17 @@ def __init__(
             if return_seq_2d:
                 # PTB tutorial: stack dense layer after that, or compute the cost from the output
                 # 2D Tensor [n_example, n_hidden]
-                try: # TF1.0
+                try:  # TF1.0
                     self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_hidden])
-                except: # TF0.12
+                except:  # TF0.12
                     self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_hidden])
 
-
             else:
                 # <akara>: stack more RNN layer after that
                 # 3D Tensor [n_example/n_steps, n_steps, n_hidden]
-                try: # TF1.0
+                try:  # TF1.0
                     self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_steps, n_hidden])
-                except: # TF0.12
+                except:  # TF0.12
                     self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_steps, n_hidden])
 
         self.final_state = state
@@ -4496,8 +4571,9 @@ def __init__(
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
         # print(type(self.outputs))
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( rnn_variables )
+        self.all_layers.extend([self.outputs])
+        self.all_params.extend(rnn_variables)
+
 
 class BiRNNLayer(Layer):
     """
@@ -4568,21 +4644,23 @@ class BiRNNLayer(Layer):
     ----------
     - `Source <https://github.com/akaraspt/deepsleep/blob/master/deepsleep/model.py>`_
     """
+
     def __init__(
-        self,
-        layer = None,
-        cell_fn = None, #tf.nn.rnn_cell.LSTMCell,
-        cell_init_args = {'use_peepholes':True, 'state_is_tuple':True},
-        n_hidden = 100,
-        initializer = tf.random_uniform_initializer(-0.1, 0.1),
-        n_steps = 5,
-        fw_initial_state = None,
-        bw_initial_state = None,
-        dropout = None,
-        n_layer = 1,
-        return_last = False,
-        return_seq_2d = False,
-        name = 'birnn_layer',
+            self,
+            layer=None,
+            cell_fn=None,  #tf.nn.rnn_cell.LSTMCell,
+            cell_init_args={'use_peepholes': True,
+                            'state_is_tuple': True},
+            n_hidden=100,
+            initializer=tf.random_uniform_initializer(-0.1, 0.1),
+            n_steps=5,
+            fw_initial_state=None,
+            bw_initial_state=None,
+            dropout=None,
+            n_layer=1,
+            return_last=False,
+            return_seq_2d=False,
+            name='birnn_layer',
     ):
         Layer.__init__(self, name=name)
         if cell_fn is None:
@@ -4595,8 +4673,10 @@ def __init__(
 
         self.inputs = layer.outputs
 
-        print("  [TL] BiRNNLayer %s: n_hidden:%d n_steps:%d in_dim:%d in_shape:%s cell_fn:%s dropout:%s n_layer:%d " % (self.name, n_hidden,
-            n_steps, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__, dropout, n_layer))
+        print("  [TL] BiRNNLayer %s: n_hidden:%d n_steps:%d in_dim:%d in_shape:%s cell_fn:%s dropout:%s n_layer:%d " % (self.name, n_hidden, n_steps,
+                                                                                                                        self.inputs.get_shape().ndims,
+                                                                                                                        self.inputs.get_shape(),
+                                                                                                                        cell_fn.__name__, dropout, n_layer))
 
         fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0]
 
@@ -4624,15 +4704,12 @@ def __init__(
                 elif isinstance(dropout, float):
                     in_keep_prob, out_keep_prob = dropout, dropout
                 else:
-                    raise Exception("Invalid dropout type (must be a 2-D tuple of "
-                                    "float)")
-                try: # TF 1.0
+                    raise Exception("Invalid dropout type (must be a 2-D tuple of " "float)")
+                try:  # TF 1.0
                     DropoutWrapper_fn = tf.contrib.rnn.DropoutWrapper
                 except:
                     DropoutWrapper_fn = tf.nn.rnn_cell.DropoutWrapper
-                cell_creator = lambda: DropoutWrapper_fn(rnn_creator(),
-                                                         input_keep_prob=in_keep_prob,
-                                                         output_keep_prob=1.0)  # out_keep_prob)
+                cell_creator = lambda: DropoutWrapper_fn(rnn_creator(), input_keep_prob=in_keep_prob, output_keep_prob=1.0)  # out_keep_prob)
             else:
                 cell_creator = rnn_creator
             self.fw_cell = cell_creator()
@@ -4640,7 +4717,7 @@ def __init__(
 
             # Apply multiple layers
             if n_layer > 1:
-                try: # TF1.0
+                try:  # TF1.0
                     MultiRNNCell_fn = tf.contrib.rnn.MultiRNNCell
                 except:
                     MultiRNNCell_fn = tf.nn.rnn_cell.MultiRNNCell
@@ -4654,31 +4731,30 @@ def __init__(
 
             # Initial state of RNN
             if fw_initial_state is None:
-                self.fw_initial_state = self.fw_cell.zero_state(self.batch_size, dtype=D_TYPE) # dtype=tf.float32)
+                self.fw_initial_state = self.fw_cell.zero_state(self.batch_size, dtype=D_TYPE)  # dtype=tf.float32)
             else:
                 self.fw_initial_state = fw_initial_state
             if bw_initial_state is None:
-                self.bw_initial_state = self.bw_cell.zero_state(self.batch_size, dtype=D_TYPE) # dtype=tf.float32)
+                self.bw_initial_state = self.bw_cell.zero_state(self.batch_size, dtype=D_TYPE)  # dtype=tf.float32)
             else:
                 self.bw_initial_state = bw_initial_state
             # exit()
             # Feedforward to MultiRNNCell
-            try: ## TF1.0
+            try:  ## TF1.0
                 list_rnn_inputs = tf.unstack(self.inputs, axis=1)
-            except: ## TF0.12
+            except:  ## TF0.12
                 list_rnn_inputs = tf.unpack(self.inputs, axis=1)
 
-            try: # TF1.0
+            try:  # TF1.0
                 bidirectional_rnn_fn = tf.contrib.rnn.static_bidirectional_rnn
             except:
                 bidirectional_rnn_fn = tf.nn.bidirectional_rnn
-            outputs, fw_state, bw_state = bidirectional_rnn_fn(               # outputs, fw_state, bw_state = tf.contrib.rnn.static_bidirectional_rnn(
+            outputs, fw_state, bw_state = bidirectional_rnn_fn(  # outputs, fw_state, bw_state = tf.contrib.rnn.static_bidirectional_rnn(
                 cell_fw=self.fw_cell,
                 cell_bw=self.bw_cell,
                 inputs=list_rnn_inputs,
                 initial_state_fw=self.fw_initial_state,
-                initial_state_bw=self.bw_initial_state
-            )
+                initial_state_bw=self.bw_initial_state)
 
             if return_last:
                 raise Exception("Do not support return_last at the moment.")
@@ -4687,18 +4763,18 @@ def __init__(
                 self.outputs = outputs
                 if return_seq_2d:
                     # 2D Tensor [n_example, n_hidden]
-                    try: # TF1.0
-                        self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_hidden*2])
-                    except: # TF0.12
-                        self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_hidden*2])
+                    try:  # TF1.0
+                        self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_hidden * 2])
+                    except:  # TF0.12
+                        self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_hidden * 2])
                 else:
                     # <akara>: stack more RNN layer after that
                     # 3D Tensor [n_example/n_steps, n_steps, n_hidden]
 
-                    try: # TF1.0
-                        self.outputs = tf.reshape(tf.concat(outputs,1), [-1, n_steps, n_hidden*2])
-                    except: # TF0.12
-                        self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_steps, n_hidden*2])
+                    try:  # TF1.0
+                        self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_steps, n_hidden * 2])
+                    except:  # TF0.12
+                        self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_steps, n_hidden * 2])
             self.fw_final_state = fw_state
             self.bw_final_state = bw_state
 
@@ -4710,8 +4786,8 @@ def __init__(
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( rnn_variables )
+        self.all_layers.extend([self.outputs])
+        self.all_params.extend(rnn_variables)
 
 
 # ConvLSTM layer
@@ -4750,6 +4826,7 @@ def zero_state(self, batch_size, dtype):
         zeros = tf.zeros([batch_size, shape[0], shape[1], num_features * 2])
         return zeros
 
+
 class BasicConvLSTMCell(ConvRNNCell):
     """Basic Conv LSTM recurrent network cell.
 
@@ -4765,8 +4842,8 @@ class BasicConvLSTMCell(ConvRNNCell):
         along the column axis.  The latter behavior will soon be deprecated.
     activation : Activation function of the inner states.
     """
-    def __init__(self, shape, filter_size, num_features, forget_bias=1.0, input_size=None,
-                 state_is_tuple=False, activation=tf.nn.tanh):
+
+    def __init__(self, shape, filter_size, num_features, forget_bias=1.0, input_size=None, state_is_tuple=False, activation=tf.nn.tanh):
         """Initialize the basic Conv LSTM cell.
         """
         # if not state_is_tuple:
@@ -4784,8 +4861,7 @@ def __init__(self, shape, filter_size, num_features, forget_bias=1.0, input_size
     @property
     def state_size(self):
         """ State size of the LSTMStateTuple. """
-        return (LSTMStateTuple(self._num_units, self._num_units)
-                if self._state_is_tuple else 2 * self._num_units)
+        return (LSTMStateTuple(self._num_units, self._num_units) if self._state_is_tuple else 2 * self._num_units)
 
     @property
     def output_size(self):
@@ -4808,8 +4884,7 @@ def __call__(self, inputs, state, scope=None):
             # i, j, f, o = tf.split(3, 4, concat)
             i, j, f, o = tf.split(concat, 4, 3)
 
-            new_c = (c * tf.nn.sigmoid(f + self._forget_bias) + tf.nn.sigmoid(i) *
-                     self._activation(j))
+            new_c = (c * tf.nn.sigmoid(f + self._forget_bias) + tf.nn.sigmoid(i) * self._activation(j))
             new_h = self._activation(new_c) * tf.nn.sigmoid(o)
 
             if self._state_is_tuple:
@@ -4818,6 +4893,7 @@ def __call__(self, inputs, state, scope=None):
                 new_state = tf.concat([new_c, new_h], 3)
             return new_h, new_state
 
+
 def _conv_linear(args, filter_size, num_features, bias, bias_start=0.0, scope=None):
     """convolution:
 
@@ -4853,21 +4929,17 @@ def _conv_linear(args, filter_size, num_features, bias, bias_start=0.0, scope=No
 
     # Now the computation.
     with tf.variable_scope(scope or "Conv"):
-        matrix = tf.get_variable(
-            "Matrix", [filter_size[0], filter_size[1], total_arg_size_depth, num_features], dtype=dtype)
+        matrix = tf.get_variable("Matrix", [filter_size[0], filter_size[1], total_arg_size_depth, num_features], dtype=dtype)
         if len(args) == 1:
             res = tf.nn.conv2d(args[0], matrix, strides=[1, 1, 1, 1], padding='SAME')
         else:
             res = tf.nn.conv2d(tf.concat(args, 3), matrix, strides=[1, 1, 1, 1], padding='SAME')
         if not bias:
             return res
-        bias_term = tf.get_variable(
-            "Bias", [num_features],
-            dtype=dtype,
-            initializer=tf.constant_initializer(
-                bias_start, dtype=dtype))
+        bias_term = tf.get_variable("Bias", [num_features], dtype=dtype, initializer=tf.constant_initializer(bias_start, dtype=dtype))
     return res + bias_term
 
+
 class ConvLSTMLayer(Layer):
     """
     The :class:`ConvLSTMLayer` class is a Convolutional LSTM layer,
@@ -4921,6 +4993,7 @@ class ConvLSTMLayer(Layer):
     batch_size : int or tensor
         Is int, if able to compute the batch_size, otherwise, tensor for ``?``.
     """
+
     def __init__(
             self,
             layer=None,
@@ -4938,9 +5011,7 @@ def __init__(
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
         print("  [TL] ConvLSTMLayer %s: feature_map:%d, n_steps:%d, "
-              "in_dim:%d %s, cell_fn:%s " % (self.name, feature_map,
-                                             n_steps, self.inputs.get_shape().ndims, self.inputs.get_shape(),
-                                             cell_fn.__name__))
+              "in_dim:%d %s, cell_fn:%s " % (self.name, feature_map, n_steps, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__))
         # You can get the dimension by .get_shape() or ._shape, and check the
         # dimension by .with_rank() as follow.
         # self.inputs.get_shape().with_rank(2)
@@ -4950,8 +5021,7 @@ def __init__(
         try:
             self.inputs.get_shape().with_rank(5)
         except:
-            raise Exception("RNN : Input dimension should be rank 5 : [batch_size, n_steps, input_x, "
-                            "input_y, feature_map]")
+            raise Exception("RNN : Input dimension should be rank 5 : [batch_size, n_steps, input_x, " "input_y, feature_map]")
 
         fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0]
 
@@ -4964,11 +5034,10 @@ def __init__(
             print("     non specified batch_size, uses a tensor instead.")
         self.batch_size = batch_size
 
-
         outputs = []
         self.cell = cell = cell_fn(shape=cell_shape, filter_size=filter_size, num_features=feature_map)
         if initial_state is None:
-            self.initial_state = cell.zero_state(batch_size, dtype=D_TYPE) # dtype=tf.float32)  # 1.2.3
+            self.initial_state = cell.zero_state(batch_size, dtype=D_TYPE)  # dtype=tf.float32)  # 1.2.3
         state = self.initial_state
         # with tf.variable_scope("model", reuse=None, initializer=initializer):
         with tf.variable_scope(name, initializer=initializer) as vs:
@@ -4994,8 +5063,7 @@ def __init__(
             else:
                 # <akara>: stack more RNN layer after that
                 # 5D Tensor [n_example/n_steps, n_steps, h, w, c]
-                self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_steps, cell_shape[0],
-                                                                  cell_shape[1], feature_map])
+                self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_steps, cell_shape[0], cell_shape[1], feature_map])
 
         self.final_state = state
 
@@ -5006,7 +5074,6 @@ def __init__(
         self.all_params.extend(rnn_variables)
 
 
-
 # Advanced Ops for Dynamic RNN
 def advanced_indexing_op(input, index):
     """Advanced Indexing for Sequences, returns the outputs by given sequence lengths.
@@ -5047,13 +5114,14 @@ def advanced_indexing_op(input, index):
     """
     batch_size = tf.shape(input)[0]
     # max_length = int(input.get_shape()[1])    # for fixed length rnn, length is given
-    max_length = tf.shape(input)[1]             # for dynamic_rnn, length is unknown
+    max_length = tf.shape(input)[1]  # for dynamic_rnn, length is unknown
     dim_size = int(input.get_shape()[2])
     index = tf.range(0, batch_size) * max_length + (index - 1)
     flat = tf.reshape(input, [-1, dim_size])
     relevant = tf.gather(flat, index)
     return relevant
 
+
 def retrieve_seq_length_op(data):
     """An op to compute the length of a sequence from input shape of [batch_size, n_step(max), n_features],
     it can be used when the features of padding (on right hand side) are all zeros.
@@ -5099,6 +5167,7 @@ def retrieve_seq_length_op(data):
         length = tf.cast(length, tf.int32)
     return length
 
+
 def retrieve_seq_length_op2(data):
     """An op to compute the length of a sequence, from input shape of [batch_size, n_step(max)],
     it can be used when the features of padding (on right hand side) are all zeros.
@@ -5121,7 +5190,8 @@ def retrieve_seq_length_op2(data):
     """
     return tf.reduce_sum(tf.cast(tf.greater(data, tf.zeros_like(data)), tf.int32), 1)
 
-def retrieve_seq_length_op3(data, pad_val=0): # HangSheng: return tensor for sequence length, if input is tf.string
+
+def retrieve_seq_length_op3(data, pad_val=0):  # HangSheng: return tensor for sequence length, if input is tf.string
     data_shape_size = data.get_shape().ndims
     if data_shape_size == 3:
         return tf.reduce_sum(tf.cast(tf.reduce_any(tf.not_equal(data, pad_val), axis=2), dtype=tf.int32), 1)
@@ -5132,7 +5202,8 @@ def retrieve_seq_length_op3(data, pad_val=0): # HangSheng: return tensor for seq
     else:
         raise ValueError("retrieve_seq_length_op3: handling data_shape_size %s hasn't been implemented!" % (data_shape_size))
 
-def target_mask_op(data, pad_val=0):        # HangSheng: return tensor for mask,if input is tf.string
+
+def target_mask_op(data, pad_val=0):  # HangSheng: return tensor for mask,if input is tf.string
     data_shape_size = data.get_shape().ndims
     if data_shape_size == 3:
         return tf.cast(tf.reduce_any(tf.not_equal(data, pad_val), axis=2), dtype=tf.int32)
@@ -5235,21 +5306,22 @@ class DynamicRNNLayer(Layer):
     - `tflearn rnn <https://github.com/tflearn/tflearn/blob/master/tflearn/layers/recurrent.py>`_
     - ``tutorial_dynamic_rnn.py``
     """
+
     def __init__(
-        self,
-        layer = None,
-        cell_fn = None,#tf.nn.rnn_cell.LSTMCell,
-        cell_init_args = {'state_is_tuple' : True},
-        n_hidden = 256,
-        initializer = tf.random_uniform_initializer(-0.1, 0.1),
-        sequence_length = None,
-        initial_state = None,
-        dropout = None,
-        n_layer = 1,
-        return_last = False,
-        return_seq_2d = False,
-        dynamic_rnn_init_args={},
-        name = 'dyrnn_layer',
+            self,
+            layer=None,
+            cell_fn=None,  #tf.nn.rnn_cell.LSTMCell,
+            cell_init_args={'state_is_tuple': True},
+            n_hidden=256,
+            initializer=tf.random_uniform_initializer(-0.1, 0.1),
+            sequence_length=None,
+            initial_state=None,
+            dropout=None,
+            n_layer=1,
+            return_last=False,
+            return_seq_2d=False,
+            dynamic_rnn_init_args={},
+            name='dyrnn_layer',
     ):
         Layer.__init__(self, name=name)
         if cell_fn is None:
@@ -5261,8 +5333,8 @@ def __init__(
                 pass
         self.inputs = layer.outputs
 
-        print("  [TL] DynamicRNNLayer %s: n_hidden:%d, in_dim:%d in_shape:%s cell_fn:%s dropout:%s n_layer:%d" % (self.name, n_hidden,
-             self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__, dropout, n_layer))
+        print("  [TL] DynamicRNNLayer %s: n_hidden:%d, in_dim:%d in_shape:%s cell_fn:%s dropout:%s n_layer:%d" %
+              (self.name, n_hidden, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__, dropout, n_layer))
 
         # Input dimension should be rank 3 [batch_size, n_steps(max), n_features]
         try:
@@ -5293,9 +5365,8 @@ def __init__(
             elif isinstance(dropout, float):
                 in_keep_prob, out_keep_prob = dropout, dropout
             else:
-                raise Exception("Invalid dropout type (must be a 2-D tuple of "
-                                "float)")
-            try: # TF1.0
+                raise Exception("Invalid dropout type (must be a 2-D tuple of " "float)")
+            try:  # TF1.0
                 DropoutWrapper_fn = tf.contrib.rnn.DropoutWrapper
             except:
                 DropoutWrapper_fn = tf.nn.rnn_cell.DropoutWrapper
@@ -5305,8 +5376,7 @@ def __init__(
             #                     cell_instance_fn1(),
             #                     input_keep_prob=in_keep_prob,
             #                     output_keep_prob=out_keep_prob)
-            cell_creator = lambda: DropoutWrapper_fn(rnn_creator(),
-                      input_keep_prob=in_keep_prob, output_keep_prob=1.0)#out_keep_prob)
+            cell_creator = lambda: DropoutWrapper_fn(rnn_creator(), input_keep_prob=in_keep_prob, output_keep_prob=1.0)  #out_keep_prob)
         else:
             cell_creator = rnn_creator
         self.cell = cell_creator()
@@ -5321,42 +5391,38 @@ def __init__(
             try:
                 # cell_instance_fn=lambda: MultiRNNCell_fn([cell_instance_fn2() for _ in range(n_layer)], state_is_tuple=True) # HanSheng
                 self.cell = MultiRNNCell_fn([cell_creator() for _ in range(n_layer)], state_is_tuple=True)
-            except: # when GRU
+            except:  # when GRU
                 # cell_instance_fn=lambda: MultiRNNCell_fn([cell_instance_fn2() for _ in range(n_layer)]) # HanSheng
                 self.cell = MultiRNNCell_fn([cell_creator() for _ in range(n_layer)])
 
         if dropout:
-            self.cell = DropoutWrapper_fn(self.cell,
-                      input_keep_prob=1.0, output_keep_prob=out_keep_prob)
+            self.cell = DropoutWrapper_fn(self.cell, input_keep_prob=1.0, output_keep_prob=out_keep_prob)
 
         # self.cell=cell_instance_fn() # HanSheng
 
         # Initialize initial_state
         if initial_state is None:
-            self.initial_state = self.cell.zero_state(batch_size, dtype=D_TYPE) # dtype=tf.float32)
+            self.initial_state = self.cell.zero_state(batch_size, dtype=D_TYPE)  # dtype=tf.float32)
         else:
             self.initial_state = initial_state
 
         # Computes sequence_length
         if sequence_length is None:
-            try: ## TF1.0
-                sequence_length = retrieve_seq_length_op(
-                            self.inputs if isinstance(self.inputs, tf.Tensor) else tf.stack(self.inputs))
-            except: ## TF0.12
-                sequence_length = retrieve_seq_length_op(
-                            self.inputs if isinstance(self.inputs, tf.Tensor) else tf.pack(self.inputs))
+            try:  ## TF1.0
+                sequence_length = retrieve_seq_length_op(self.inputs if isinstance(self.inputs, tf.Tensor) else tf.stack(self.inputs))
+            except:  ## TF0.12
+                sequence_length = retrieve_seq_length_op(self.inputs if isinstance(self.inputs, tf.Tensor) else tf.pack(self.inputs))
 
         # Main - Computes outputs and last_states
         with tf.variable_scope(name, initializer=initializer) as vs:
             outputs, last_states = tf.nn.dynamic_rnn(
                 cell=self.cell,
                 # inputs=X
-                inputs = self.inputs,
+                inputs=self.inputs,
                 # dtype=tf.float64,
                 sequence_length=sequence_length,
-                initial_state = self.initial_state,
-                **dynamic_rnn_init_args
-                )
+                initial_state=self.initial_state,
+                **dynamic_rnn_init_args)
             rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
 
             # print("     n_params : %d" % (len(rnn_variables)))
@@ -5372,9 +5438,9 @@ def __init__(
                 if return_seq_2d:
                     # PTB tutorial:
                     # 2D Tensor [n_example, n_hidden]
-                    try: # TF1.0
+                    try:  # TF1.0
                         self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_hidden])
-                    except: # TF0.12
+                    except:  # TF0.12
                         self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_hidden])
                 else:
                     # <akara>:
@@ -5382,10 +5448,9 @@ def __init__(
                     max_length = tf.shape(outputs)[1]
                     batch_size = tf.shape(outputs)[0]
 
-
-                    try: # TF1.0
+                    try:  # TF1.0
                         self.outputs = tf.reshape(tf.concat(outputs, 1), [batch_size, max_length, n_hidden])
-                    except: # TF0.12
+                    except:  # TF0.12
                         self.outputs = tf.reshape(tf.concat(1, outputs), [batch_size, max_length, n_hidden])
                     # self.outputs = tf.reshape(tf.concat(1, outputs), [-1, max_length, n_hidden])
 
@@ -5398,8 +5463,9 @@ def __init__(
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
 
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( rnn_variables )
+        self.all_layers.extend([self.outputs])
+        self.all_params.extend(rnn_variables)
+
 
 # Bidirectional Dynamic RNN
 class BiDynamicRNNLayer(Layer):
@@ -5476,22 +5542,23 @@ class BiDynamicRNNLayer(Layer):
     - `Wild-ML Blog <http://www.wildml.com/2016/08/rnns-in-tensorflow-a-practical-guide-and-undocumented-features/>`_
     - `bidirectional_rnn.ipynb <https://github.com/dennybritz/tf-rnn/blob/master/bidirectional_rnn.ipynb>`_
     """
+
     def __init__(
-        self,
-        layer = None,
-        cell_fn = None,#tf.nn.rnn_cell.LSTMCell,
-        cell_init_args = {'state_is_tuple':True},
-        n_hidden = 256,
-        initializer = tf.random_uniform_initializer(-0.1, 0.1),
-        sequence_length = None,
-        fw_initial_state = None,
-        bw_initial_state = None,
-        dropout = None,
-        n_layer = 1,
-        return_last = False,
-        return_seq_2d = False,
-        dynamic_rnn_init_args={},
-        name = 'bi_dyrnn_layer',
+            self,
+            layer=None,
+            cell_fn=None,  #tf.nn.rnn_cell.LSTMCell,
+            cell_init_args={'state_is_tuple': True},
+            n_hidden=256,
+            initializer=tf.random_uniform_initializer(-0.1, 0.1),
+            sequence_length=None,
+            fw_initial_state=None,
+            bw_initial_state=None,
+            dropout=None,
+            n_layer=1,
+            return_last=False,
+            return_seq_2d=False,
+            dynamic_rnn_init_args={},
+            name='bi_dyrnn_layer',
     ):
         Layer.__init__(self, name=name)
         if cell_fn is None:
@@ -5536,8 +5603,7 @@ def __init__(
                 elif isinstance(dropout, float):
                     in_keep_prob, out_keep_prob = dropout, dropout
                 else:
-                    raise Exception("Invalid dropout type (must be a 2-D tuple of "
-                                    "float)")
+                    raise Exception("Invalid dropout type (must be a 2-D tuple of " "float)")
                 try:
                     DropoutWrapper_fn = tf.contrib.rnn.DropoutWrapper
                 except:
@@ -5548,9 +5614,7 @@ def __init__(
                     #                     cell_instance_fn1(),
                     #                     input_keep_prob=in_keep_prob,
                     #                     output_keep_prob=out_keep_prob)
-                cell_creator = lambda: DropoutWrapper_fn(rnn_creator(),
-                                                         input_keep_prob=in_keep_prob,
-                                                         output_keep_prob=1.0)  # out_keep_prob)
+                cell_creator = lambda: DropoutWrapper_fn(rnn_creator(), input_keep_prob=in_keep_prob, output_keep_prob=1.0)  # out_keep_prob)
             else:
                 cell_creator = rnn_creator
             self.fw_cell = cell_creator()
@@ -5568,30 +5632,26 @@ def __init__(
                 self.bw_cell = MultiRNNCell_fn([cell_creator() for _ in range(n_layer)])
 
             if dropout:
-                self.fw_cell = DropoutWrapper_fn(self.fw_cell,
-                          input_keep_prob=1.0, output_keep_prob=out_keep_prob)
-                self.bw_cell = DropoutWrapper_fn(self.bw_cell,
-                          input_keep_prob=1.0, output_keep_prob=out_keep_prob)
+                self.fw_cell = DropoutWrapper_fn(self.fw_cell, input_keep_prob=1.0, output_keep_prob=out_keep_prob)
+                self.bw_cell = DropoutWrapper_fn(self.bw_cell, input_keep_prob=1.0, output_keep_prob=out_keep_prob)
 
             # self.fw_cell=cell_instance_fn()
             # self.bw_cell=cell_instance_fn()
             # Initial state of RNN
             if fw_initial_state is None:
-                self.fw_initial_state = self.fw_cell.zero_state(self.batch_size, dtype=D_TYPE) # dtype=tf.float32)
+                self.fw_initial_state = self.fw_cell.zero_state(self.batch_size, dtype=D_TYPE)  # dtype=tf.float32)
             else:
                 self.fw_initial_state = fw_initial_state
             if bw_initial_state is None:
-                self.bw_initial_state = self.bw_cell.zero_state(self.batch_size, dtype=D_TYPE) # dtype=tf.float32)
+                self.bw_initial_state = self.bw_cell.zero_state(self.batch_size, dtype=D_TYPE)  # dtype=tf.float32)
             else:
                 self.bw_initial_state = bw_initial_state
             # Computes sequence_length
             if sequence_length is None:
-                try: ## TF1.0
-                    sequence_length = retrieve_seq_length_op(
-                        self.inputs if isinstance(self.inputs, tf.Tensor) else tf.stack(self.inputs))
-                except: ## TF0.12
-                    sequence_length = retrieve_seq_length_op(
-                        self.inputs if isinstance(self.inputs, tf.Tensor) else tf.pack(self.inputs))
+                try:  ## TF1.0
+                    sequence_length = retrieve_seq_length_op(self.inputs if isinstance(self.inputs, tf.Tensor) else tf.stack(self.inputs))
+                except:  ## TF0.12
+                    sequence_length = retrieve_seq_length_op(self.inputs if isinstance(self.inputs, tf.Tensor) else tf.pack(self.inputs))
 
             outputs, (states_fw, states_bw) = tf.nn.bidirectional_dynamic_rnn(
                 cell_fw=self.fw_cell,
@@ -5600,15 +5660,14 @@ def __init__(
                 sequence_length=sequence_length,
                 initial_state_fw=self.fw_initial_state,
                 initial_state_bw=self.bw_initial_state,
-                **dynamic_rnn_init_args
-            )
+                **dynamic_rnn_init_args)
             rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
 
             print("     n_params : %d" % (len(rnn_variables)))
             # Manage the outputs
-            try: # TF1.0
+            try:  # TF1.0
                 outputs = tf.concat(outputs, 2)
-            except: # TF0.12
+            except:  # TF0.12
                 outputs = tf.concat(2, outputs)
             if return_last:
                 # [batch_size, 2 * n_hidden]
@@ -5619,18 +5678,18 @@ def __init__(
                 if return_seq_2d:
                     # PTB tutorial:
                     # 2D Tensor [n_example, 2 * n_hidden]
-                    try: # TF1.0
+                    try:  # TF1.0
                         self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, 2 * n_hidden])
-                    except: # TF0.12
+                    except:  # TF0.12
                         self.outputs = tf.reshape(tf.concat(1, outputs), [-1, 2 * n_hidden])
                 else:
                     # <akara>:
                     # 3D Tensor [batch_size, n_steps(max), 2 * n_hidden]
                     max_length = tf.shape(outputs)[1]
                     batch_size = tf.shape(outputs)[0]
-                    try: # TF1.0
+                    try:  # TF1.0
                         self.outputs = tf.reshape(tf.concat(outputs, 1), [batch_size, max_length, 2 * n_hidden])
-                    except: # TF0.12
+                    except:  # TF0.12
                         self.outputs = tf.reshape(tf.concat(1, outputs), [batch_size, max_length, 2 * n_hidden])
                     # self.outputs = tf.reshape(tf.concat(1, outputs), [-1, max_length, 2 * n_hidden])
 
@@ -5644,8 +5703,9 @@ def __init__(
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
 
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( rnn_variables )
+        self.all_layers.extend([self.outputs])
+        self.all_params.extend(rnn_variables)
+
 
 # Seq2seq
 class Seq2Seq(Layer):
@@ -5752,23 +5812,24 @@ class Seq2Seq(Layer):
 
 
     """
+
     def __init__(
-        self,
-        net_encode_in = None,
-        net_decode_in = None,
-        cell_fn = None,#tf.nn.rnn_cell.LSTMCell,
-        cell_init_args = {'state_is_tuple':True},
-        n_hidden = 256,
-        initializer = tf.random_uniform_initializer(-0.1, 0.1),
-        encode_sequence_length = None,
-        decode_sequence_length = None,
-        initial_state_encode = None,
-        initial_state_decode = None,
-        dropout = None,
-        n_layer = 1,
-        # return_last = False,
-        return_seq_2d = False,
-        name = 'seq2seq',
+            self,
+            net_encode_in=None,
+            net_decode_in=None,
+            cell_fn=None,  #tf.nn.rnn_cell.LSTMCell,
+            cell_init_args={'state_is_tuple': True},
+            n_hidden=256,
+            initializer=tf.random_uniform_initializer(-0.1, 0.1),
+            encode_sequence_length=None,
+            decode_sequence_length=None,
+            initial_state_encode=None,
+            initial_state_decode=None,
+            dropout=None,
+            n_layer=1,
+            # return_last = False,
+            return_seq_2d=False,
+            name='seq2seq',
     ):
         Layer.__init__(self, name=name)
         if cell_fn is None:
@@ -5779,36 +5840,37 @@ def __init__(
             except:
                 pass
         # self.inputs = layer.outputs
-        print("  [**] Seq2Seq %s: n_hidden:%d cell_fn:%s dropout:%s n_layer:%d" %
-              (self.name, n_hidden, cell_fn.__name__, dropout, n_layer))
+        print("  [**] Seq2Seq %s: n_hidden:%d cell_fn:%s dropout:%s n_layer:%d" % (self.name, n_hidden, cell_fn.__name__, dropout, n_layer))
 
-        with tf.variable_scope(name) as vs:#, reuse=reuse):
+        with tf.variable_scope(name) as vs:  #, reuse=reuse):
             # tl.layers.set_name_reuse(reuse)
             # network = InputLayer(self.inputs, name=name+'/input')
-            network_encode = DynamicRNNLayer(net_encode_in,
-                     cell_fn = cell_fn,
-                     cell_init_args = cell_init_args,
-                     n_hidden = n_hidden,
-                     initial_state = initial_state_encode,
-                     dropout = dropout,
-                     n_layer = n_layer,
-                     sequence_length = encode_sequence_length,
-                     return_last = False,
-                     return_seq_2d = True,
-                     name = name+'_encode')
+            network_encode = DynamicRNNLayer(
+                net_encode_in,
+                cell_fn=cell_fn,
+                cell_init_args=cell_init_args,
+                n_hidden=n_hidden,
+                initial_state=initial_state_encode,
+                dropout=dropout,
+                n_layer=n_layer,
+                sequence_length=encode_sequence_length,
+                return_last=False,
+                return_seq_2d=True,
+                name=name + '_encode')
             # vs.reuse_variables()
             # tl.layers.set_name_reuse(True)
-            network_decode = DynamicRNNLayer(net_decode_in,
-                     cell_fn = cell_fn,
-                     cell_init_args = cell_init_args,
-                     n_hidden = n_hidden,
-                     initial_state = (network_encode.final_state if initial_state_decode is None else initial_state_decode),
-                     dropout = dropout,
-                     n_layer = n_layer,
-                     sequence_length = decode_sequence_length,
-                     return_last = False,
-                     return_seq_2d = return_seq_2d,
-                     name = name+'_decode')
+            network_decode = DynamicRNNLayer(
+                net_decode_in,
+                cell_fn=cell_fn,
+                cell_init_args=cell_init_args,
+                n_hidden=n_hidden,
+                initial_state=(network_encode.final_state if initial_state_decode is None else initial_state_decode),
+                dropout=dropout,
+                n_layer=n_layer,
+                sequence_length=decode_sequence_length,
+                return_last=False,
+                return_seq_2d=return_seq_2d,
+                name=name + '_decode')
             self.outputs = network_decode.outputs
 
             # rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
@@ -5830,41 +5892,43 @@ def __init__(
         self.all_params.extend(list(network_decode.all_params))
         self.all_drop.update(dict(network_decode.all_drop))
 
-        self.all_layers.extend( [self.outputs] )
+        self.all_layers.extend([self.outputs])
         # self.all_params.extend( rnn_variables )
 
         self.all_layers = list_remove_repeat(self.all_layers)
         self.all_params = list_remove_repeat(self.all_params)
 
+
 class PeekySeq2Seq(Layer):
     """
     Waiting for contribution.
     The :class:`PeekySeq2Seq` class, see `Model <https://camo.githubusercontent.com/7f690d451036938a51e62feb77149c8bb4be6675/687474703a2f2f6936342e74696e797069632e636f6d2f333032617168692e706e67>`_
     and `Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation <https://arxiv.org/abs/1406.1078>`_ .
     """
+
     def __init__(
-        self,
-        net_encode_in = None,
-        net_decode_in = None,
-        cell_fn = None,#tf.nn.rnn_cell.LSTMCell,
-        cell_init_args = {'state_is_tuple':True},
-        n_hidden = 256,
-        initializer = tf.random_uniform_initializer(-0.1, 0.1),
-        in_sequence_length = None,
-        out_sequence_length = None,
-        initial_state = None,
-        dropout = None,
-        n_layer = 1,
-        # return_last = False,
-        return_seq_2d = False,
-        name = 'peeky_seq2seq',
+            self,
+            net_encode_in=None,
+            net_decode_in=None,
+            cell_fn=None,  #tf.nn.rnn_cell.LSTMCell,
+            cell_init_args={'state_is_tuple': True},
+            n_hidden=256,
+            initializer=tf.random_uniform_initializer(-0.1, 0.1),
+            in_sequence_length=None,
+            out_sequence_length=None,
+            initial_state=None,
+            dropout=None,
+            n_layer=1,
+            # return_last = False,
+            return_seq_2d=False,
+            name='peeky_seq2seq',
     ):
         Layer.__init__(self, name=name)
         if cell_fn is None:
             raise Exception("Please put in cell_fn")
         # self.inputs = layer.outputs
-        print("  [TL] PeekySeq2seq %s: n_hidden:%d cell_fn:%s dropout:%s n_layer:%d" %
-              (self.name, n_hidden, cell_fn.__name__, dropout, n_layer))
+        print("  [TL] PeekySeq2seq %s: n_hidden:%d cell_fn:%s dropout:%s n_layer:%d" % (self.name, n_hidden, cell_fn.__name__, dropout, n_layer))
+
 
 class AttentionSeq2Seq(Layer):
     """
@@ -5872,29 +5936,30 @@ class AttentionSeq2Seq(Layer):
     The :class:`AttentionSeq2Seq` class, see `Model <https://camo.githubusercontent.com/0e2e4e5fb2dd47846c2fe027737a5df5e711df1b/687474703a2f2f6936342e74696e797069632e636f6d2f6132727733642e706e67>`_
     and `Neural Machine Translation by Jointly Learning to Align and Translate <https://arxiv.org/pdf/1409.0473v6.pdf>`_ .
     """
+
     def __init__(
-        self,
-        net_encode_in = None,
-        net_decode_in = None,
-        cell_fn = None,#tf.nn.rnn_cell.LSTMCell,
-        cell_init_args = {'state_is_tuple':True},
-        n_hidden = 256,
-        initializer = tf.random_uniform_initializer(-0.1, 0.1),
-        in_sequence_length = None,
-        out_sequence_length = None,
-        initial_state = None,
-        dropout = None,
-        n_layer = 1,
-        # return_last = False,
-        return_seq_2d = False,
-        name = 'attention_seq2seq',
+            self,
+            net_encode_in=None,
+            net_decode_in=None,
+            cell_fn=None,  #tf.nn.rnn_cell.LSTMCell,
+            cell_init_args={'state_is_tuple': True},
+            n_hidden=256,
+            initializer=tf.random_uniform_initializer(-0.1, 0.1),
+            in_sequence_length=None,
+            out_sequence_length=None,
+            initial_state=None,
+            dropout=None,
+            n_layer=1,
+            # return_last = False,
+            return_seq_2d=False,
+            name='attention_seq2seq',
     ):
         Layer.__init__(self, name=name)
         if cell_fn is None:
             raise Exception("Please put in cell_fn")
         # self.inputs = layer.outputs
-        print("  [TL] PeekySeq2seq %s: n_hidden:%d cell_fn:%s dropout:%s n_layer:%d" %
-              (self.name, n_hidden, cell_fn.__name__, dropout, n_layer))
+        print("  [TL] PeekySeq2seq %s: n_hidden:%d cell_fn:%s dropout:%s n_layer:%d" % (self.name, n_hidden, cell_fn.__name__, dropout, n_layer))
+
 
 ## Shape layer
 class FlattenLayer(Layer):
@@ -5930,10 +5995,11 @@ class FlattenLayer(Layer):
     ...                    name ='pool_layer',)
     >>> net = tl.layers.FlattenLayer(net, name='flatten_layer')
     """
+
     def __init__(
-        self,
-        layer = None,
-        name ='flatten_layer',
+            self,
+            layer=None,
+            name='flatten_layer',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
@@ -5943,7 +6009,8 @@ def __init__(
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
+        self.all_layers.extend([self.outputs])
+
 
 class ReshapeLayer(Layer):
     """
@@ -5973,11 +6040,12 @@ class ReshapeLayer(Layer):
     ... [ 5.  5.  5.]
     ... [ 6.  6.  6.]]]
     """
+
     def __init__(
-        self,
-        layer = None,
-        shape = [],
-        name ='reshape_layer',
+            self,
+            layer=None,
+            shape=[],
+            name='reshape_layer',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
@@ -5986,7 +6054,8 @@ def __init__(
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
+        self.all_layers.extend([self.outputs])
+
 
 class TransposeLayer(Layer):
     """
@@ -6001,11 +6070,12 @@ class TransposeLayer(Layer):
     name : a string or None
         An optional name to attach to this layer.
     """
+
     def __init__(
-        self,
-        layer = None,
-        perm = None,
-        name = 'transpose',
+            self,
+            layer=None,
+            perm=None,
+            name='transpose',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
@@ -6017,9 +6087,10 @@ def __init__(
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
+        self.all_layers.extend([self.outputs])
         # self.all_params.extend( variables )
 
+
 ## Lambda
 class LambdaLayer(Layer):
     """
@@ -6046,12 +6117,13 @@ class LambdaLayer(Layer):
     >>> out = sess.run(y, feed_dict={x : [[1],[2]]})
     ... [[2],[4]]
     """
+
     def __init__(
-        self,
-        layer = None,
-        fn = None,
-        fn_args = {},
-        name = 'lambda_layer',
+            self,
+            layer=None,
+            fn=None,
+            fn_args={},
+            name='lambda_layer',
     ):
         Layer.__init__(self, name=name)
         assert layer is not None
@@ -6065,8 +6137,9 @@ def __init__(
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( variables )
+        self.all_layers.extend([self.outputs])
+        self.all_params.extend(variables)
+
 
 ## Merge layer
 class ConcatLayer(Layer):
@@ -6105,19 +6178,20 @@ class ConcatLayer(Layer):
     ...     layer 0: ("Relu:0", shape=(?, 800), dtype=float32)
     ...     layer 1: Tensor("Relu_1:0", shape=(?, 300), dtype=float32)
     """
+
     def __init__(
-        self,
-        layer = [],
-        concat_dim = 1,
-        name ='concat_layer',
+            self,
+            layer=[],
+            concat_dim=1,
+            name='concat_layer',
     ):
         Layer.__init__(self, name=name)
         self.inputs = []
         for l in layer:
             self.inputs.append(l.outputs)
-        try: # TF1.0
+        try:  # TF1.0
             self.outputs = tf.concat(self.inputs, concat_dim, name=name)
-        except: # TF0.12
+        except:  # TF0.12
             self.outputs = tf.concat(concat_dim, self.inputs, name=name)
 
         print("  [TL] ConcatLayer %s: axis: %d" % (self.name, concat_dim))
@@ -6135,6 +6209,7 @@ def __init__(
         self.all_params = list_remove_repeat(self.all_params)
         #self.all_drop = list_remove_repeat(self.all_drop) # it is a dict
 
+
 class ElementwiseLayer(Layer):
     """
     The :class:`ElementwiseLayer` class combines multiple :class:`Layer` which have the same output shapes by a given elemwise-wise operation.
@@ -6160,11 +6235,12 @@ class ElementwiseLayer(Layer):
     ...                         combine_fn = tf.minimum,
     ...                         name = 'combine_layer')
     """
+
     def __init__(
-        self,
-        layer = [],
-        combine_fn = tf.minimum,
-        name ='elementwise_layer',
+            self,
+            layer=[],
+            combine_fn=tf.minimum,
+            name='elementwise_layer',
     ):
         Layer.__init__(self, name=name)
 
@@ -6173,7 +6249,8 @@ def __init__(
         self.outputs = layer[0].outputs
         # print(self.outputs._shape, type(self.outputs._shape))
         for l in layer[1:]:
-            assert str(self.outputs.get_shape()) == str(l.outputs.get_shape()), "Hint: the input shapes should be the same. %s != %s" %  (self.outputs.get_shape() , str(l.outputs.get_shape()))
+            assert str(self.outputs.get_shape()) == str(
+                l.outputs.get_shape()), "Hint: the input shapes should be the same. %s != %s" % (self.outputs.get_shape(), str(l.outputs.get_shape()))
             self.outputs = combine_fn(self.outputs, l.outputs, name=name)
 
         self.all_layers = list(layer[0].all_layers)
@@ -6189,6 +6266,7 @@ def __init__(
         self.all_params = list_remove_repeat(self.all_params)
         # self.all_drop = list_remove_repeat(self.all_drop)
 
+
 ## Extend
 class ExpandDimsLayer(Layer):
     """
@@ -6204,27 +6282,29 @@ class ExpandDimsLayer(Layer):
     name : a string or None
         An optional name to attach to this layer.
     """
+
     def __init__(
-        self,
-        layer = None,
-        axis = None,
-        name = 'expand_dims',
+            self,
+            layer=None,
+            axis=None,
+            name='expand_dims',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
 
         print("  [TL] ExpandDimsLayer  %s: axis:%d" % (self.name, axis))
         with tf.variable_scope(name) as vs:
-            try:    # TF12 TF1.0
+            try:  # TF12 TF1.0
                 self.outputs = tf.expand_dims(self.inputs, axis=axis)
-            except: # TF11
+            except:  # TF11
                 self.outputs = tf.expand_dims(self.inputs, dim=axis)
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
+        self.all_layers.extend([self.outputs])
         # self.all_params.extend( variables )
 
+
 class TileLayer(Layer):
     """
     The :class:`TileLayer` class constructs a tensor by tiling a given tensor,
@@ -6239,11 +6319,12 @@ class TileLayer(Layer):
     name : a string or None
         An optional name to attach to this layer.
     """
+
     def __init__(
-        self,
-        layer = None,
-        multiples = None,
-        name = 'tile',
+            self,
+            layer=None,
+            multiples=None,
+            name='tile',
     ):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
@@ -6254,9 +6335,10 @@ def __init__(
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
+        self.all_layers.extend([self.outputs])
         # self.all_params.extend( variables )
 
+
 ## Stack Unstack
 class StackLayer(Layer):
     """
@@ -6271,11 +6353,12 @@ class StackLayer(Layer):
     name : a string or None
         An optional name to attach to this layer.
     """
+
     def __init__(
-        self,
-        layer = [],
-        axis = 0,
-        name ='stack',
+            self,
+            layer=[],
+            axis=0,
+            name='stack',
     ):
         Layer.__init__(self, name=name)
         self.inputs = []
@@ -6298,11 +6381,13 @@ def __init__(
         self.all_layers = list_remove_repeat(self.all_layers)
         self.all_params = list_remove_repeat(self.all_params)
 
+
 def UnStackLayer(
-        layer = None,
-        num = None,
-        axis = 0,
-        name ='unstack',):
+        layer=None,
+        num=None,
+        axis=0,
+        name='unstack',
+):
     """
     The :class:`UnStackLayer` is layer for unstacking the given dimension of a rank-R tensor into rank-(R-1) tensors., see `tf.unstack() <https://www.tensorflow.org/api_docs/python/tf/unstack>`_.
 
@@ -6335,17 +6420,18 @@ def UnStackLayer(
         whole_name = name
 
     for i in range(len(outputs)):
-        n = Layer(None, name=whole_name+str(i))
+        n = Layer(None, name=whole_name + str(i))
         n.outputs = outputs[i]
         n.all_layers = list(layer.all_layers)
         n.all_params = list(layer.all_params)
         n.all_drop = dict(layer.all_drop)
-        n.all_layers.extend( [inputs] )
+        n.all_layers.extend([inputs])
 
         net_new.append(n)
 
     return net_new
 
+
 ## TF-Slim layer
 class SlimNetsLayer(Layer):
     """
@@ -6370,12 +6456,13 @@ class SlimNetsLayer(Layer):
     The due to TF-Slim stores the layers as dictionary, the ``all_layers`` in this
     network is not in order ! Fortunately, the ``all_params`` are in order.
     """
+
     def __init__(
-        self,
-        layer = None,
-        slim_layer = None,
-        slim_args = {},
-        name ='tfslim_layer',
+            self,
+            layer=None,
+            slim_layer=None,
+            slim_args={},
+            name='tfslim_layer',
     ):
         Layer.__init__(self, name=name)
         assert slim_layer is not None
@@ -6391,8 +6478,9 @@ def __init__(
 
         slim_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=name)
         if slim_variables == []:
-            print("No variables found under %s : the name of SlimNetsLayer should be matched with the begining of the ckpt file, see tutorial_inceptionV3_tfslim.py for more details" % name)
-
+            print(
+                "No variables found under %s : the name of SlimNetsLayer should be matched with the begining of the ckpt file, see tutorial_inceptionV3_tfslim.py for more details"
+                % name)
 
         self.outputs = net
 
@@ -6405,8 +6493,9 @@ def __init__(
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
 
-        self.all_layers.extend( slim_layers )
-        self.all_params.extend( slim_variables )
+        self.all_layers.extend(slim_layers)
+        self.all_params.extend(slim_variables)
+
 
 ## Keras layer
 class KerasLayer(Layer):
@@ -6425,12 +6514,13 @@ class KerasLayer(Layer):
     name : a string or None
         An optional name to attach to this layer.
     """
+
     def __init__(
-        self,
-        layer = None,
-        keras_layer = None,
-        keras_args = {},
-        name ='keras_layer',
+            self,
+            layer=None,
+            keras_layer=None,
+            keras_args={},
+            name='keras_layer',
     ):
         Layer.__init__(self, name=name)
         assert layer is not None
@@ -6444,8 +6534,9 @@ def __init__(
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( variables )
+        self.all_layers.extend([self.outputs])
+        self.all_params.extend(variables)
+
 
 ## Estimator layer
 class EstimatorLayer(Layer):
@@ -6464,12 +6555,13 @@ class EstimatorLayer(Layer):
     name : a string or None
         An optional name to attach to this layer.
     """
+
     def __init__(
-        self,
-        layer = None,
-        model_fn = None,
-        args = {},
-        name ='estimator_layer',
+            self,
+            layer=None,
+            model_fn=None,
+            args={},
+            name='estimator_layer',
     ):
         Layer.__init__(self, name=name)
         assert layer is not None
@@ -6483,8 +6575,9 @@ def __init__(
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( variables )
+        self.all_layers.extend([self.outputs])
+        self.all_params.extend(variables)
+
 
 ## Special activation
 class PReluLayer(Layer):
@@ -6506,38 +6599,38 @@ class PReluLayer(Layer):
     -----------
     - `Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification <http://arxiv.org/pdf/1502.01852v1.pdf>`_
     """
+
     def __init__(
-        self,
-        layer = None,
-        channel_shared = False,
-        a_init = tf.constant_initializer(value=0.0),
-        a_init_args = {},
-        # restore = True,
-        name="prelu_layer"
-    ):
+            self,
+            layer=None,
+            channel_shared=False,
+            a_init=tf.constant_initializer(value=0.0),
+            a_init_args={},
+            # restore = True,
+            name="prelu_layer"):
         Layer.__init__(self, name=name)
         self.inputs = layer.outputs
         print("  [TL] PReluLayer %s: channel_shared:%s" % (self.name, channel_shared))
         if channel_shared:
-            w_shape = (1,)
+            w_shape = (1, )
         else:
             w_shape = int(self.inputs.get_shape()[-1])
 
         # with tf.name_scope(name) as scope:
         with tf.variable_scope(name) as vs:
-            alphas = tf.get_variable(name='alphas', shape=w_shape, initializer=a_init, dtype=D_TYPE, **a_init_args )
+            alphas = tf.get_variable(name='alphas', shape=w_shape, initializer=a_init, dtype=D_TYPE, **a_init_args)
             try:  ## TF 1.0
                 self.outputs = tf.nn.relu(self.inputs) + tf.multiply(alphas, (self.inputs - tf.abs(self.inputs))) * 0.5
-            except: ## TF 0.12
+            except:  ## TF 0.12
                 self.outputs = tf.nn.relu(self.inputs) + tf.mul(alphas, (self.inputs - tf.abs(self.inputs))) * 0.5
 
-
         self.all_layers = list(layer.all_layers)
         self.all_params = list(layer.all_params)
         self.all_drop = dict(layer.all_drop)
 
-        self.all_layers.extend( [self.outputs] )
-        self.all_params.extend( [alphas] )
+        self.all_layers.extend([self.outputs])
+        self.all_params.extend([alphas])
+
 
 ## Flow control layer
 class MultiplexerLayer(Layer):
@@ -6592,24 +6685,23 @@ class MultiplexerLayer(Layer):
     ------------
     - See ``tf.pack() for TF0.12 or tf.stack() for TF1.0`` and ``tf.gather()`` at `TensorFlow - Slicing and Joining <https://www.tensorflow.org/versions/master/api_docs/python/array_ops.html#slicing-and-joining>`_
     """
-    def __init__(self,
-               layer = [],
-               name='mux_layer'):
+
+    def __init__(self, layer=[], name='mux_layer'):
         Layer.__init__(self, name=name)
         self.n_inputs = len(layer)
 
         self.inputs = []
         for l in layer:
             self.inputs.append(l.outputs)
-        try: ## TF1.0
-            all_inputs = tf.stack(self.inputs, name=name) # pack means concat a list of tensor in a new dim  # 1.2
+        try:  ## TF1.0
+            all_inputs = tf.stack(self.inputs, name=name)  # stack (formerly pack) joins a list of tensors along a new dimension  # 1.2
         except:
-            all_inputs = tf.pack(self.inputs, name=name) # pack means concat a list of tensor in a new dim  # 1.2
+            all_inputs = tf.pack(self.inputs, name=name)  # pack joins a list of tensors along a new dimension  # 1.2
 
         print("  [TL] MultiplexerLayer %s: n_inputs:%d" % (self.name, self.n_inputs))
 
         self.sel = tf.placeholder(tf.int32)
-        self.outputs = tf.gather(all_inputs, self.sel, name=name) # [sel, :, : ...] # 1.2
+        self.outputs = tf.gather(all_inputs, self.sel, name=name)  # [sel, :, : ...] # 1.2
 
         # print(self.outputs, vars(self.outputs))
         #         # tf.reshape(self.outputs, shape=)
@@ -6627,6 +6719,8 @@ def __init__(self,
         self.all_layers = list_remove_repeat(self.all_layers)
         self.all_params = list_remove_repeat(self.all_params)
         # self.all_drop = list_remove_repeat(self.all_drop)
+
+
 ## We can Duplicate the network instead of DemultiplexerLayer
 # class DemultiplexerLayer(Layer):
 #     """
@@ -6658,9 +6752,10 @@ def __init__(self,
 #         Layer.__init__(self, name=name)
 #         self.outputs = []
 
+
 ## Wrapper
 class EmbeddingAttentionSeq2seqWrapper(Layer):
-  """Sequence-to-sequence model with attention and for multiple buckets (Deprecated after TF0.12).
+    """Sequence-to-sequence model with attention that supports multiple buckets (deprecated after TF0.12).
 
     This example implements a multi-layer recurrent neural network as encoder,
     and an attention-based decoder. This is the same as the model described in
@@ -6698,146 +6793,144 @@ class EmbeddingAttentionSeq2seqWrapper(Layer):
     name : a string or None
         An optional name to attach to this layer.
   """
-  def __init__(self,
-               source_vocab_size,
-               target_vocab_size,
-               buckets,
-               size,
-               num_layers,
-               max_gradient_norm,
-               batch_size,
-               learning_rate,
-               learning_rate_decay_factor,
-               use_lstm=False,
-               num_samples=512,
-               forward_only=False,
-               name='wrapper'):
-    Layer.__init__(self)#, name=name)
-
-    self.source_vocab_size = source_vocab_size
-    self.target_vocab_size = target_vocab_size
-    self.buckets = buckets
-    self.batch_size = batch_size
-    self.learning_rate = tf.Variable(float(learning_rate), trainable=False, name='learning_rate')
-    self.learning_rate_decay_op = self.learning_rate.assign(
-        self.learning_rate * learning_rate_decay_factor)
-    self.global_step = tf.Variable(0, trainable=False, name='global_step')
-
-    if tf.__version__ >= "0.12":
-        raise Exception("Deprecated after TF0.12 : use other seq2seq layers instead.")
-
-    # =========== Fake output Layer for compute cost ======
-    # If we use sampled softmax, we need an output projection.
-    with tf.variable_scope(name) as vs:
-        output_projection = None
-        softmax_loss_function = None
-        # Sampled softmax only makes sense if we sample less than vocabulary size.
-        if num_samples > 0 and num_samples < self.target_vocab_size:
-          w = tf.get_variable("proj_w", [size, self.target_vocab_size], dtype=D_TYPE)
-          w_t = tf.transpose(w)
-          b = tf.get_variable("proj_b", [self.target_vocab_size], dtype=D_TYPE)
-          output_projection = (w, b)
-
-          def sampled_loss(inputs, labels):
-            labels = tf.reshape(labels, [-1, 1])
-            return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels, num_samples,
-                    self.target_vocab_size)
-          softmax_loss_function = sampled_loss
-
-        # ============ Seq Encode Layer =============
-        # Create the internal multi-layer cell for our RNN.
-        try: # TF1.0
-          cell_creator = lambda: tf.contrib.rnn.GRUCell(size)
-        except:
-          cell_creator = lambda: tf.nn.rnn_cell.GRUCell(size)
-
-        if use_lstm:
-          try: # TF1.0
-            cell_creator = lambda: tf.contrib.rnn.BasicLSTMCell(size)
-          except:
-            cell_creator = lambda: tf.nn.rnn_cell.BasicLSTMCell(size)
-
-        cell = cell_creator()
-        if num_layers > 1:
-          try: # TF1.0
-            cell = tf.contrib.rnn.MultiRNNCell([cell] * num_layers)
-          except:
-            cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers)
-
-        # ============== Seq Decode Layer ============
-        # The seq2seq function: we use embedding for the input and attention.
-        def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
-          return tf.nn.seq2seq.embedding_attention_seq2seq(
-              encoder_inputs, decoder_inputs, cell,
-              num_encoder_symbols=source_vocab_size,
-              num_decoder_symbols=target_vocab_size,
-              embedding_size=size,
-              output_projection=output_projection,
-              feed_previous=do_decode)
-
-        #=============================================================
-        # Feeds for inputs.
-        self.encoder_inputs = []
-        self.decoder_inputs = []
-        self.target_weights = []
-        for i in xrange(buckets[-1][0]):  # Last bucket is the biggest one.
-          self.encoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
-                                                    name="encoder{0}".format(i)))
-        for i in xrange(buckets[-1][1] + 1):
-          self.decoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
-                                                    name="decoder{0}".format(i)))
-          self.target_weights.append(tf.placeholder(tf.float32, shape=[None],
-                                                    name="weight{0}".format(i)))
-
-        # Our targets are decoder inputs shifted by one.
-        targets = [self.decoder_inputs[i + 1]
-                   for i in xrange(len(self.decoder_inputs) - 1)]
-        self.targets = targets  # DH add for debug
-
-
-        # Training outputs and losses.
-        if forward_only:
-          self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
-              self.encoder_inputs, self.decoder_inputs, targets,
-              self.target_weights, buckets, lambda x, y: seq2seq_f(x, y, True),
-              softmax_loss_function=softmax_loss_function)
-          # If we use output projection, we need to project outputs for decoding.
-          if output_projection is not None:
-            for b in xrange(len(buckets)):
-              self.outputs[b] = [
-                  tf.matmul(output, output_projection[0]) + output_projection[1]
-                  for output in self.outputs[b]
-              ]
-        else:
-          self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
-              self.encoder_inputs, self.decoder_inputs, targets,
-              self.target_weights, buckets,
-              lambda x, y: seq2seq_f(x, y, False),
-              softmax_loss_function=softmax_loss_function)
-
-        # Gradients and SGD update operation for training the model.
-        params = tf.trainable_variables()
-        if not forward_only:
-          self.gradient_norms = []
-          self.updates = []
-          opt = tf.train.GradientDescentOptimizer(self.learning_rate)
-          for b in xrange(len(buckets)):
-            gradients = tf.gradients(self.losses[b], params)
-            clipped_gradients, norm = tf.clip_by_global_norm(gradients,
-                                                             max_gradient_norm)
-            self.gradient_norms.append(norm)
-            self.updates.append(opt.apply_gradients(
-                zip(clipped_gradients, params), global_step=self.global_step))
-
-        # if save into npz
-        self.all_params = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
-
-    # if save into ckpt
-    self.saver = tf.train.Saver(tf.all_variables())
-
-  def step(self, session, encoder_inputs, decoder_inputs, target_weights,
-           bucket_id, forward_only):
-    """Run a step of the model feeding the given inputs.
+
+    def __init__(self,
+                 source_vocab_size,
+                 target_vocab_size,
+                 buckets,
+                 size,
+                 num_layers,
+                 max_gradient_norm,
+                 batch_size,
+                 learning_rate,
+                 learning_rate_decay_factor,
+                 use_lstm=False,
+                 num_samples=512,
+                 forward_only=False,
+                 name='wrapper'):
+        Layer.__init__(self)  #, name=name)
+
+        self.source_vocab_size = source_vocab_size
+        self.target_vocab_size = target_vocab_size
+        self.buckets = buckets
+        self.batch_size = batch_size
+        self.learning_rate = tf.Variable(float(learning_rate), trainable=False, name='learning_rate')
+        self.learning_rate_decay_op = self.learning_rate.assign(self.learning_rate * learning_rate_decay_factor)
+        self.global_step = tf.Variable(0, trainable=False, name='global_step')
+
+        if tf.__version__ >= "0.12":
+            raise Exception("Deprecated after TF0.12 : use other seq2seq layers instead.")
+
+        # =========== Fake output Layer for compute cost ======
+        # If we use sampled softmax, we need an output projection.
+        with tf.variable_scope(name) as vs:
+            output_projection = None
+            softmax_loss_function = None
+            # Sampled softmax only makes sense if we sample less than vocabulary size.
+            if num_samples > 0 and num_samples < self.target_vocab_size:
+                w = tf.get_variable("proj_w", [size, self.target_vocab_size], dtype=D_TYPE)
+                w_t = tf.transpose(w)
+                b = tf.get_variable("proj_b", [self.target_vocab_size], dtype=D_TYPE)
+                output_projection = (w, b)
+
+                def sampled_loss(inputs, labels):
+                    labels = tf.reshape(labels, [-1, 1])
+                    return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels, num_samples, self.target_vocab_size)
+
+                softmax_loss_function = sampled_loss
+
+            # ============ Seq Encode Layer =============
+            # Create the internal multi-layer cell for our RNN.
+            try:  # TF1.0
+                cell_creator = lambda: tf.contrib.rnn.GRUCell(size)
+            except:
+                cell_creator = lambda: tf.nn.rnn_cell.GRUCell(size)
+
+            if use_lstm:
+                try:  # TF1.0
+                    cell_creator = lambda: tf.contrib.rnn.BasicLSTMCell(size)
+                except:
+                    cell_creator = lambda: tf.nn.rnn_cell.BasicLSTMCell(size)
+
+            cell = cell_creator()
+            if num_layers > 1:
+                try:  # TF1.0
+                    cell = tf.contrib.rnn.MultiRNNCell([cell] * num_layers)
+                except:
+                    cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers)
+
+            # ============== Seq Decode Layer ============
+            # The seq2seq function: we use embedding for the input and attention.
+            def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
+                return tf.nn.seq2seq.embedding_attention_seq2seq(
+                    encoder_inputs,
+                    decoder_inputs,
+                    cell,
+                    num_encoder_symbols=source_vocab_size,
+                    num_decoder_symbols=target_vocab_size,
+                    embedding_size=size,
+                    output_projection=output_projection,
+                    feed_previous=do_decode)
+
+            #=============================================================
+            # Feeds for inputs.
+            self.encoder_inputs = []
+            self.decoder_inputs = []
+            self.target_weights = []
+            for i in xrange(buckets[-1][0]):  # Last bucket is the biggest one.
+                self.encoder_inputs.append(tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i)))
+            for i in xrange(buckets[-1][1] + 1):
+                self.decoder_inputs.append(tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(i)))
+                self.target_weights.append(tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(i)))
+
+            # Our targets are decoder inputs shifted by one.
+            targets = [self.decoder_inputs[i + 1] for i in xrange(len(self.decoder_inputs) - 1)]
+            self.targets = targets  # DH add for debug
+
+            # Training outputs and losses.
+            if forward_only:
+                self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
+                    self.encoder_inputs,
+                    self.decoder_inputs,
+                    targets,
+                    self.target_weights,
+                    buckets,
+                    lambda x, y: seq2seq_f(x, y, True),
+                    softmax_loss_function=softmax_loss_function)
+                # If we use output projection, we need to project outputs for decoding.
+                if output_projection is not None:
+                    for b in xrange(len(buckets)):
+                        self.outputs[b] = [tf.matmul(output, output_projection[0]) + output_projection[1] for output in self.outputs[b]]
+            else:
+                self.outputs, self.losses = tf.nn.seq2seq.model_with_buckets(
+                    self.encoder_inputs,
+                    self.decoder_inputs,
+                    targets,
+                    self.target_weights,
+                    buckets,
+                    lambda x, y: seq2seq_f(x, y, False),
+                    softmax_loss_function=softmax_loss_function)
+
+            # Gradients and SGD update operation for training the model.
+            params = tf.trainable_variables()
+            if not forward_only:
+                self.gradient_norms = []
+                self.updates = []
+                opt = tf.train.GradientDescentOptimizer(self.learning_rate)
+                for b in xrange(len(buckets)):
+                    gradients = tf.gradients(self.losses[b], params)
+                    clipped_gradients, norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
+                    self.gradient_norms.append(norm)
+                    self.updates.append(opt.apply_gradients(zip(clipped_gradients, params), global_step=self.global_step))
+
+            # if save into npz
+            self.all_params = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
+
+        # if save into ckpt
+        self.saver = tf.train.Saver(tf.all_variables())
+
+    def step(self, session, encoder_inputs, decoder_inputs, target_weights, bucket_id, forward_only):
+        """Run a step of the model feeding the given inputs.
 
     Parameters
     ----------
@@ -6858,54 +6951,53 @@ def step(self, session, encoder_inputs, decoder_inputs, target_weights,
     ValueError : if length of encoder_inputs, decoder_inputs, or
         target_weights disagrees with bucket size for the specified bucket_id.
     """
-    # Check if the sizes match.
-    encoder_size, decoder_size = self.buckets[bucket_id]
-    if len(encoder_inputs) != encoder_size:
-      raise ValueError("Encoder length must be equal to the one in bucket,"
-                       " %d != %d." % (len(encoder_inputs), encoder_size))
-    if len(decoder_inputs) != decoder_size:
-      raise ValueError("Decoder length must be equal to the one in bucket,"
-                       " %d != %d." % (len(decoder_inputs), decoder_size))
-    if len(target_weights) != decoder_size:
-      raise ValueError("Weights length must be equal to the one in bucket,"
-                       " %d != %d." % (len(target_weights), decoder_size))
-    # print('in model.step()')
-    # print('a',bucket_id, encoder_size, decoder_size)
-
-    # Input feed: encoder inputs, decoder inputs, target_weights, as provided.
-    input_feed = {}
-    for l in xrange(encoder_size):
-      input_feed[self.encoder_inputs[l].name] = encoder_inputs[l]
-    for l in xrange(decoder_size):
-      input_feed[self.decoder_inputs[l].name] = decoder_inputs[l]
-      input_feed[self.target_weights[l].name] = target_weights[l]
-    # print(self.encoder_inputs[l].name)
-    # print(self.decoder_inputs[l].name)
-    # print(self.target_weights[l].name)
-
-    # Since our targets are decoder inputs shifted by one, we need one more.
-    last_target = self.decoder_inputs[decoder_size].name
-    input_feed[last_target] = np.zeros([self.batch_size], dtype=np.int32)
-    # print('last_target', last_target)
-
-    # Output feed: depends on whether we do a backward step or not.
-    if not forward_only:
-      output_feed = [self.updates[bucket_id],  # Update Op that does SGD.
-                     self.gradient_norms[bucket_id],  # Gradient norm.
-                     self.losses[bucket_id]]  # Loss for this batch.
-    else:
-      output_feed = [self.losses[bucket_id]]  # Loss for this batch.
-      for l in xrange(decoder_size):  # Output logits.
-        output_feed.append(self.outputs[bucket_id][l])
+        # Check if the sizes match.
+        encoder_size, decoder_size = self.buckets[bucket_id]
+        if len(encoder_inputs) != encoder_size:
+            raise ValueError("Encoder length must be equal to the one in bucket," " %d != %d." % (len(encoder_inputs), encoder_size))
+        if len(decoder_inputs) != decoder_size:
+            raise ValueError("Decoder length must be equal to the one in bucket," " %d != %d." % (len(decoder_inputs), decoder_size))
+        if len(target_weights) != decoder_size:
+            raise ValueError("Weights length must be equal to the one in bucket," " %d != %d." % (len(target_weights), decoder_size))
+        # print('in model.step()')
+        # print('a',bucket_id, encoder_size, decoder_size)
+
+        # Input feed: encoder inputs, decoder inputs, target_weights, as provided.
+        input_feed = {}
+        for l in xrange(encoder_size):
+            input_feed[self.encoder_inputs[l].name] = encoder_inputs[l]
+        for l in xrange(decoder_size):
+            input_feed[self.decoder_inputs[l].name] = decoder_inputs[l]
+            input_feed[self.target_weights[l].name] = target_weights[l]
+        # print(self.encoder_inputs[l].name)
+        # print(self.decoder_inputs[l].name)
+        # print(self.target_weights[l].name)
+
+        # Since our targets are decoder inputs shifted by one, we need one more.
+        last_target = self.decoder_inputs[decoder_size].name
+        input_feed[last_target] = np.zeros([self.batch_size], dtype=np.int32)
+        # print('last_target', last_target)
+
+        # Output feed: depends on whether we do a backward step or not.
+        if not forward_only:
+            output_feed = [
+                self.updates[bucket_id],  # Update Op that does SGD.
+                self.gradient_norms[bucket_id],  # Gradient norm.
+                self.losses[bucket_id]  # Loss for this batch.
+            ]
+        else:
+            output_feed = [self.losses[bucket_id]]  # Loss for this batch.
+            for l in xrange(decoder_size):  # Output logits.
+                output_feed.append(self.outputs[bucket_id][l])
 
-    outputs = session.run(output_feed, input_feed)
-    if not forward_only:
-      return outputs[1], outputs[2], None  # Gradient norm, loss, no outputs.
-    else:
-      return None, outputs[0], outputs[1:]  # No gradient norm, loss, outputs.
+        outputs = session.run(output_feed, input_feed)
+        if not forward_only:
+            return outputs[1], outputs[2], None  # Gradient norm, loss, no outputs.
+        else:
+            return None, outputs[0], outputs[1:]  # No gradient norm, loss, outputs.
 
-  def get_batch(self, data, bucket_id, PAD_ID=0, GO_ID=1, EOS_ID=2, UNK_ID=3):
-    """ Get a random batch of data from the specified bucket, prepare for step.
+    def get_batch(self, data, bucket_id, PAD_ID=0, GO_ID=1, EOS_ID=2, UNK_ID=3):
+        """Get a random batch of data from the specified bucket and prepare it for step().
 
     To feed data in step(..) it must be a list of batch-major vectors, while
     data here contains single length-major cases. So the main logic of this
@@ -6930,49 +7022,45 @@ def get_batch(self, data, bucket_id, PAD_ID=0, GO_ID=1, EOS_ID=2, UNK_ID=3):
     The triple (encoder_inputs, decoder_inputs, target_weights) for
     the constructed batch that has the proper format to call step(...) later.
     """
-    encoder_size, decoder_size = self.buckets[bucket_id]
-    encoder_inputs, decoder_inputs = [], []
-
-    # Get a random batch of encoder and decoder inputs from data,
-    # pad them if needed, reverse encoder inputs and add GO to decoder.
-    for _ in xrange(self.batch_size):
-      encoder_input, decoder_input = random.choice(data[bucket_id])
-
-      # Encoder inputs are padded and then reversed.
-      encoder_pad = [PAD_ID] * (encoder_size - len(encoder_input))
-      encoder_inputs.append(list(reversed(encoder_input + encoder_pad)))
-
-      # Decoder inputs get an extra "GO" symbol, and are padded then.
-      decoder_pad_size = decoder_size - len(decoder_input) - 1
-      decoder_inputs.append([GO_ID] + decoder_input +
-                            [PAD_ID] * decoder_pad_size)
-
-    # Now we create batch-major vectors from the data selected above.
-    batch_encoder_inputs, batch_decoder_inputs, batch_weights = [], [], []
-
-    # Batch encoder inputs are just re-indexed encoder_inputs.
-    for length_idx in xrange(encoder_size):
-      batch_encoder_inputs.append(
-          np.array([encoder_inputs[batch_idx][length_idx]
-                    for batch_idx in xrange(self.batch_size)], dtype=np.int32))
-
-    # Batch decoder inputs are re-indexed decoder_inputs, we create weights.
-    for length_idx in xrange(decoder_size):
-      batch_decoder_inputs.append(
-          np.array([decoder_inputs[batch_idx][length_idx]
-                    for batch_idx in xrange(self.batch_size)], dtype=np.int32))
-
-      # Create target_weights to be 0 for targets that are padding.
-      batch_weight = np.ones(self.batch_size, dtype=np.float32)
-      for batch_idx in xrange(self.batch_size):
-        # We set weight to 0 if the corresponding target is a PAD symbol.
-        # The corresponding target is decoder_input shifted by 1 forward.
-        if length_idx < decoder_size - 1:
-          target = decoder_inputs[batch_idx][length_idx + 1]
-        if length_idx == decoder_size - 1 or target == PAD_ID:
-          batch_weight[batch_idx] = 0.0
-      batch_weights.append(batch_weight)
-    return batch_encoder_inputs, batch_decoder_inputs, batch_weights
+        encoder_size, decoder_size = self.buckets[bucket_id]
+        encoder_inputs, decoder_inputs = [], []
+
+        # Get a random batch of encoder and decoder inputs from data,
+        # pad them if needed, reverse encoder inputs and add GO to decoder.
+        for _ in xrange(self.batch_size):
+            encoder_input, decoder_input = random.choice(data[bucket_id])
+
+            # Encoder inputs are padded and then reversed.
+            encoder_pad = [PAD_ID] * (encoder_size - len(encoder_input))
+            encoder_inputs.append(list(reversed(encoder_input + encoder_pad)))
+
+            # Decoder inputs get an extra "GO" symbol, and are padded then.
+            decoder_pad_size = decoder_size - len(decoder_input) - 1
+            decoder_inputs.append([GO_ID] + decoder_input + [PAD_ID] * decoder_pad_size)
+
+        # Now we create batch-major vectors from the data selected above.
+        batch_encoder_inputs, batch_decoder_inputs, batch_weights = [], [], []
+
+        # Batch encoder inputs are just re-indexed encoder_inputs.
+        for length_idx in xrange(encoder_size):
+            batch_encoder_inputs.append(np.array([encoder_inputs[batch_idx][length_idx] for batch_idx in xrange(self.batch_size)], dtype=np.int32))
+
+        # Batch decoder inputs are re-indexed decoder_inputs, we create weights.
+        for length_idx in xrange(decoder_size):
+            batch_decoder_inputs.append(np.array([decoder_inputs[batch_idx][length_idx] for batch_idx in xrange(self.batch_size)], dtype=np.int32))
+
+            # Create target_weights to be 0 for targets that are padding.
+            batch_weight = np.ones(self.batch_size, dtype=np.float32)
+            for batch_idx in xrange(self.batch_size):
+                # We set weight to 0 if the corresponding target is a PAD symbol.
+                # The corresponding target is decoder_input shifted by 1 forward.
+                if length_idx < decoder_size - 1:
+                    target = decoder_inputs[batch_idx][length_idx + 1]
+                if length_idx == decoder_size - 1 or target == PAD_ID:
+                    batch_weight[batch_idx] = 0.0
+            batch_weights.append(batch_weight)
+        return batch_encoder_inputs, batch_decoder_inputs, batch_weights
+
 
 ## Developing or Untested
 # class MaxoutLayer(Layer):
@@ -7012,28 +7100,4 @@ def get_batch(self, data, bucket_id, PAD_ID=0, GO_ID=1, EOS_ID=2, UNK_ID=3):
 #         self.all_layers.extend( [self.outputs] )
 #         self.all_params.extend( [W, b] )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 #
diff --git a/tensorlayer/nlp.py b/tensorlayer/nlp.py
index 237628b3e..a42604480 100755
--- a/tensorlayer/nlp.py
+++ b/tensorlayer/nlp.py
@@ -16,7 +16,6 @@
 from six.moves import urllib, xrange
 from tensorflow.python.platform import gfile
 
-
 # Iteration functions
 
 
@@ -135,6 +134,7 @@ def sample(a=[], temperature=1.0):
         # exit()
         message = "For large vocabulary_size, choice a higher temperature\
          to avoid log error. Hint : use ``sample_top``. "
+
         warnings.warn(message, Warning)
         # print(a)
         # print(b)
@@ -231,12 +231,7 @@ class Vocabulary(object):
     >>> is 97322
     """
 
-    def __init__(self,
-                 vocab_file,
-                 start_word="<S>",
-                 end_word="</S>",
-                 unk_word="<UNK>",
-                 pad_word="<PAD>"):
+    def __init__(self, vocab_file, start_word="<S>", end_word="</S>", unk_word="<UNK>", pad_word="<PAD>"):
         if not tf.gfile.Exists(vocab_file):
             tf.logging.fatal("Vocab file %s not found.", vocab_file)
         tf.logging.info("Initializing vocabulary from file: %s", vocab_file)
@@ -444,7 +439,7 @@ def read_words(filename="nietzsche.txt", replace=['\n', '<eos>']):
     - `tensorflow.models.rnn.ptb.reader <https://github.com/tensorflow/tensorflow/tree/master/tensorflow/models/rnn/ptb>`_
     """
     with tf.gfile.GFile(filename, "r") as f:
-        try:    # python 3.4 or older
+        try:  # python 3.4 or older
             context_list = f.read().replace(*replace).split()
         except:  # python 3.5
             f.seek(0)
@@ -755,6 +750,7 @@ def save_vocab(count=[], name='vocab.txt'):
             f.write("%s %d\n" % (tf.compat.as_text(count[i][0]), count[i][1]))
     print("%d vocab saved to %s in %s" % (vocabulary_size, name, pwd))
 
+
 # Functions for translation
 
 
@@ -794,8 +790,11 @@ def basic_tokenizer(sentence, _WORD_SPLIT=re.compile(b"([.,!?\"':;)(])")):
     return [w for w in words if w]
 
 
-def create_vocabulary(vocabulary_path, data_path, max_vocabulary_size,
-                      tokenizer=None, normalize_digits=True,
+def create_vocabulary(vocabulary_path,
+                      data_path,
+                      max_vocabulary_size,
+                      tokenizer=None,
+                      normalize_digits=True,
                       _DIGIT_RE=re.compile(br"\d"),
                       _START_VOCAB=[b"_PAD", b"_GO", b"_EOS", b"_UNK"]):
     """Create vocabulary file (if it does not exist yet) from data file.
@@ -894,9 +893,7 @@ def initialize_vocabulary(vocabulary_path):
         raise ValueError("Vocabulary file %s not found.", vocabulary_path)
 
 
-def sentence_to_token_ids(sentence, vocabulary,
-                          tokenizer=None, normalize_digits=True,
-                          UNK_ID=3, _DIGIT_RE=re.compile(br"\d")):
+def sentence_to_token_ids(sentence, vocabulary, tokenizer=None, normalize_digits=True, UNK_ID=3, _DIGIT_RE=re.compile(br"\d")):
     """Convert a string to list of integers representing token-ids.
 
     For example, a sentence "I have a dog" may become tokenized into
@@ -929,9 +926,7 @@ def sentence_to_token_ids(sentence, vocabulary,
     return [vocabulary.get(re.sub(_DIGIT_RE, b"0", w), UNK_ID) for w in words]
 
 
-def data_to_token_ids(data_path, target_path, vocabulary_path,
-                      tokenizer=None, normalize_digits=True,
-                      UNK_ID=3, _DIGIT_RE=re.compile(br"\d")):
+def data_to_token_ids(data_path, target_path, vocabulary_path, tokenizer=None, normalize_digits=True, UNK_ID=3, _DIGIT_RE=re.compile(br"\d")):
     """Tokenize data file and turn into token-ids using given vocabulary file.
 
     This function loads data line-by-line from data_path, calls the above
@@ -961,16 +956,12 @@ def data_to_token_ids(data_path, target_path, vocabulary_path,
                     counter += 1
                     if counter % 100000 == 0:
                         print("  tokenizing line %d" % counter)
-                    token_ids = sentence_to_token_ids(line, vocab, tokenizer,
-                                                      normalize_digits, UNK_ID=UNK_ID,
-                                                      _DIGIT_RE=_DIGIT_RE)
+                    token_ids = sentence_to_token_ids(line, vocab, tokenizer, normalize_digits, UNK_ID=UNK_ID, _DIGIT_RE=_DIGIT_RE)
                     tokens_file.write(" ".join([str(tok) for tok in token_ids]) + "\n")
     else:
         print("Target path %s exists" % target_path)
 
 
-
-
 def moses_multi_bleu(hypotheses, references, lowercase=False):  # tl.nlp
     """Calculate the bleu score for hypotheses and references
     using the MOSES ulti-bleu.perl script.
@@ -1001,9 +992,7 @@ def moses_multi_bleu(hypotheses, references, lowercase=False):  # tl.nlp
 
     # Get MOSES multi-bleu script
     try:
-        multi_bleu_path, _ = urllib.request.urlretrieve(
-            "https://raw.githubusercontent.com/moses-smt/mosesdecoder/"
-            "master/scripts/generic/multi-bleu.perl")
+        multi_bleu_path, _ = urllib.request.urlretrieve("https://raw.githubusercontent.com/moses-smt/mosesdecoder/master/scripts/generic/multi-bleu.perl")
         os.chmod(multi_bleu_path, 0o755)
     except:  # pylint: disable=W0702
         tf.logging.info("Unable to fetch multi-bleu.perl script, using local.")
@@ -1028,8 +1017,7 @@ def moses_multi_bleu(hypotheses, references, lowercase=False):  # tl.nlp
             bleu_cmd += ["-lc"]
         bleu_cmd += [reference_file.name]
         try:
-            bleu_out = subprocess.check_output(
-                bleu_cmd, stdin=read_pred, stderr=subprocess.STDOUT)
+            bleu_out = subprocess.check_output(bleu_cmd, stdin=read_pred, stderr=subprocess.STDOUT)
             bleu_out = bleu_out.decode("utf-8")
             bleu_score = re.search(r"BLEU = (.+?),", bleu_out).group(1)
             bleu_score = float(bleu_score)
diff --git a/tensorlayer/ops.py b/tensorlayer/ops.py
index b0d911b5a..0cc5a0c95 100644
--- a/tensorlayer/ops.py
+++ b/tensorlayer/ops.py
@@ -1,9 +1,6 @@
 #! /usr/bin/python
 # -*- coding: utf-8 -*-
 
-
-
-
 import os
 import subprocess
 import sys
@@ -34,18 +31,19 @@ def exit_tf(sess=None, port=6006):
     if _platform == "linux" or _platform == "linux2":
         print('linux: %s' % text)
         os.system('nvidia-smi')
-        os.system('fuser '+ port +'/tcp -k')  # kill tensorboard 6006
-        os.system("nvidia-smi | grep python |awk '{print $3}'|xargs kill") # kill all nvidia-smi python process
+        os.system('fuser ' + str(port) + '/tcp -k')  # kill tensorboard on `port`; str() needed because port is an int (default 6006)
+        os.system("nvidia-smi | grep python |awk '{print $3}'|xargs kill")  # kill all nvidia-smi python process
         _exit()
     elif _platform == "darwin":
         print('OS X: %s' % text)
-        subprocess.Popen("lsof -i tcp:"+ str(port) +"  | grep -v PID | awk '{print $2}' | xargs kill", shell=True) # kill tensorboard
+        subprocess.Popen("lsof -i tcp:" + str(port) + "  | grep -v PID | awk '{print $2}' | xargs kill", shell=True)  # kill tensorboard
     elif _platform == "win32":
         print(text2 + "Windows")
         # TODO
     else:
         print(text2 + _platform)
 
+
 def open_tb(logdir='/tmp/tensorflow', port=6006):
     """Open Tensorboard.
 
@@ -67,13 +65,16 @@ def open_tb(logdir='/tmp/tensorflow', port=6006):
         # TODO
     elif _platform == "darwin":
         print('OS X: %s' % text)
-        subprocess.Popen(sys.prefix + " | python -m tensorflow.tensorboard --logdir=" + logdir + " --port=" + str(port), shell=True) # open tensorboard in localhost:6006/ or whatever port you chose
+        subprocess.Popen(
+            sys.prefix + " | python -m tensorflow.tensorboard --logdir=" + logdir + " --port=" + str(port),
+            shell=True)  # open tensorboard in localhost:6006/ or whatever port you chose
     elif _platform == "win32":
         print('Windows%s' % text2)
         # TODO
     else:
         print(_platform + text2)
 
+
 def clear_all(printable=True):
     """Clears all the placeholder variables of keep prob,
     including keeping probabilities of all dropout, denoising, dropconnect etc.
@@ -96,6 +97,7 @@ def clear_all(printable=True):
 
         del globals()[var]
 
+
 # def clear_all2(vars, printable=True):
 #     """
 #     The :function:`clear_all()` Clears all the placeholder variables of keep prob,
@@ -116,6 +118,7 @@ def clear_all(printable=True):
 #
 #         del var
 
+
 def set_gpu_fraction(sess=None, gpu_fraction=0.3):
     """Set the GPU memory fraction for the application.
 
@@ -132,7 +135,7 @@ def set_gpu_fraction(sess=None, gpu_fraction=0.3):
     """
     print("[TL]: GPU MEM Fraction %f" % gpu_fraction)
     gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
-    sess = tf.Session(config = tf.ConfigProto(gpu_options = gpu_options))
+    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
     return sess
 
 
@@ -167,6 +170,7 @@ def disable_print():
     sys.stdout = None
     sys.stderr = os.devnull
 
+
 def enable_print():
     """Enable console output, ``suppress_stdout`` is recommended.
 
@@ -223,7 +227,6 @@ def suppress_stdout():
             sys.stdout = old_stdout
 
 
-
 def get_site_packages_directory():
     """Print and return the site-packages directory.
 
@@ -241,7 +244,6 @@ def get_site_packages_directory():
         return False
 
 
-
 def empty_trash():
     """Empty trash folder.
 
@@ -266,4 +268,5 @@ def empty_trash():
     else:
         print(_platform)
 
+
 #