From 655f76998b615d76afd04ad26c0ca6711045c234 Mon Sep 17 00:00:00 2001
From: chengduoZH <zhaochengduo@163.com>
Date: Thu, 11 Jan 2018 15:18:58 +0800
Subject: [PATCH 1/3] refine tf mnist

---
 tensorflow/mnist.py | 248 ++++++++++++++++++++++----------------------
 1 file changed, 122 insertions(+), 126 deletions(-)

diff --git a/tensorflow/mnist.py b/tensorflow/mnist.py
index f14027e..e6866fd 100644
--- a/tensorflow/mnist.py
+++ b/tensorflow/mnist.py
@@ -10,140 +10,136 @@
 import paddle.v2 as paddle
 import paddle.v2.fluid as fluid
 
-BATCH_SIZE = 128
-PASS_NUM = 5
-SEED = 1
 DTYPE = tf.float32
 
 
-def normal_scale(size, channels):
-    scale = (2.0 / (size**2 * channels))**0.5
-    return scale
-
-
-# NOTE(dzhwinter) : tensorflow use Phliox random algorithm
-# as normal generator, fetch out paddle random for comparization
-def paddle_random_normal(shape, loc=.0, scale=1., seed=1, dtype="float32"):
-    program = fluid.framework.Program()
-    block = program.global_block()
-    w = block.create_var(
-        dtype="float32",
-        shape=shape,
-        lod_level=0,
-        name="param",
-        initializer=fluid.initializer.NormalInitializer(
-            loc=.0, scale=scale, seed=seed))
-    place = fluid.CPUPlace()
-    exe = fluid.Executor(place)
-    out = exe.run(program, fetch_list=[w])
-    return np.array(out[0])
-
-
-train_reader = paddle.batch(paddle.dataset.mnist.train(), batch_size=BATCH_SIZE)
-images = tf.placeholder(DTYPE, shape=(None, 28, 28, 1))
-labels = tf.placeholder(tf.int64, shape=(None, ))
-
-# conv layer
-arg = tf.convert_to_tensor(
-    np.transpose(
-        paddle_random_normal(
-            [20, 1, 5, 5], scale=normal_scale(5, 1), seed=SEED, dtype=DTYPE),
-        axes=[2, 3, 1, 0]))
-conv1_weights = tf.Variable(arg)
-conv1_bias = tf.Variable(tf.zeros([20]), dtype=DTYPE)
-conv1 = tf.nn.conv2d(
-    images, conv1_weights, strides=[1, 1, 1, 1], padding="VALID")
-relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_bias))
-pool1 = tf.nn.max_pool(
-    relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
-
-arg = tf.convert_to_tensor(
-    np.transpose(
-        paddle_random_normal(
-            [50, 20, 5, 5], scale=normal_scale(5, 20), seed=SEED, dtype=DTYPE),
-        axes=[2, 3, 1, 0]))
-conv2_weights = tf.Variable(arg)
-conv2_bias = tf.Variable(tf.zeros([50]), dtype=DTYPE)
-conv2 = tf.nn.conv2d(
-    pool1, conv2_weights, strides=[1, 1, 1, 1], padding="VALID")
-relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_bias))
-pool2 = tf.nn.max_pool(
-    relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
-
-pool_shape = pool2.get_shape().as_list()
-hidden_dim = reduce(lambda a, b: a * b, pool_shape[1:], 1)
-reshape = tf.reshape(pool2, shape=(tf.shape(pool2)[0], hidden_dim))
-
-# fc layer
-# NOTE(dzhwinter) : paddle has a NCHW data format, tensorflow has a NHWC data format
-# need to convert the fc weight
-paddle_weight = paddle_random_normal(
-    [hidden_dim, 10],
-    scale=normal_scale(hidden_dim, 10),
-    seed=SEED,
-    dtype=DTYPE)
-new_shape = pool_shape[-1:] + pool_shape[1:-1] + [10]
-paddle_weight = np.reshape(paddle_weight, new_shape)
-paddle_weight = np.transpose(paddle_weight, [1, 2, 0, 3])
-
-arg = tf.convert_to_tensor(np.reshape(paddle_weight, [hidden_dim, 10]))
-fc_weights = tf.Variable(arg, dtype=DTYPE)
-fc_bias = tf.Variable(tf.zeros([10]), dtype=DTYPE)
-logits = tf.matmul(reshape, fc_weights) + fc_bias
-
-# cross entropy
-
-prediction = tf.nn.softmax(logits)
-
-one_hot_labels = tf.one_hot(labels, depth=10)
-cost = -tf.reduce_sum(tf.log(prediction) * one_hot_labels, [1])
-avg_cost = tf.reduce_mean(cost)
-
-correct = tf.equal(tf.argmax(prediction, 1), labels)
-accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
-g_accuracy = tf.metrics.accuracy(labels, tf.argmax(prediction, axis=1))
-
-opt = tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9, beta2=0.999)
-train_op = opt.minimize(avg_cost)
-
-
-def eval_test():
+def parse_args():
+    parser = argparse.ArgumentParser("mnist model benchmark.")
+    parser.add_argument(
+        '--batch_size', type=int, default=128, help='The minibatch size.')
+    parser.add_argument(
+        '--iterations', type=int, default=35, help='The number of minibatches.')
+    parser.add_argument(
+        '--pass_num', type=int, default=5, help='The number of passes.')
+    parser.add_argument(
+        '--device',
+        type=str,
+        default='GPU',
+        choices=['CPU', 'GPU'],
+        help='The device type.')
+    args = parser.parse_args()
+    return args
+
+
+def run_benchmark(args):
+    def weight_variable(dtype, shape):
+        initial = tf.truncated_normal(shape, stddev=0.1, dtype=dtype)
+        return tf.Variable(initial)
+
+    def bias_variable(dtype, shape):
+        initial = tf.constant(0.1, shape=shape, dtype=dtype)
+        return tf.Variable(initial)
+
+    device = '/cpu:0' if args.device == 'CPU' else '/device:GPU:0'
+    with tf.device(device):
+
+        images = tf.placeholder(DTYPE, shape=(None, 28, 28, 1))
+        labels = tf.placeholder(tf.int64, shape=(None, ))
+
+        conv1_weights = weight_variable(DTYPE, [5, 5, 1, 20])
+        conv1_bias = bias_variable(DTYPE, [20])
+        conv1 = tf.nn.conv2d(
+            images, conv1_weights, strides=[1, 1, 1, 1], padding="VALID")
+        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_bias))
+        pool1 = tf.nn.max_pool(
+            relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
+
+        conv2_weights = weight_variable(DTYPE, [5, 5, 20, 50])
+        conv2_bias = bias_variable(DTYPE, [50])
+        conv2 = tf.nn.conv2d(
+            pool1, conv2_weights, strides=[1, 1, 1, 1], padding="VALID")
+        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_bias))
+        pool2 = tf.nn.max_pool(
+            relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
+
+        pool_shape = pool2.get_shape().as_list()
+        hidden_dim = reduce(lambda a, b: a * b, pool_shape[1:], 1)
+        reshape = tf.reshape(pool2, shape=(tf.shape(pool2)[0], hidden_dim))
+
+        fc_weights = weight_variable(DTYPE, [hidden_dim, 10])
+        fc_bias = bias_variable(DTYPE, [10])
+        logits = tf.matmul(reshape, fc_weights) + fc_bias
+        prediction = tf.nn.softmax(logits)
+
+        one_hot_labels = tf.one_hot(labels, depth=10)
+        cost = -tf.reduce_sum(tf.log(prediction) * one_hot_labels, [1])
+        avg_cost = tf.reduce_mean(cost)
+
+        correct = tf.equal(tf.argmax(prediction, 1), labels)
+        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
+        g_accuracy = tf.metrics.accuracy(labels, tf.argmax(prediction, axis=1))
+
+        opt = tf.train.AdamOptimizer(
+            learning_rate=0.001, beta1=0.9, beta2=0.999)
+        train_op = opt.minimize(avg_cost)
+        # train_op = tf.train.AdamOptimizer(1e-4).minimize(avg_cost)
+
+    train_reader = paddle.batch(
+        paddle.dataset.mnist.train(), batch_size=args.batch_size)
     test_reader = paddle.batch(
-        paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
-    for batch_id, data in enumerate(test_reader()):
-        images_data = np.array(
-            map(lambda x: np.transpose(x[0].reshape([1, 28, 28]), axes=[1,2,0]), data)).astype("float32")
-        labels_data = np.array(map(lambda x: x[1], data)).astype("int64")
-        _, loss, acc, g_acc = sess.run(
-            [train_op, avg_cost, accuracy, g_accuracy],
-            feed_dict={images: images_data,
-                       labels: labels_data})
-    return g_acc[1]
-
-
-config = tf.ConfigProto(
-    intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
-with tf.Session(config=config) as sess:
-    init_g = tf.global_variables_initializer()
-    init_l = tf.local_variables_initializer()
-    sess.run(init_g)
-    sess.run(init_l)
-    for pass_id in range(PASS_NUM):
-        pass_start = time.time()
-        for batch_id, data in enumerate(train_reader()):
+        paddle.dataset.mnist.test(), batch_size=args.batch_size)
+
+    def eval_test():
+        for batch_id, data in enumerate(test_reader()):
             images_data = np.array(
                 map(lambda x: np.transpose(x[0].reshape([1, 28, 28]), axes=[1,2,0]), data)).astype("float32")
             labels_data = np.array(map(lambda x: x[1], data)).astype("int64")
-            start = time.time()
+
             _, loss, acc, g_acc = sess.run(
                 [train_op, avg_cost, accuracy, g_accuracy],
                 feed_dict={images: images_data,
                            labels: labels_data})
-            end = time.time()
-
-            print("pass=%d, batch=%d, loss=%f, error=%f, elapse=%f" %
-                  (pass_id, batch_id, loss, 1 - acc, (end - start) / 1000))
-        pass_end = time.time()
-        test_avg_acc = eval_test()
-        print("pass=%d, training_avg_accuracy=%f, test_avg_acc=%f, elapse=%f" %
-              (pass_id, g_acc[1], test_avg_acc, (pass_end - pass_start) / 1000))
+        return g_acc[1]
+
+    config = tf.ConfigProto(
+        intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
+    with tf.Session(config=config) as sess:
+        init_g = tf.global_variables_initializer()
+        init_l = tf.local_variables_initializer()
+        sess.run(init_g)
+        sess.run(init_l)
+        for pass_id in range(args.pass_num):
+            pass_start = time.time()
+            for batch_id, data in enumerate(train_reader()):
+                images_data = np.array(
+                    map(lambda x: np.transpose(x[0].reshape([1, 28, 28]), axes=[1,2,0]), data)).astype("float32")
+                labels_data = np.array(map(lambda x: x[1], data)).astype(
+                    "int64")
+                start = time.time()
+                _, loss, acc, g_acc = sess.run(
+                    [train_op, avg_cost, accuracy, g_accuracy],
+                    feed_dict={images: images_data,
+                               labels: labels_data})
+                end = time.time()
+
+                print("pass=%d, batch=%d, loss=%f, error=%f, elapse=%f" %
+                      (pass_id, batch_id, loss, 1 - acc, (end - start) / 1000))
+            pass_end = time.time()
+            test_avg_acc = eval_test()
+            print(
+                "pass=%d, training_avg_accuracy=%f, test_avg_acc=%f, elapse=%f"
+                % (pass_id, g_acc[1], test_avg_acc,
+                   (pass_end - pass_start) / 1000))
+
+
+def print_arguments(args):
+    print('-----------  Configuration Arguments -----------')
+    for arg, value in sorted(vars(args).iteritems()):
+        print('%s: %s' % (arg, value))
+    print('------------------------------------------------')
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    print_arguments(args)
+    run_benchmark(args)

From 059c98d38536ab82bbee6ea077768ef12604af6d Mon Sep 17 00:00:00 2001
From: chengduoZH <zhaochengduo@163.com>
Date: Fri, 12 Jan 2018 11:28:08 +0800
Subject: [PATCH 2/3] address review comments

---
 tensorflow/mnist.py | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/tensorflow/mnist.py b/tensorflow/mnist.py
index e6866fd..992d350 100644
--- a/tensorflow/mnist.py
+++ b/tensorflow/mnist.py
@@ -42,10 +42,10 @@ def bias_variable(dtype, shape):
 
     device = '/cpu:0' if args.device == 'CPU' else '/device:GPU:0'
     with tf.device(device):
-
         images = tf.placeholder(DTYPE, shape=(None, 28, 28, 1))
         labels = tf.placeholder(tf.int64, shape=(None, ))
 
+        # conv1, relu, pool1
         conv1_weights = weight_variable(DTYPE, [5, 5, 1, 20])
         conv1_bias = bias_variable(DTYPE, [20])
         conv1 = tf.nn.conv2d(
@@ -54,6 +54,7 @@ def bias_variable(dtype, shape):
         pool1 = tf.nn.max_pool(
             relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
 
+        # conv2, relu, pool2
         conv2_weights = weight_variable(DTYPE, [5, 5, 20, 50])
         conv2_bias = bias_variable(DTYPE, [50])
         conv2 = tf.nn.conv2d(
@@ -62,23 +63,36 @@ def bias_variable(dtype, shape):
         pool2 = tf.nn.max_pool(
             relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
 
+        # Fully connected layer: flatten pool2 and project to 10 class logits
         pool_shape = pool2.get_shape().as_list()
         hidden_dim = reduce(lambda a, b: a * b, pool_shape[1:], 1)
         reshape = tf.reshape(pool2, shape=(tf.shape(pool2)[0], hidden_dim))
-
         fc_weights = weight_variable(DTYPE, [hidden_dim, 10])
         fc_bias = bias_variable(DTYPE, [10])
         logits = tf.matmul(reshape, fc_weights) + fc_bias
+
+        # Get prediction
         prediction = tf.nn.softmax(logits)
 
+        # Loss: mean cross-entropy between softmax output and one-hot labels
         one_hot_labels = tf.one_hot(labels, depth=10)
         cost = -tf.reduce_sum(tf.log(prediction) * one_hot_labels, [1])
         avg_cost = tf.reduce_mean(cost)
 
+        # Get accuracy
         correct = tf.equal(tf.argmax(prediction, 1), labels)
         accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
-        g_accuracy = tf.metrics.accuracy(labels, tf.argmax(prediction, axis=1))
 
+        # Accumulated accuracy (g_accuracy) plus an op that re-initializes its local variables
+        with tf.variable_scope("reset_metrics_accuracy_scope") as scope:
+            g_accuracy = tf.metrics.accuracy(
+                labels, tf.argmax(
+                    prediction, axis=1))
+            vars = tf.contrib.framework.get_variables(
+                scope, collection=tf.GraphKeys.LOCAL_VARIABLES)
+            g_accuracy_reset_op = tf.variables_initializer(vars)
+
+        # Optimizer: Adam minimizing the mean cross-entropy loss
         opt = tf.train.AdamOptimizer(
             learning_rate=0.001, beta1=0.9, beta2=0.999)
         train_op = opt.minimize(avg_cost)
@@ -90,6 +104,7 @@ def bias_variable(dtype, shape):
         paddle.dataset.mnist.test(), batch_size=args.batch_size)
 
     def eval_test():
+        sess.run(g_accuracy_reset_op)
         for batch_id, data in enumerate(test_reader()):
             images_data = np.array(
                 map(lambda x: np.transpose(x[0].reshape([1, 28, 28]), axes=[1,2,0]), data)).astype("float32")
@@ -109,12 +124,15 @@ def eval_test():
         sess.run(init_g)
         sess.run(init_l)
         for pass_id in range(args.pass_num):
+            sess.run(g_accuracy_reset_op)
+
             pass_start = time.time()
             for batch_id, data in enumerate(train_reader()):
                 images_data = np.array(
                     map(lambda x: np.transpose(x[0].reshape([1, 28, 28]), axes=[1,2,0]), data)).astype("float32")
                 labels_data = np.array(map(lambda x: x[1], data)).astype(
                     "int64")
+
                 start = time.time()
                 _, loss, acc, g_acc = sess.run(
                     [train_op, avg_cost, accuracy, g_accuracy],
@@ -124,8 +142,10 @@ def eval_test():
 
                 print("pass=%d, batch=%d, loss=%f, error=%f, elapse=%f" %
                       (pass_id, batch_id, loss, 1 - acc, (end - start) / 1000))
+
             pass_end = time.time()
             test_avg_acc = eval_test()
+
             print(
                 "pass=%d, training_avg_accuracy=%f, test_avg_acc=%f, elapse=%f"
                 % (pass_id, g_acc[1], test_avg_acc,

From b796d23acfeb7cb12ea5dbd5e6d6b2cf30309b5d Mon Sep 17 00:00:00 2001
From: chengduoZH <zhaochengduo@163.com>
Date: Fri, 12 Jan 2018 12:40:39 +0800
Subject: [PATCH 3/3] fix fluid/mnist.py

---
 fluid/mnist.py      | 11 +++++++----
 tensorflow/mnist.py |  1 -
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/fluid/mnist.py b/fluid/mnist.py
index ebe5493..710d256 100644
--- a/fluid/mnist.py
+++ b/fluid/mnist.py
@@ -13,8 +13,9 @@
 
 SEED = 1
 DTYPE = "float32"
+
 # random seed must set before configuring the network.
-fluid.default_startup_program().random_seed = SEED
+# fluid.default_startup_program().random_seed = SEED
 
 
 def parse_args():
@@ -149,10 +150,12 @@ def run_benchmark(model, args):
                   (pass_id, batch_id, loss, 1 - acc, (end - start) / 1000))
 
         pass_end = time.time()
+        train_avg_acc = accuracy.eval(exe)
         test_avg_acc = eval_test(exe, accuracy, avg_cost)
-        pass_acc = accuracy.eval(exe)
-        print("pass=%d, test_avg_acc=%f, test_avg_acc=%f, elapse=%f" %
-              (pass_id, pass_acc, test_avg_acc, (pass_end - pass_start) / 1000))
+
+        print("pass=%d, train_avg_acc=%f, test_avg_acc=%f, elapse=%f" %
+              (pass_id, train_avg_acc, test_avg_acc,
+               (pass_end - pass_start) / 1000))
 
 
 if __name__ == '__main__':
diff --git a/tensorflow/mnist.py b/tensorflow/mnist.py
index 992d350..2f800ba 100644
--- a/tensorflow/mnist.py
+++ b/tensorflow/mnist.py
@@ -8,7 +8,6 @@
 
 import tensorflow as tf
 import paddle.v2 as paddle
-import paddle.v2.fluid as fluid
 
 DTYPE = tf.float32