#!/usr/bin/env python
# -*- coding: utf-8 -*-
# File: mnist-tflayers.py
import tensorflow as tf

"""
MNIST ConvNet example using tf.layers
Mostly the same as 'mnist-convnet.py',
the only differences are:
1. use tf.layers
2. use tf.layers variable names to summarize weights
"""

# Just import everything into current namespace
from tensorpack import *
from tensorpack.tfutils import summary, get_current_tower_context
from tensorpack.dataflow import dataset

IMAGE_SIZE = 28
# Monkey-patch tf.layers to support argscope.
enable_argscope_for_module(tf.layers)
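# After the call above, functions in tf.layers accept defaults set with the
# `argscope` context manager, which is used in build_graph below.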


class Model(ModelDesc):
    def inputs(self):
        """
        Define all the inputs (with type, shape, name) that the graph will need.
        """
        return [tf.placeholder(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE), 'input'),
                tf.placeholder(tf.int32, (None,), 'label')]

    def build_graph(self, image, label):
        """This function should build the model which takes the input variables
        and return the cost at the end."""
        # In tensorflow, inputs to convolution functions are assumed to be
        # NHWC. Add a single channel here.
        image = tf.expand_dims(image, 3)

        image = image * 2 - 1  # center the pixel values at zero

        # The context manager `argscope` sets the default options for all the layers under
        # this context. Here we use 32-channel convolutions with 3x3 kernels.
        with argscope([tf.layers.conv2d], padding='same', activation=tf.nn.relu):
            l = tf.layers.conv2d(image, 32, 3, name='conv0')
            l = tf.layers.max_pooling2d(l, 2, 2, padding='valid')
            l = tf.layers.conv2d(l, 32, 3, name='conv1')
            l = tf.layers.conv2d(l, 32, 3, name='conv2')
            l = tf.layers.max_pooling2d(l, 2, 2, padding='valid')
            l = tf.layers.conv2d(l, 32, 3, name='conv3')
            l = tf.layers.flatten(l)
            l = tf.layers.dense(l, 512, activation=tf.nn.relu, name='fc0')
            l = tf.layers.dropout(l, rate=0.5,
                                  training=get_current_tower_context().is_training)
            logits = tf.layers.dense(l, 10, activation=tf.identity, name='fc1')

        # a vector of length B with the loss of each sample
        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')  # the average cross-entropy loss

        correct = tf.cast(tf.nn.in_top_k(logits, label, 1), tf.float32, name='correct')
        accuracy = tf.reduce_mean(correct, name='accuracy')

        # This will monitor training error & accuracy (in a moving-average fashion). The value will be automatically
        # 1. written to tensorboard
        # 2. written to stat.json
        # 3. printed after each epoch
        train_error = tf.reduce_mean(1 - correct, name='train_error')
        summary.add_moving_summary(train_error, accuracy)

        # Use a regex to find parameters to apply weight decay to.
        # Here we apply weight decay to the weight matrices (which tf.layers names 'kernel')
        # of all fc layers. If you don't like regex, you can certainly define the cost in other ways.
        wd_cost = tf.multiply(1e-5,
                              regularize_cost('fc.*/kernel', tf.nn.l2_loss),
                              name='regularize_loss')
        total_cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, total_cost)

        # monitor histograms of all weights (of conv and fc layers) in tensorboard
        summary.add_param_summary(('.*/kernel', ['histogram', 'rms']))
        # the function should return the total cost to be optimized
        return total_cost

    def optimizer(self):
        # Decay the learning rate by 0.3 roughly every 10 epochs
        # (MNIST training set: 60000 images / batch size 128 ≈ 468 steps per epoch).
        lr = tf.train.exponential_decay(
            learning_rate=1e-3,
            global_step=get_global_step_var(),
            decay_steps=468 * 10,
            decay_rate=0.3, staircase=True, name='learning_rate')
        # This will also put the summary in tensorboard, stat.json and print in terminal,
        # but this time without moving average
        tf.summary.scalar('lr', lr)
        return tf.train.AdamOptimizer(lr)


def get_data():
    # Wrap the MNIST DataFlow into batches; remainder=True keeps the final
    # (smaller) batch so that evaluation covers the whole test set.
    train = BatchData(dataset.Mnist('train'), 128)
    test = BatchData(dataset.Mnist('test'), 256, remainder=True)
    return train, test


if __name__ == '__main__':
    # automatically set up the directory train_log/mnist-tflayers for logging
    logger.auto_set_dir()

    dataset_train, dataset_test = get_data()

    # How many iterations you want in each epoch.
    # This (data.size()) is the default value.
    steps_per_epoch = dataset_train.size()

    # get the config which contains everything necessary for training
    config = TrainConfig(
        model=Model(),
        # The input source for training. FeedInput is slow; it is used here only for demo purposes.
        # In practice it's best to use QueueInput or others (see the sketch at the end of this
        # file and the tutorials for details).
        data=FeedInput(dataset_train),
        callbacks=[
            ModelSaver(),  # save the model after every epoch
            MaxSaver('validation_accuracy'),  # save the model with highest accuracy (prefix 'validation_')
            InferenceRunner(  # run inference (for validation) after every epoch
                dataset_test,  # the DataFlow instance used for validation
                ScalarStats(['cross_entropy_loss', 'accuracy'])),
        ],
        steps_per_epoch=steps_per_epoch,
        max_epoch=100,
    )
    launch_train_with_config(config, SimpleTrainer())
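
# The config above uses FeedInput for simplicity. As a rough, untested sketch
# (assuming tensorpack's QueueInput input source, mentioned in the comment above),
# the training input could instead be fed through a queue for better throughput:
#
#     config = TrainConfig(
#         model=Model(),
#         data=QueueInput(dataset_train),   # prefetch batches through a TF queue
#         callbacks=[...],                  # same callbacks as above
#         steps_per_epoch=steps_per_epoch,
#         max_epoch=100,
#     )
#     launch_train_with_config(config, SimpleTrainer())
#
# To run this script: `python mnist-tflayers.py`. Logs and checkpoints go to the
# directory chosen by logger.auto_set_dir() (train_log/mnist-tflayers).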