-
Notifications
You must be signed in to change notification settings - Fork 772
/
Copy pathDCGAN.py
199 lines (174 loc) · 8.93 KB
/
DCGAN.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
"""
2017/01/09
"""
import sys
import numpy as np
import tensorflow as tf
from keras.datasets import mnist
from PIL import Image
# Batch normalization
def batch_norm(inpt, epsilon=1e-05, decay=0.9, is_training=True, name="batch_norm"):
"""
Implements the bacth normalization
The input is 4-D tensor
"""
bn = tf.contrib.layers.batch_norm(inpt, decay=decay, updates_collections=None,
epsilon=epsilon, scale=True, is_training=is_training, scope=name)
return bn
# Convolution 2-D
def conv2d(inpt, nb_filter, filter_size=5, strides=2, bias=True, stddev=0.02, padding="SAME",
name="conv2d"):
in_channels = inpt.get_shape().as_list()[-1]
with tf.variable_scope(name):
w = tf.get_variable("w", shape=[filter_size, filter_size, in_channels, nb_filter],
initializer=tf.truncated_normal_initializer(mean=0.0, stddev=stddev))
conv = tf.nn.conv2d(inpt, w, strides=[1, strides, strides, 1], padding=padding)
if bias:
b = tf.get_variable("b", shape=[nb_filter,], initializer=tf.constant_initializer(0.0))
conv = tf.nn.bias_add(conv, b)
return conv
# Convolution 2D Transpose
def deconv2d(inpt, output_shape, filter_size=5, strides=2, bias=True, stddev=0.02,
padding="SAME", name="deconv2d"):
in_channels = inpt.get_shape().as_list()[-1]
with tf.variable_scope(name):
# Note: filter with shape [height, width, output_channels, in_channels]
w = tf.get_variable("w", shape=[filter_size, filter_size, output_shape[-1], in_channels],
initializer=tf.truncated_normal_initializer(mean=0.0, stddev=stddev))
deconv = tf.nn.conv2d_transpose(inpt, w, output_shape=output_shape, strides=[1, strides, strides, 1],
padding=padding)
if bias:
b = tf.get_variable("b", shape=[output_shape[-1]], initializer=tf.constant_initializer(0.0))
deconv = tf.nn.bias_add(deconv, b)
return deconv
# Leaky ReLU
def lrelu(x, leak=0.2, name="lrelu"):
return tf.maximum(x, x*leak)
# Linear
def linear(x, output_dim, stddev=0.02, name="linear"):
input_dim = x.get_shape().as_list()[-1]
with tf.variable_scope(name):
w = tf.get_variable("w", shape=[input_dim, output_dim], initializer=\
tf.random_normal_initializer(stddev=stddev))
b = tf.get_variable("b", shape=[output_dim,], initializer=tf.constant_initializer(0.0))
return tf.nn.xw_plus_b(x, w, b)
class DCGAN(object):
"""A class of DCGAN model"""
def __init__(self, z_dim=100, output_dim=28, batch_size=100, c_dim=1, df_dim=64, gf_dim=64, dfc_dim=1024,
n_conv=3, n_deconv=2):
"""
:param z_dim: int, the dimension of z (the noise input of generator)
:param output_dim: int, the resolution in pixels of the images (height, width)
:param batch_size: int, the size of the mini-batch
:param c_dim: int, the dimension of image color, for minist, it is 1 (grayscale)
:param df_dim: int, the number of filters in the first convolution layer of discriminator
:param gf_dim: int, the number of filters in the penultimate deconvolution layer of generator (last is 1)
:param dfc_dim: int, the number of units in the penultimate fully-connected layer of discriminator (last is 1)
:param n_conv: int, number of convolution layer in discriminator (the number of filters is double increased)
:param n_deconv: int, number of deconvolution layer in generator (the number of filters is double reduced)
"""
self.z_dim = z_dim
self.output_dim = output_dim
self.c_dim = c_dim
self.df_dim = df_dim
self.gf_dim = gf_dim
self.dfc_dim = dfc_dim
self.n_conv = n_conv
self.n_deconv = n_deconv
self.batch_size = batch_size
self._build_model()
def _build_model(self):
# input
self.z = tf.placeholder(tf.float32, shape=[self.batch_size, self.z_dim])
self.x = tf.placeholder(tf.float32, shape=[self.batch_size, self.output_dim,
self.output_dim, self.c_dim])
# G
self.G = self._generator(self.z)
# D
self.D1, d1_logits = self._discriminator(self.x, reuse=False)
self.D2, d2_logits = self._discriminator(self.G, reuse=True)
self.g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(d2_logits, tf.ones_like(self.D2)))
real_loss = tf.nn.sigmoid_cross_entropy_with_logits(d1_logits, tf.ones_like(self.D1))
fake_loss = tf.nn.sigmoid_cross_entropy_with_logits(d2_logits, tf.zeros_like(self.D2))
self.d_loss = tf.reduce_mean(real_loss + fake_loss)
vars = tf.trainable_variables()
self.d_vars = [v for v in vars if "D" in v.name]
self.g_vars = [v for v in vars if "G" in v.name]
def _discriminator(self, input, reuse=False):
with tf.variable_scope("D", reuse=reuse):
h = lrelu(conv2d(input, nb_filter=self.df_dim, name="d_conv0"))
for i in range(1, self.n_conv):
conv = conv2d(h, nb_filter=self.df_dim*(2**i), name="d_conv{0}".format(i))
h = lrelu(batch_norm(conv, name="d_bn{0}".format(i)))
h = linear(tf.reshape(h, shape=[self.batch_size, -1]), self.dfc_dim, name="d_lin0")
h = linear(tf.nn.tanh(h), 1, name="d_lin1")
return tf.nn.sigmoid(h), h
def _generator(self, input):
with tf.variable_scope("G"):
nb_fliters = [self.gf_dim]
f_size = [self.output_dim//2]
for i in range(1, self.n_deconv):
nb_fliters.append(nb_fliters[-1]*2)
f_size.append(f_size[-1]//2)
h = linear(input, nb_fliters[-1]*f_size[-1]*f_size[-1], name="g_lin0")
h = tf.nn.relu(batch_norm(tf.reshape(h, shape=[-1, f_size[-1], f_size[-1], nb_fliters[-1]]),
name="g_bn0"))
for i in range(1, self.n_deconv):
h = deconv2d(h, [self.batch_size, f_size[-i-1], f_size[-i-1], nb_fliters[-i-1]],
name="g_deconv{0}".format(i-1))
h = tf.nn.relu(batch_norm(h, name="g_bn{0}".format(i)))
h = deconv2d(h, [self.batch_size, self.output_dim, self.output_dim, self.c_dim],
name="g_deconv{0}".format(self.n_deconv-1))
return tf.nn.tanh(h)
def combine_images(images):
"""Combine the bacth images"""
num = images.shape[0]
width = int(np.sqrt(num))
height = int(np.ceil(num/width))
h, w = images.shape[1:-1]
img = np.zeros((height*h, width*w), dtype=images.dtype)
for index, m in enumerate(images):
i = int(index/width)
j = index % width
img[i*h:(i+1)*h, j*w:(j+1)*w] = m[:, :, 0]
return img
if __name__ == "__main__":
# Load minist data
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = (np.asarray(X_train, dtype=np.float32) - 127.5)/127.5
X_train = np.reshape(X_train, [-1, 28, 28, 1])
z_dim = 100
batch_size = 128
lr = 0.0002
n_epochs = 10
sess = tf.Session()
dcgan = DCGAN(z_dim=z_dim, output_dim=28, batch_size=128, c_dim=1)
# The optimizers
d_train_op = tf.train.AdamOptimizer(lr, beta1=0.5).minimize(dcgan.d_loss,
var_list=dcgan.d_vars)
g_train_op = tf.train.AdamOptimizer(lr, beta1=0.5).minimize(dcgan.g_loss,
var_list=dcgan.g_vars)
sess.run(tf.global_variables_initializer())
num_batches = int(len(X_train)/batch_size)
for epoch in range(n_epochs):
print("Epoch", epoch)
d_losses = 0
g_losses = 0
for idx in range(num_batches):
# Train D
z = np.random.uniform(-1, 1, size=[batch_size, z_dim])
x = X_train[idx*batch_size:(idx+1)*batch_size]
_, d_loss = sess.run([d_train_op, dcgan.d_loss], feed_dict={dcgan.z: z,
dcgan.x: x})
d_losses += d_loss/num_batches
# Train G
z = np.random.uniform(-1, 1, size=[batch_size, z_dim])
_, g_loss = sess.run([g_train_op, dcgan.g_loss], feed_dict={dcgan.z: z})
g_losses += g_loss/num_batches
print("\td_loss {0}, g_loss {1}".format(d_losses, g_losses))
# Generate images
z = np.random.uniform(-1, 1, size=[batch_size, z_dim])
images = sess.run(dcgan.G, feed_dict={dcgan.z: z})
img = combine_images(images)
img = img*127.5 + 127.5
Image.fromarray(img.astype(np.uint8)).save("epoch{0}_g_images.png".format(epoch))