-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathautoencoder.py
179 lines (153 loc) · 7.07 KB
/
autoencoder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import tensorflow as tf
import numpy as np
import functools
def lazy_property(function):
    '''
    Turn a zero-argument method into a read-only property that is
    evaluated once per instance and cached afterwards.

    The result of the first access is stored on the instance under the
    attribute name '_cache_<method name>'; every later access returns
    that stored value without calling the method again.

    Pattern by Danijar Hafner:
    https://danijar.com/
    https://gist.github.com/danijar
    '''
    cache_name = '_cache_' + function.__name__

    def getter(self):
        # Fast path: the value was already computed for this instance.
        if hasattr(self, cache_name):
            return getattr(self, cache_name)
        setattr(self, cache_name, function(self))
        return getattr(self, cache_name)

    # Carry over the wrapped method's name/docstring, then expose as a property.
    return property(functools.wraps(function)(getter))
class cae:
    '''
    TensorFlow model for a 2D convolutional autoencoder.

    Model parameters:
    self.videoData          - placeholder for the video we want to compress
    self.conv1_numNodes     - number of filters in the 1st convolutional layer
    self.conv1_kernel       - kernel size for the 1st convolutional layer
    self.conv1_activation   - activation function for the 1st convolutional layer
    self.conv2_numNodes     - number of filters in the 2nd convolutional layer
    self.conv2_activation   - activation function for the 2nd convolutional layer
    self.deconv1_activation - activation function for the 1st deconvolutional layer
    self.deconv2_activation - activation function for the 2nd deconvolutional layer
    self.learning_rate      - learning rate for the optimizer
    self.global_step        - non-trainable variable counting optimizer steps
    self.debug              - when True, layer shapes are printed while the graph is built

    Model parts (each is built once and cached by lazy_property):
    self.encoder   - output of the autoencoder (the reconstructed frames)
    self.optimizer - Adam training op minimising the MSE between output and input
    self.error     - MSE between the reconstructed frames and the input frames
    '''

    def __init__(self, videoData, numNodes1, kernel1, activ1,
                 numNodes2, activ2, dactiv1, dactiv2, alpha, debug=False):
        '''
        Store the hyper-parameters and build the TensorFlow graph.

        Inputs:
        videoData - video data placeholder which we will train over,
                    shape [batch_size, height, width, channels]; height,
                    width and channels must be statically known because the
                    layer kernel sizes are derived from them
        numNodes1 - 1st convolutional layer number of filters
        kernel1   - 1st convolutional layer kernel size
        activ1    - 1st convolutional layer activation function
        numNodes2 - 2nd convolutional layer number of filters
        activ2    - 2nd convolutional layer activation function
        dactiv1   - 1st deconvolutional layer activation function
        dactiv2   - 2nd deconvolutional layer activation function
        alpha     - learning rate for the optimizer
        debug     - print intermediate tensor shapes while building the graph
        '''
        self.videoData = videoData
        self.conv1_numNodes = numNodes1
        self.conv1_kernel = kernel1
        self.conv1_activation = activ1
        self.conv2_numNodes = numNodes2
        self.conv2_activation = activ2
        self.deconv1_activation = dactiv1
        self.deconv2_activation = dactiv2
        self.learning_rate = alpha
        self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
        self.debug = debug
        # Touch every lazy property once so the complete graph exists as soon
        # as the object is constructed.
        self.encoder
        self.error
        self.optimizer

    @lazy_property
    def encoder(self):
        '''
        Build the 2D convolutional autoencoder and return its output tensor.

        Layout: conv1 -> 2x2 max-pool -> conv2 -> deconv1 -> deconv2.
        The kernel sizes of conv2, deconv1 and deconv2 are derived from the
        static shapes of the preceding tensors so that the final output has
        the same height, width and channel count as self.videoData.
        '''
        input_layer = self.videoData
        # Static input shape: [batch, in_height, in_width, in_channels]
        input_shape = input_layer.get_shape().as_list()

        # Convolutional API: https://www.tensorflow.org/api_docs/python/tf/layers/conv2d
        # Convolutional Layer #1
        conv1 = tf.layers.conv2d(
            inputs=input_layer,
            filters=self.conv1_numNodes,
            kernel_size=self.conv1_kernel,
            padding='valid',
            activation=self.conv1_activation)
        if self.debug:
            print( 'Conv1 shape ', conv1.get_shape().as_list())

        # Pooling Layer #1: 2x2 windows moved with stride 2, i.e. the windows
        # do not overlap.
        pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

        # The kernel of the 2nd convolution spans the whole pooled feature map,
        # so with 'valid' padding its spatial output is 1x1.
        poolShape = pool1.get_shape().as_list()
        conv2_kernel = [poolShape[1], poolShape[2]]
        if self.debug:
            print( 'Pool shape ', poolShape)

        # Convolutional Layer #2
        conv2 = tf.layers.conv2d(
            inputs=pool1,
            filters=self.conv2_numNodes,
            kernel_size=conv2_kernel,
            padding='valid',
            # Use the activation configured for this layer (activ2), not the
            # one belonging to the first convolution.
            activation=self.conv2_activation)

        # Output shape of the second convolutional layer
        conv2Shape = conv2.get_shape().as_list()
        kernelD1 = [1, conv2Shape[2]]
        if self.debug:
            print('conv2 output shape ',conv2Shape )

        # Deconvolution API: https://www.tensorflow.org/api_docs/python/tf/layers/conv2d_transpose
        # Unpooling ideas: https://github.com/tensorflow/tensorflow/issues/2169
        # Deconvolution 1: the stride of 2 stands in for un-pooling.
        deconv1 = tf.layers.conv2d_transpose(
            inputs=conv2,
            filters=conv2Shape[3],
            kernel_size=kernelD1,
            strides=(2, 2),
            activation=self.deconv1_activation)

        # Choose the last kernel so that the original frame size is recovered:
        # a stride-1 transposed convolution with 'valid' padding yields
        # out = in + kernel - 1, hence kernel = target - in + 1.
        deconv1Shape = deconv1.get_shape().as_list()
        nb_row = input_shape[1] - deconv1Shape[1] + 1
        nb_column = input_shape[2] - deconv1Shape[2] + 1
        kernelD2 = [nb_row, nb_column]
        if self.debug:
            print( 'deconve1 output shape ', deconv1Shape )
            print('kernel deconv 2', kernelD2 )

        # Deconvolution 2: one filter per input channel. The channel count is
        # taken from as_list() so that `filters` is a plain int.
        deconv2 = tf.layers.conv2d_transpose(
            inputs=deconv1,
            filters=input_shape[3],
            kernel_size=kernelD2,
            strides=(1, 1),
            activation=self.deconv2_activation)
        if self.debug:
            print('deconv 2 output shape ', deconv2.get_shape().as_list() )

        return deconv2

    @lazy_property
    def optimizer(self):
        '''
        Training op: Adam minimising the mean squared error between the
        autoencoder output and the input video. Each run of the op also
        increments self.global_step.
        '''
        # predictions - predicted output of the model
        # labels      - ground truth tensor, must have the same shape as predictions
        loss = tf.losses.mean_squared_error(predictions=self.encoder, labels=self.videoData)
        optimize = tf.train.AdamOptimizer(self.learning_rate)
        return optimize.minimize(loss, global_step=self.global_step)

    @lazy_property
    def error(self):
        '''
        Mean squared error between the autoencoder output and the input
        video, used to monitor how closely the input is reconstructed.
        '''
        # The input the model reconstructs is stored as self.videoData; the
        # class never defines a `frame` attribute.
        return tf.losses.mean_squared_error(predictions=self.encoder, labels=self.videoData)