# net.py
from __future__ import division
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.layers.python.layers import utils

# Affine adjustment applied to the raw segmentation logits below.
SCALING = 1.0
BIAS = 0.


def resize_like(inputs, ref):
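    """Nearest-neighbor resize of `inputs` to the spatial size of `ref`.

    Returns `inputs` unchanged when the two spatial sizes already match.
    """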
    iH, iW = inputs.get_shape()[1], inputs.get_shape()[2]
    rH, rW = ref.get_shape()[1], ref.get_shape()[2]
    if iH == rH and iW == rW:
        return inputs
    return tf.image.resize_nearest_neighbor(inputs, [rH.value, rW.value])


def plane_pred_net(tgt_image, num_plane, is_training=True):
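    """Predict per-plane parameters and multi-scale plane segmentation logits.

    Args:
        tgt_image: target view, a [B, H, W, 3] tensor. The shape comments
            below assume B = 4 and a 192x320 input.
        num_plane: number of planes n to predict.
        is_training: currently unused (no normalization layers are configured).

    Returns:
        param_final: [B, n, 3] plane parameters.
        A list of segmentation logits [segm1, segm2, segm3, segm4] at full,
        1/2, 1/4, and 1/8 resolution, each with n + 1 channels.
        end_points: dict of intermediate activations.
    """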
    H = tgt_image.get_shape()[1].value
    W = tgt_image.get_shape()[2].value
    # Architecture of the plane prediction network; the variable scope keeps
    # the name 'depth_net'.
    with tf.variable_scope('depth_net') as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # Shared defaults for all conv/deconv layers below.
        with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                            normalizer_fn=None,
                            weights_regularizer=slim.l2_regularizer(0.05),  # L2 regularization with weight 0.05
                            activation_fn=tf.nn.relu,
                            outputs_collections=end_points_collection):
            # slim.conv2d defaults to 'SAME' padding, so each stride-2 layer
            # halves the spatial resolution.
            cnv1 = slim.conv2d(tgt_image, 32, [7, 7], stride=2, scope='cnv1')   # 4*96*160*32
            cnv1b = slim.conv2d(cnv1, 32, [7, 7], stride=1, scope='cnv1b')
            cnv2 = slim.conv2d(cnv1b, 64, [5, 5], stride=2, scope='cnv2')       # 4*48*80*64
            cnv2b = slim.conv2d(cnv2, 64, [5, 5], stride=1, scope='cnv2b')
            cnv3 = slim.conv2d(cnv2b, 128, [3, 3], stride=2, scope='cnv3')      # 4*24*40*128
            cnv3b = slim.conv2d(cnv3, 128, [3, 3], stride=1, scope='cnv3b')
            cnv4 = slim.conv2d(cnv3b, 256, [3, 3], stride=2, scope='cnv4')      # 4*12*20*256
            cnv4b = slim.conv2d(cnv4, 256, [3, 3], stride=1, scope='cnv4b')
            cnv5 = slim.conv2d(cnv4b, 512, [3, 3], stride=2, scope='cnv5')
            cnv5b = slim.conv2d(cnv5, 512, [3, 3], stride=1, scope='cnv5b')     # 4*6*10*512
            # Plane-parameter head: two more stride-2 convs, a 1x1 projection
            # to 3 values per plane, then global average pooling.
            with tf.variable_scope('param'):
                cnv6_plane = slim.conv2d(cnv5b, 512, [3, 3], stride=2, scope='cnv6_plane')      # 4*3*5*512
                cnv7_plane = slim.conv2d(cnv6_plane, 512, [3, 3], stride=2, scope='cnv7_plane') # 4*2*3*512
                param_pred = slim.conv2d(cnv7_plane, 3 * num_plane, [1, 1], scope='param',      # 4*2*3*3n
                                         stride=1, normalizer_fn=None, activation_fn=None)
                param_avg = tf.reduce_mean(param_pred, [1, 2])                                  # 4*3n
                # Empirically, scaling by a small constant facilitates training.
                param_final = 0.01 * tf.reshape(param_avg, [-1, num_plane, 3])                  # B*n*3: 3 parameters per plane
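            # Mask decoder: each up-convolution is concatenated with the
            # matching encoder feature map (skip connection) and, from segm3
            # on, with the upsampled coarser-scale logits, so finer scales
            # can refine the coarser predictions.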
            with tf.variable_scope('mask'):
                upcnv5 = slim.conv2d_transpose(cnv5b, 256, [3, 3], stride=2, scope='upcnv5')
                i5_in = tf.concat([upcnv5, cnv4b], axis=3)
                icnv5 = slim.conv2d(i5_in, 256, [3, 3], stride=1, scope='icnv5')                # 4*12*20*256
                upcnv4 = slim.conv2d_transpose(icnv5, 128, [3, 3], stride=2, scope='upcnv4')    # 4*24*40*128
                i4_in = tf.concat([upcnv4, cnv3b], axis=3)
                icnv4 = slim.conv2d(i4_in, 128, [3, 3], stride=1, scope='icnv4')
                segm4 = SCALING * slim.conv2d(icnv4, num_plane + 1, [3, 3], stride=1,           # 4*24*40*(n+1)
                                              activation_fn=None, normalizer_fn=None, scope='disp4') + BIAS
                segm4_up = tf.image.resize_bilinear(segm4, [H // 4, W // 4])
                upcnv3 = slim.conv2d_transpose(icnv4, 64, [3, 3], stride=2, scope='upcnv3')     # 4*48*80*64
                i3_in = tf.concat([upcnv3, cnv2b, segm4_up], axis=3)
                icnv3 = slim.conv2d(i3_in, 64, [3, 3], stride=1, scope='icnv3')
                segm3 = SCALING * slim.conv2d(icnv3, num_plane + 1, [3, 3], stride=1,           # 4*48*80*(n+1)
                                              activation_fn=None, normalizer_fn=None, scope='disp3') + BIAS
                segm3_up = tf.image.resize_bilinear(segm3, [H // 2, W // 2])
                upcnv2 = slim.conv2d_transpose(icnv3, 32, [3, 3], stride=2, scope='upcnv2')     # 4*96*160*32
                i2_in = tf.concat([upcnv2, cnv1b, segm3_up], axis=3)
                icnv2 = slim.conv2d(i2_in, 32, [3, 3], stride=1, scope='icnv2')
                segm2 = SCALING * slim.conv2d(icnv2, num_plane + 1, [3, 3], stride=1,           # 4*96*160*(n+1)
                                              activation_fn=None, normalizer_fn=None, scope='disp2') + BIAS
                segm2_up = tf.image.resize_bilinear(segm2, [H, W])
                upcnv1 = slim.conv2d_transpose(icnv2, 16, [3, 3], stride=2, scope='upcnv1')     # 4*192*320*16
                i1_in = tf.concat([upcnv1, segm2_up], axis=3)
                icnv1 = slim.conv2d(i1_in, 16, [3, 3], stride=1, scope='icnv1')                 # 4*192*320*16
                segm1 = SCALING * slim.conv2d(icnv1, num_plane + 1, [3, 3], stride=1,           # 4*192*320*(n+1)
                                              activation_fn=None, normalizer_fn=None, scope='disp1') + BIAS
            end_points = utils.convert_collection_to_dict(end_points_collection)
            return param_final, [segm1, segm2, segm3, segm4], end_points
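

# --- Usage sketch (illustrative; not part of the original file) ---
# A minimal sketch of how plane_pred_net might be wired up under TF 1.x with
# tf.contrib available, assuming the 4*192*320*3 input implied by the shape
# comments above; num_plane=5 and the tensor names are hypothetical choices.
if __name__ == '__main__':
    tgt = tf.placeholder(tf.float32, [4, 192, 320, 3], name='tgt_image')
    params, segms, end_points = plane_pred_net(tgt, num_plane=5)
    # params: [4, 5, 3] plane parameters; segms: logits at full, 1/2, 1/4,
    # and 1/8 resolution, each with num_plane + 1 channels.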