-
Notifications
You must be signed in to change notification settings - Fork 0
/
deconv.py
173 lines (139 loc) · 6.42 KB
/
deconv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# function: classify new image using trained model, and visualize the feature map through convolution and deconvolution
# author: Qian Yang
# date: 2017/9/7
import numpy as np
import sys
import caffe
from skimage import io
import matplotlib.pyplot as plt
import FindMaxes as fm
#---------------------parameters--------------------------
caffe.set_mode_gpu()
model_path = '/home/qy/documents/caffe/examples/compact_bilinear/deconv/ft_last_layer_deconv_deploy.prototxt'
invmodel_path = '/home/qy/documents/caffe/examples/compact_bilinear/deconv/invdeploy_norelu.prototxt'
weight_path = '/home/qy/documents/caffe/examples/compact_bilinear/snapshot_iter_60000_last_layer.caffemodel'
image_name = '/home/qy/documents/PythonPrj/CaffePy/val.txt'
image_dir = '/home/qy/documents/PythonPrj/CaffePy/val_resize/'
plane_name = ['Airbus A300', 'Airbus A320', 'Hydroglider', 'Gyroplane', 'Old-fashioned propeller', \
'Airship', 'Helicopter', 'Jet aircraft', 'UAV', 'Turboprop']
# calculation of receptive field:
# bilinear cnn: conv1_1,conv1_2,pool1,conv2_1,conv2_2,pool2,conv3_1,conv3_2,conv3_3,pool3,conv4_1,conv4_2,conv4_3,pool4,conv5_1,conv5_2,conv5_3
# all convolutional layers have kernel size of 3*3, with appropriate padding to keep the size of feature maps unchanged
# all pooling layers have have kernel size of 2*2 and stride equals 2.
# therefore, the size of receptive field for one point on the feature map in layer conv5_3 is ((((1+2*3)*2+2*3)*2+2*3)*2+2*2)*2+2*2=196
receptive_field = 196
#----------------------------------------------------
# visualize the feature map in form of grid
def vis_square(data):
# normalization
data = (data - data.min()) / (data.max() - data.min())
# calculate the number of grid in each side
n = int(np.ceil(np.sqrt(data.shape[0])))
padding = (((0, n**2 - data.shape[0]), (0, 1), (0, 1))+((0, 0), )*(data.ndim - 3))
data = np.pad(data, padding, mode='constant', constant_values=1)
data = data.reshape((n,n) + data.shape[1:]).transpose((0, 2, 1, 3)+tuple(range(4, data.ndim + 1)))
data = data.reshape((n*data.shape[1], n*data.shape[3]) + data.shape[4:])
return data
def process(net, img_path):
# parameters of image preprocessing
transformer = caffe.io.Transformer({'data':net.blobs['data'].data.shape})
transformer.set_transpose('data', (2, 0, 1))
transformer.set_raw_scale('data', 255)
transformer.set_channel_swap('data', (2,1,0))
mu = np.load('/home/qy/documents/caffe/data/plane/plane_mean2.npy')
mu = mu.mean(1).mean(1)
transformer.set_mean('data', mu)
# reshape
net.blobs['data'].reshape(1, 3, 448, 448)
# load image
img = caffe.io.load_image(img_path)
# crop
h_off = (img.shape[0] - 448)/2
w_off = (img.shape[1] - 448)/2
img = img[h_off:h_off+448, w_off:w_off+448,:]
# begin to preprocess
net.blobs['data'].data[...] = transformer.preprocess('data', img)
# run
out = net.forward()
# visualize
n_top = 9
max_activation_point = fm.MaxActivationPoint(n_top)
data = net.blobs['conv5_3'].data
max_activation_point.update(data)
feat = data[0,max_activation_point.max_feature_map.max_vals_ind] #
data = vis_square(feat)
# image matting
loc = max_activation_point.max_loc
height = img.shape[0]
width = img.shape[1]
plt.figure(1)
grid_num = int(np.ceil(np.sqrt(n_top)))
for i in range(n_top):
tlx = max(loc[i,0]*16-receptive_field/2, 0)
brx = min(loc[i,0]*16+receptive_field/2,height)
tly = max(loc[i,1]*16-receptive_field/2, 0)
bry = min(loc[i,1]*16+receptive_field/2,width)
p = plt.subplot(grid_num,grid_num,i)
p.imshow(img[tlx:brx, tly:bry, :])
p.axis('off')
plt.figure(2)
plt.imshow(data)
plt.axis('off')
plt.figure(3)
plt.imshow(img)
plt.axis('off')
return out['prob'][0],transformer
def norm(x, s=1.0):
x -= x.min()
x /= x.max()
return x*s
#------------initialize-------------------
net = caffe.Net(model_path, weight_path, caffe.TEST) # load original network
invnet = caffe.Net(invmodel_path, caffe.TEST) # load deconvolutional network
image_name_list = np.loadtxt(image_name, str, delimiter=' ') # load image list
correctNum = 0 # record the number of samples which are classified into correct categories
plt.ion() # interactive mode
index = np.arange(image_name_list.shape[0])
prob = np.zeros((10,10)) # record the output probability
#np.random.shuffle(index) # shuffle
#-----------------------------------------
#--------------set parameters of deconv network----------------
for b in invnet.params:
if b!='conv5_3': # assign the parameters to the deconvolutional network except the input layer
invnet.params[b][0].data[...] = net.params[b][0].data.reshape(invnet.params[b][0].data.shape)
#------------------------------------------------------
for i in index:
img = image_dir + image_name_list[i][0]
label = image_name_list[i][1]
out_prob, transformer = process(net, img)
pred = out_prob.argmax()
#------------------feed input for deconv network----------------
data = net.blobs['conv5_3'].data
# just adopt one feature map with the strongest activation for deconvolution
max_feature_map = fm.MaxFeatureMap(9)
max_feature_map.update(data)
feat = data[:,max_feature_map.max_vals_ind[0]]
# assign the parameter to the input layer
invnet.params['conv5_3'][0].data[...] = net.params['conv5_3'][0].data[:,max_feature_map.max_vals_ind[0]].reshape(invnet.params['conv5_3'][0].data.shape)
# just filter the location with weaker activation
#feat[0][feat[0] < 100] = 0
invnet.blobs['conv5_3'].data[...] = feat
invnet.blobs['switches4'].data[...] = net.blobs['switches4'].data
invnet.blobs['switches3'].data[...] = net.blobs['switches3'].data
invnet.blobs['switches2'].data[...] = net.blobs['switches2'].data
invnet.blobs['switches1'].data[...] = net.blobs['switches1'].data
invnet.forward()
feat = norm(invnet.blobs['img'].data[0], 255.0)
plt.figure(4)
plt.imshow(transformer.deprocess('data', feat))
plt.figure(5)
plt.title('label: '+label)
top_k = 5
out_prob_ind = out_prob.argsort()[::-1]
out_prob = np.sort(out_prob)[::-1]
for k in range(top_k):
plt.text(0.4,0.8-0.1*k,str(out_prob_ind[k])+': '+str(out_prob[k]))
plt.show()
plt.waitforbuttonpress()
plt.clf()
print('test accuracy: %f' % (float(correctNum)/image_name_list.shape[0]))