From 31a30bb3b5aae1700169596b8d0b7aecff6cdefa Mon Sep 17 00:00:00 2001 From: Lv Tao Date: Thu, 5 Apr 2018 22:27:46 +0800 Subject: [PATCH 1/4] add ssd benchmark --- example/ssd/benchmark_score.py | 102 +++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 example/ssd/benchmark_score.py diff --git a/example/ssd/benchmark_score.py b/example/ssd/benchmark_score.py new file mode 100644 index 000000000000..ded6352ec797 --- /dev/null +++ b/example/ssd/benchmark_score.py @@ -0,0 +1,102 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import print_function +import os +import sys +import argparse +import importlib +import mxnet as mx +import time +#from dataset.iterator import DetRecordIter +#from config.config import cfg +#from evaluate.eval_metric import MApMetric, VOC07MApMetric +import logging +from symbol.symbol_factory import get_symbol +from symbol.symbol_factory import get_symbol_train +from symbol import symbol_builder + + +parser = argparse.ArgumentParser(description='MxNet SSD benchmark') +parser.add_argument('--network', '-n', type=str, default='vgg16_reduced') +parser.add_argument('--batch_size', '-b', type=int, default=0) +parser.add_argument('--shape', '-w', type=int, default=300) +parser.add_argument('--class_num', '-class', type=int, default=20) + + +def get_data_shapes(batch_size): + image_shape = (3, 300, 300) + return [('data', (batch_size,)+image_shape)] + +def get_data(batch_size): + data_shapes = get_data_shapes(batch_size) + data = [mx.random.uniform(-1.0, 1.0, shape=shape, ctx=mx.cpu()) for _, shape in data_shapes] + batch = mx.io.DataBatch(data, []) + return batch + + +if __name__ == '__main__': + args = parser.parse_args() + network = args.network + image_shape = args.shape + num_classes = args.class_num + b = args.batch_size + supported_image_shapes = [300, 512] + supported_networks = ['vgg16_reduced', 'inceptionv3', 'resnet50'] + + if network not in supported_networks: + raise Exception(network + " is not supported") + + if image_shape not in supported_image_shapes: + raise Exception("Image shape should be either 300*300 or 512*512!") + + if b == 0: + batch_sizes = [1, 4, 8, 16, 32, 64, 128] + else: + batch_sizes = [b] + + data_shape = (3, image_shape, image_shape) + net = get_symbol(network, data_shape[1], num_classes=num_classes, + nms_thresh=0.4, force_suppress=True) + + num_batches = 100 + dry_run = 5 # use 5 iterations to warm up + + for bs in batch_sizes: + batch = get_data(bs) + mod = mx.mod.Module(net, label_names=None, context=mx.cpu()) + mod.bind(for_training = False, + inputs_need_grad = False, + data_shapes = get_data_shapes(bs)) + mod.init_params(initializer=mx.init.Xavier(magnitude=2.)) + + # get data + data = [mx.random.uniform(-1.0, 1.0, shape=shape, ctx=mx.cpu()) for _, shape in mod.data_shapes] + batch = mx.io.DataBatch(data, []) + + for i in range(dry_run + num_batches): + if i == dry_run: + tic = time.time() + mod.forward(batch, is_train=False) + for output in mod.get_outputs(): + output.wait_to_read() + + avg_time = (time.time() - tic) / num_batches + fps = bs / avg_time + print("SSD-" + network + " with " + str(num_classes) + " classes and shape " + str(data_shape)) + print("batchsize=" + str(bs) + " " + str(1000*avg_time) + " ms") + print("batchsize=" + str(bs) + " " + str(fps) + " imgs/s") From b58585ac0763ba58edf773067d0f80cb3506d305 Mon Sep 17 00:00:00 2001 From: Lv Tao Date: Thu, 5 Apr 2018 22:28:29 +0800 Subject: [PATCH 2/4] optimize MultiBoxDetectionForward --- src/operator/contrib/multibox_detection.cc | 41 ++++++++++++++++------ 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/src/operator/contrib/multibox_detection.cc b/src/operator/contrib/multibox_detection.cc index a2e681a8e603..09bac42bd2a7 100644 --- a/src/operator/contrib/multibox_detection.cc +++ b/src/operator/contrib/multibox_detection.cc @@ -96,11 +96,16 @@ inline void MultiBoxDetectionForward(const Tensor &out, const int num_anchors = cls_prob.size(2); const int num_batches = cls_prob.size(0); const DType *p_anchor = anchors.dptr_; + + const int omp_threads = mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount(); + std::vector outputs; + outputs.reserve(num_anchors * 6); for (int nbatch = 0; nbatch < num_batches; ++nbatch) { const DType *p_cls_prob = cls_prob.dptr_ + nbatch * num_classes * num_anchors; const DType *p_loc_pred = loc_pred.dptr_ + nbatch * num_anchors * 4; DType *p_out = out.dptr_ + nbatch * num_anchors * 6; - int valid_count = 0; + +#pragma omp parallel for num_threads(omp_threads) for (int i = 0; i < num_anchors; ++i) { // find the predicted class id and probability DType score = -1; @@ -112,20 +117,33 @@ inline void MultiBoxDetectionForward(const Tensor &out, id = j; } } + if (id > 0 && score < threshold) { id = 0; } - if (id > 0) { - // [id, prob, xmin, ymin, xmax, ymax] - p_out[valid_count * 6] = id - 1; // remove background, restore original id - p_out[valid_count * 6 + 1] = (id == 0 ? DType(-1) : score); - int offset = i * 4; - TransformLocations(p_out + valid_count * 6 + 2, p_anchor + offset, - p_loc_pred + offset, clip, variances[0], variances[1], - variances[2], variances[3]); + + // [id, prob, xmin, ymin, xmax, ymax] + outputs[i * 6] = id - 1; + outputs[i * 6 + 1] = score; + int offset = i * 4; + TransformLocations(outputs.data() + i * 6 + 2, p_anchor + offset, p_loc_pred + offset, clip, + variances[0], variances[1], variances[2], variances[3]); + } + + int valid_count = 0; + for (int i = 0; i < num_anchors; ++i) { + int offset1 = valid_count * 6; + int offset2 = i * 6; + if (outputs[offset2] >= 0) { + p_out[offset1] = outputs[offset2]; + p_out[offset1 + 1] = outputs[offset2 + 1]; + p_out[offset1 + 2] = outputs[offset2 + 2]; + p_out[offset1 + 3] = outputs[offset2 + 3]; + p_out[offset1 + 4] = outputs[offset2 + 4]; + p_out[offset1 + 5] = outputs[offset2 + 5]; ++valid_count; } - } // end iter num_anchors + } if (valid_count < 1 || nms_threshold <= 0 || nms_threshold > 1) continue; @@ -138,6 +156,7 @@ inline void MultiBoxDetectionForward(const Tensor &out, sorter.push_back(SortElemDescend(p_out[i * 6 + 1], i)); } std::stable_sort(sorter.begin(), sorter.end()); + // re-order output DType *ptemp = temp_space.dptr_ + nbatch * num_anchors * 6; int nkeep = static_cast(sorter.size()); @@ -149,7 +168,9 @@ inline void MultiBoxDetectionForward(const Tensor &out, p_out[i * 6 + j] = ptemp[sorter[i].index * 6 + j]; } } + // apply nms +#pragma omp parallel for num_threads(omp_threads) for (int i = 0; i < valid_count; ++i) { int offset_i = i * 6; if (p_out[offset_i] < 0) continue; // skip eliminated From 6cf234f6355854e8bcac65f4e7416f2c3ac22bea Mon Sep 17 00:00:00 2001 From: Lv Tao Date: Tue, 10 Apr 2018 17:26:08 +0800 Subject: [PATCH 3/4] update default batch sizes for ssd benchmark --- example/ssd/benchmark_score.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/ssd/benchmark_score.py b/example/ssd/benchmark_score.py index ded6352ec797..2c72db7c4d38 100644 --- a/example/ssd/benchmark_score.py +++ b/example/ssd/benchmark_score.py @@ -65,7 +65,7 @@ def get_data(batch_size): raise Exception("Image shape should be either 300*300 or 512*512!") if b == 0: - batch_sizes = [1, 4, 8, 16, 32, 64, 128] + batch_sizes = [1, 2, 4, 8, 16, 32] else: batch_sizes = [b] From 8449f7d2edc85e057f3abc8bed59d4d3440bcdec Mon Sep 17 00:00:00 2001 From: Lv Tao Date: Wed, 11 Apr 2018 09:30:24 +0800 Subject: [PATCH 4/4] remove commented python code --- example/ssd/benchmark_score.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/example/ssd/benchmark_score.py b/example/ssd/benchmark_score.py index 2c72db7c4d38..6af1b217e21a 100644 --- a/example/ssd/benchmark_score.py +++ b/example/ssd/benchmark_score.py @@ -22,10 +22,8 @@ import importlib import mxnet as mx import time -#from dataset.iterator import DetRecordIter -#from config.config import cfg -#from evaluate.eval_metric import MApMetric, VOC07MApMetric import logging + from symbol.symbol_factory import get_symbol from symbol.symbol_factory import get_symbol_train from symbol import symbol_builder