From 4ea200e8f1bf5cefe538f8d2409960c0db5ab561 Mon Sep 17 00:00:00 2001
From: Leo Zhao
Date: Mon, 24 Jun 2019 12:22:10 +0800
Subject: [PATCH 01/21] clear cache when tid == 1 and cache size exceeds max
 capacity

test=develop
---
 .../fluid/operators/mkldnn/conv_mkldnn_op.cc  |  8 ++++---
 .../fluid/operators/mkldnn/pool_mkldnn_op.cc  | 13 +++++++----
 paddle/fluid/platform/device_context.cc       | 23 +++++++++++++++----
 paddle/fluid/platform/device_context.h        |  2 +-
 4 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
index 647e09a92911e..e20dfb3568275 100644
--- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
@@ -221,6 +221,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
         user_weights_memory_p, pipeline, is_test);

     std::shared_ptr<mkldnn::memory> dst_memory_p;
+    std::shared_ptr<mkldnn::memory> user_residual_memory_p;

     if (fuse_residual_conn) {
       auto residual_param = ctx.Input<Tensor>("ResidualData");
@@ -243,7 +244,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
         auto user_residual_md = platform::MKLDNNMemDesc(
             residual_data_tz, residual_data_type, residual_param->format());

-        auto user_residual_memory_p = handler.AcquireResidualDataMemory(
+        user_residual_memory_p = handler.AcquireResidualDataMemory(
             user_residual_md, to_void_cast<T>(residual_param_data));

         dst_memory_p = handler.AcquireDstMemoryFromResidualDataMemory(
@@ -263,14 +264,15 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {

     // create convolution op primitive
     std::shared_ptr<mkldnn::convolution_forward> conv_p;
+    std::shared_ptr<mkldnn::memory> user_bias_memory_p, bias_memory_p;
     if (bias) {
       const T* bias_data = bias->data<T>();
       auto user_bias_md = platform::MKLDNNMemDesc(
           {bias_tz}, platform::MKLDNNGetDataType<T>(), memory::format::x);
-      auto user_bias_memory_p =
+      user_bias_memory_p =
           handler.AcquireBiasMemory(user_bias_md, to_void_cast<T>(bias_data));

-      auto bias_memory_p =
+      bias_memory_p =
           handler.AcquireBiasMemoryFromPrimitive(user_bias_memory_p, pipeline);
       conv_p = handler.AcquireConvolution(src_memory_p, weights_memory_p,
                                           bias_memory_p, dst_memory_p);
diff --git a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc
index c635fd11c37ae..1ceedc63d8100 100644
--- a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc
@@ -128,6 +128,9 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     const std::string key_pool_workspace_memory =
         key + "@pool_workspace_memory";

+    std::shared_ptr<mkldnn::memory> src_memory, dst_memory;
+    std::shared_ptr<mkldnn::pooling_forward::primitive_desc> pool_pd;
+    std::shared_ptr<mkldnn::memory> pool_src_memory_p, pool_dst_memory_p;
     auto pool_p = std::static_pointer_cast<mkldnn::pooling_forward>(
         dev_ctx.GetBlob(key_pool_p));
     if (pool_p == nullptr) {
@@ -150,7 +153,7 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
       auto propagation = src_md.data.data_type == mkldnn_f32 ?
          mkldnn::prop_kind::forward_training : mkldnn::prop_kind::forward_scoring;
-      std::shared_ptr<mkldnn::pooling_forward::primitive_desc> pool_pd =
+      pool_pd =
           CreatePrimitiveDesc(src_md, dst_md, propagation, strides,
                               padding_left_top, padding_right_bottom, ksize,
                               pooling_type, mkldnn_engine, ceil_mode, is_test);
@@ -158,9 +161,9 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
       // save pool_pd into global device context to be referred in backward path
       if (!is_test) dev_ctx.SetBlob(key_pool_pd, pool_pd);

-      auto src_memory = std::make_shared<memory>(pool_pd->src_primitive_desc(),
-                                                 to_void_cast<T>(input_data));
-      auto dst_memory =
+      src_memory = std::make_shared<memory>(pool_pd->src_primitive_desc(),
+                                            to_void_cast<T>(input_data));
+      dst_memory =
           std::make_shared<memory>(pool_pd->dst_primitive_desc(), output_data);

       dev_ctx.SetBlob(key_pool_src_mem_p, src_memory);
@@ -186,7 +189,7 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
           (memory::format)dst_memory->get_primitive_desc().desc().data.format;
     } else {
       // Primitives already exist
-      auto pool_src_memory_p =
+      pool_src_memory_p =
           std::static_pointer_cast<memory>(dev_ctx.GetBlob(key_pool_src_mem_p));
       PADDLE_ENFORCE(pool_src_memory_p != nullptr,
                      "Fail to find pooling src mem_p in device context");
diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc
index 4f048d44685a8..7ba3a7a52bd13 100644
--- a/paddle/fluid/platform/device_context.cc
+++ b/paddle/fluid/platform/device_context.cc
@@ -407,6 +407,8 @@ thread_local int cur_thread_id = 0;

 void set_cur_thread_id(int tid) { cur_thread_id = tid; }
 int get_cur_thread_id(void) { return cur_thread_id; }
+#define MKLDNN_CAP 100
+#define MKLDNN_CLEAR_PERCENTAGE 10

 void MKLDNNDeviceContext::SetBlob(const std::string& name,
                                   std::shared_ptr<void> data) const {
@@ -429,14 +431,23 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,
   }

   // Find Key in found (or newly created) KeyBlob
-  auto key_it = pBlob->find(name);
+  auto key_it = std::find_if(
+      pBlob->begin(), pBlob->end(),
+      [=](std::pair<std::string, std::shared_ptr<void>> const& obj) {
+        return obj.first == name;
+      });

   if (key_it == pBlob->end()) {
-    (*pBlob)[name] = data;  // create new blob
+    if ((tid == 1) && (pBlob->size() >= MKLDNN_CAP)) {
+      VLOG(3) << "remove head " << pBlob->begin()->first << " in SetBlob\n";
+      pBlob->erase(pBlob->begin());
+      // pBlob->clear();
+    }
+    pBlob->push_back(std::make_pair(name, data));
   } else {
     key_it->second = data;  // set data to existing blob
   }
-
+  VLOG(3) << "SetBlob " << name << "\n";
   // lock will be automatically released when out of scope
   return;
 }
@@ -456,7 +467,11 @@ std::shared_ptr<void> MKLDNNDeviceContext::GetBlob(
   pBlob = map_it->second;

   // Find Blob via name
-  auto key_it = pBlob->find(name);
+  auto key_it = std::find_if(
+      pBlob->begin(), pBlob->end(),
+      [=](std::pair<std::string, std::shared_ptr<void>> const& obj) {
+        return obj.first == name;
+      });

   if (key_it == pBlob->end()) return nullptr;

diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h
index 812181563e6e5..628273a110e3f 100644
--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h
@@ -378,7 +378,7 @@ struct DefaultDeviceContextType {
 #endif

 #ifdef PADDLE_WITH_MKLDNN
-using KeyBlob = std::unordered_map<std::string, std::shared_ptr<void>>;
+using KeyBlob = std::vector<std::pair<std::string, std::shared_ptr<void>>>;
 using BlobMap = std::unordered_map<int, std::shared_ptr<KeyBlob>>;

 void set_cur_thread_id(int);
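Note on the patch above: the eviction scheme amounts to a bounded, insertion-ordered cache. Keys are kept in a vector so that pBlob->begin() is always the oldest entry. A minimal self-contained sketch of the same idea, using simplified stand-in types rather than the actual Paddle classes:

    #include <iostream>
    #include <memory>
    #include <string>
    #include <utility>
    #include <vector>

    // Simplified stand-in for the KeyBlob introduced in this patch.
    using KeyBlob = std::vector<std::pair<std::string, std::shared_ptr<void>>>;
    constexpr size_t kCap = 100;  // plays the role of MKLDNN_CAP

    void SetBlob(KeyBlob* blob, const std::string& name,
                 std::shared_ptr<void> data) {
      for (auto& kv : *blob) {  // linear lookup, as with std::find_if above
        if (kv.first == name) {
          kv.second = std::move(data);
          return;
        }
      }
      if (blob->size() >= kCap) {
        blob->erase(blob->begin());  // evict the oldest entry (FIFO order)
      }
      blob->emplace_back(name, std::move(data));
    }

    int main() {
      KeyBlob cache;
      SetBlob(&cache, "conv_p", std::make_shared<int>(1));
      std::cout << "cached entries: " << cache.size() << "\n";  // prints 1
    }

The trade-off, revisited by patch 09 below, is that every lookup is now O(n) over the vector.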
++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 7ba3a7a52bd13..831bf1eaf875f 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -439,7 +439,8 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name, if (key_it == pBlob->end()) { if ((tid == 1) && (pBlob->size() >= MKLDNN_CAP)) { - VLOG(3) << "remove head " << pBlob->begin()->first << " in SetBlob\n"; + VLOG(3) << "SetBlob: tid=" << tid << ", remove head blob " + << pBlob->begin()->first << "\n"; pBlob->erase(pBlob->begin()); // pBlob->clear(); } @@ -447,7 +448,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name, } else { key_it->second = data; // set data to existing blob } - VLOG(3) << "SetBlob " << name << "\n"; + VLOG(3) << "SetBlob: tid=" << tid << ", add blob=" << name << "\n"; // lock will be automatically released when out of scope return; } @@ -463,7 +464,10 @@ std::shared_ptr MKLDNNDeviceContext::GetBlob( // Find KeyBlob for current thread firstly auto map_it = pMap->find(tid); - if (map_it == pMap->end()) return nullptr; + if (map_it == pMap->end()) { + VLOG(3) << "GetBlob: tid=" << tid << ", miss tid\n"; + return nullptr; + } pBlob = map_it->second; // Find Blob via name @@ -473,8 +477,12 @@ std::shared_ptr MKLDNNDeviceContext::GetBlob( return obj.first == name; }); - if (key_it == pBlob->end()) return nullptr; + if (key_it == pBlob->end()) { + VLOG(3) << "GetBlob tid=" << tid << ", miss blob=" << name << "\n"; + return nullptr; + } + VLOG(3) << "GetBlob tid=" << tid << ", get blob=" << name << "\n"; // lock will be automatically released when out of scope return key_it->second; } From 14c5b2ea03a8ec73bf803df9a639ace01e1d0b81 Mon Sep 17 00:00:00 2001 From: Leo Zhao Date: Tue, 25 Jun 2019 13:29:40 +0800 Subject: [PATCH 03/21] 1. Add new interface in AnalysisConfig to set mkldnn thread id 2. Few fix in concat/pool mkldnn kernel for key generation 3. Enable cache clearing mechanism test=develop --- paddle/fluid/inference/api/analysis_config.cc | 10 ++++++ .../fluid/inference/api/analysis_predictor.cc | 35 +++++++++++++++++-- .../inference/api/paddle_analysis_config.h | 4 +++ .../operators/mkldnn/concat_mkldnn_op.cc | 7 ++++ .../fluid/operators/mkldnn/pool_mkldnn_op.cc | 7 ++++ paddle/fluid/platform/device_context.cc | 7 ++-- paddle/fluid/platform/mkldnn_reuse.h | 3 ++ 7 files changed, 67 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index 890c90697bcd5..b1221984f66b5 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -114,6 +114,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { // MKLDNN related. CP_MEMBER(use_mkldnn_); CP_MEMBER(mkldnn_enabled_op_types_); + CP_MEMBER(mkldnn_thread_id_); // Quantization related. 
   CP_MEMBER(use_mkldnn_quantizer_);
   CP_MEMBER(mkldnn_quantizer_config_);
@@ -161,6 +162,15 @@ void AnalysisConfig::EnableMKLDNN() {
   Update();
 }

+void AnalysisConfig::SetMKLDNNThreadId(int id) {
+#ifdef PADDLE_WITH_MKLDNN
+  mkldnn_thread_id_ = id;
+#else
+  LOG(ERROR) << "Please compile with MKLDNN first to set MKLDNN Thread Id";
+  mkldnn_thread_id_ = 0;
+#endif
+}
+
 void AnalysisConfig::EnableMkldnnQuantizer() {
 #ifdef PADDLE_WITH_MKLDNN
   if (!mkldnn_quantizer_config_)
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 5d9d5a3178aaa..e5f1c87024cfc 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -197,6 +197,16 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
                             std::vector<PaddleTensor> *output_data,
                             int batch_size) {
   paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
+#ifdef PADDLE_WITH_MKLDNN
+  // TODO(intel): will refactor this code later
+  // Make sure it does not conflict with the AnalysisPredictor::SetMkldnnThreadID case
+  VLOG(3) << "AnalysisPredictor::Run get_cur_thread_id="
+          << paddle::platform::get_cur_thread_id()
+          << ", mkldnn_thread_id_=" << config_.mkldnn_thread_id_ << "\n";
+  if (paddle::platform::get_cur_thread_id() == 0)
+    paddle::platform::set_cur_thread_id(config_.mkldnn_thread_id_);
+#endif
+
   VLOG(3) << "Predictor::predict";
   inference::Timer timer;
   timer.tic();
@@ -238,7 +248,13 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
   // recover the cpu_math_library_num_threads to 1, in order to avoid thread
   // conflict when integrating it into deployment service.
   paddle::platform::SetNumThreads(1);
-
+#ifdef PADDLE_WITH_MKLDNN
+  // TODO(intel): will refactor this code later
+  // reset thread id to avoid confusion when thread is reused from pool again
+  // mkldnn_thread_id_ = -1 is reserved for cache clearing mode only
+  if (paddle::platform::get_cur_thread_id() == -1)
+    paddle::platform::set_cur_thread_id(0);
+#endif
   return true;
 }

@@ -595,6 +611,15 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(

 bool AnalysisPredictor::ZeroCopyRun() {
   paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
+#ifdef PADDLE_WITH_MKLDNN
+  // TODO(intel): will refactor this code later
+  // Make sure it does not conflict with the AnalysisPredictor::SetMkldnnThreadID case
+  VLOG(3) << "AnalysisPredictor::Run get_cur_thread_id="
+          << paddle::platform::get_cur_thread_id()
+          << ", mkldnn_thread_id_=" << config_.mkldnn_thread_id_ << "\n";
+  if (paddle::platform::get_cur_thread_id() == 0)
+    paddle::platform::set_cur_thread_id(config_.mkldnn_thread_id_);
+#endif
   executor_->Run();
   // Fix TensorArray reuse not cleaned bug.
   tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_);
@@ -603,7 +628,13 @@ bool AnalysisPredictor::ZeroCopyRun() {
   // recover the cpu_math_library_num_threads to 1, in order to avoid thread
   // conflict when integrating it into deployment service.
   paddle::platform::SetNumThreads(1);
-
+#ifdef PADDLE_WITH_MKLDNN
+  // TODO(intel): will refactor this code later
+  // reset thread id to avoid confusion when thread is reused from pool again
+  // mkldnn_thread_id_ = -1 is reserved for cache clearing mode only
+  if (paddle::platform::get_cur_thread_id() == -1)
+    paddle::platform::set_cur_thread_id(0);
+#endif
   return true;
 }

diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index e3682d27054a1..43fd321fa27ae 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -182,6 +182,9 @@ struct AnalysisConfig {
   /** A boolean state telling whether to use the MKLDNN.
    */
   bool mkldnn_enabled() const { return use_mkldnn_; }
+  /** Set MKLDNN thread id.
+   */
+  void SetMKLDNNThreadId(int id);

   /** Set and get the number of cpu math library threads.
    */
@@ -287,6 +290,7 @@ struct AnalysisConfig {
   bool use_ngraph_{false};
   bool use_mkldnn_{false};
   std::unordered_set<std::string> mkldnn_enabled_op_types_;
+  int mkldnn_thread_id_{0};

   bool model_from_memory_{false};

diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
index a855ba8475a1b..ac9164a77f893 100644
--- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
@@ -81,6 +81,13 @@ std::string CreateKey(const paddle::framework::ExecutionContext& ctx,
   platform::MKLDNNHandler::AppendKey(&key, std::to_string(dt));
   platform::MKLDNNHandler::AppendKey(&key,
                                      std::to_string(multi_input[0]->format()));
+  if (platform::get_cur_thread_id() != -1) {
+    auto tid = std::this_thread::get_id();
+    std::stringstream ss;
+    ss << tid;
+    platform::MKLDNNHandler::AppendKey(&key, "-t:");
+    platform::MKLDNNHandler::AppendKey(&key, ss.str());
+  }
   return key;
 }

diff --git a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc
index 1ceedc63d8100..5f797f3581ec5 100644
--- a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc
@@ -48,6 +48,13 @@ std::string CreateKey(const paddle::framework::ExecutionContext& ctx,
   platform::MKLDNNHandler::AppendKey(&key, std::to_string(dt));
   platform::MKLDNNHandler::AppendKey(&key, std::to_string(fmt));
   platform::MKLDNNHandler::AppendKey(&key, suffix);
+  if (platform::get_cur_thread_id() != -1) {
+    auto tid = std::this_thread::get_id();
+    std::stringstream ss;
+    ss << tid;
+    platform::MKLDNNHandler::AppendKey(&key, "-t:");
+    platform::MKLDNNHandler::AppendKey(&key, ss.str());
+  }
   return key;
 }

diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc
index 831bf1eaf875f..a54e3d4d6d9a3 100644
--- a/paddle/fluid/platform/device_context.cc
+++ b/paddle/fluid/platform/device_context.cc
@@ -407,8 +407,7 @@ thread_local int cur_thread_id = 0;

 void set_cur_thread_id(int tid) { cur_thread_id = tid; }
 int get_cur_thread_id(void) { return cur_thread_id; }
-#define MKLDNN_CAP 100
-#define MKLDNN_CLEAR_PERCENTAGE 10
+#define MKLDNN_CAP 10000

 void MKLDNNDeviceContext::SetBlob(const std::string& name,
                                   std::shared_ptr<void> data) const {
@@ -438,11 +437,11 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,
   });

   if (key_it == pBlob->end()) {
-    if ((tid == 1) && (pBlob->size() >= MKLDNN_CAP)) {
+    // tid = -1 means cache clearing mode, MKLDNN_CAP defines max blob capacity
+    if ((tid == -1) && (pBlob->size() > MKLDNN_CAP)) {
       VLOG(3) << "SetBlob: tid=" << tid << ", remove head blob "
tid << ", remove head blob " << pBlob->begin()->first << "\n"; pBlob->erase(pBlob->begin()); - // pBlob->clear(); } pBlob->push_back(std::make_pair(name, data)); } else { diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index f1fb6b156aedc..76302d2bc4246 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -38,6 +38,9 @@ class MKLDNNHandler { std::stringstream ss; ss << tid; key_ = key_common_ + "-t:" + ss.str(); + if (platform::get_cur_thread_id() == -1) { + key_ = key_common_; + } } std::shared_ptr AcquireSrcMemory( From 29ca76079317b04dd8ca9f03501904f962776184 Mon Sep 17 00:00:00 2001 From: Leo Zhao Date: Tue, 25 Jun 2019 14:09:48 +0800 Subject: [PATCH 04/21] change to use VLOG(2) test=develop --- paddle/fluid/inference/api/analysis_predictor.cc | 12 ++++++++---- paddle/fluid/platform/device_context.cc | 11 ++++++----- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index e5f1c87024cfc..e839b6c2c93cf 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -200,7 +200,7 @@ bool AnalysisPredictor::Run(const std::vector &inputs, #ifdef PADDLE_WITH_MKLDNN // TODO(intel): will refactor this code later // Make sure it not conflict with AnalysisPredictor::SetMkldnnthreadid case - VLOG(3) << "AnalysisPredictor::Run get_cur_thread_id=" + VLOG(2) << "AnalysisPredictor::Run get_cur_thread_id=" << paddle::platform::get_cur_thread_id() << ", mkldnn_thread_id_=" << config_.mkldnn_thread_id_ << "\n"; if (paddle::platform::get_cur_thread_id() == 0) @@ -252,8 +252,10 @@ bool AnalysisPredictor::Run(const std::vector &inputs, // TODO(intel): will refactor this code later // reset thread id to avoid confusion when thread is reused from pool again // mkldnn_thread_id_ = -1 is reserved for cache clearing mode only - if (paddle::platform::get_cur_thread_id() == -1) + if (paddle::platform::get_cur_thread_id() == -1) { + VLOG(2) << "Clear previous mkldnn thread id -1\n"; paddle::platform::set_cur_thread_id(0); + } #endif return true; } @@ -614,7 +616,7 @@ bool AnalysisPredictor::ZeroCopyRun() { #ifdef PADDLE_WITH_MKLDNN // TODO(intel): will refactor this code later // Make sure it not conflict with AnalysisPredictor::SetMkldnnthreadid case - VLOG(3) << "AnalysisPredictor::Run get_cur_thread_id=" + VLOG(2) << "AnalysisPredictor::Run get_cur_thread_id=" << paddle::platform::get_cur_thread_id() << ", mkldnn_thread_id_=" << config_.mkldnn_thread_id_ << "\n"; if (paddle::platform::get_cur_thread_id() == 0) @@ -632,8 +634,10 @@ bool AnalysisPredictor::ZeroCopyRun() { // TODO(intel): will refactor this code later // reset thread id to avoid confusion when thread is reused from pool again // mkldnn_thread_id_ = -1 is reserved for cache clearing mode only - if (paddle::platform::get_cur_thread_id() == -1) + if (paddle::platform::get_cur_thread_id() == -1) { + VLOG(2) << "Clear previous mkldnn thread id setting\n"; paddle::platform::set_cur_thread_id(0); + } #endif return true; } diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index a54e3d4d6d9a3..41cdc92e20d2a 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -425,6 +425,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name, // 1st time to set blob in current thread pBlob = std::shared_ptr(new KeyBlob()); 
From 29ca76079317b04dd8ca9f03501904f962776184 Mon Sep 17 00:00:00 2001
From: Leo Zhao
Date: Tue, 25 Jun 2019 14:09:48 +0800
Subject: [PATCH 04/21] change to use VLOG(2)

test=develop
---
 paddle/fluid/inference/api/analysis_predictor.cc | 12 ++++++++----
 paddle/fluid/platform/device_context.cc          | 11 ++++++-----
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index e5f1c87024cfc..e839b6c2c93cf 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -200,7 +200,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
 #ifdef PADDLE_WITH_MKLDNN
   // TODO(intel): will refactor this code later
   // Make sure it does not conflict with the AnalysisPredictor::SetMkldnnThreadID case
-  VLOG(3) << "AnalysisPredictor::Run get_cur_thread_id="
+  VLOG(2) << "AnalysisPredictor::Run get_cur_thread_id="
           << paddle::platform::get_cur_thread_id()
           << ", mkldnn_thread_id_=" << config_.mkldnn_thread_id_ << "\n";
   if (paddle::platform::get_cur_thread_id() == 0)
@@ -252,8 +252,10 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
   // TODO(intel): will refactor this code later
   // reset thread id to avoid confusion when thread is reused from pool again
   // mkldnn_thread_id_ = -1 is reserved for cache clearing mode only
-  if (paddle::platform::get_cur_thread_id() == -1)
+  if (paddle::platform::get_cur_thread_id() == -1) {
+    VLOG(2) << "Clear previous mkldnn thread id -1\n";
     paddle::platform::set_cur_thread_id(0);
+  }
 #endif
   return true;
 }
@@ -614,7 +616,7 @@ bool AnalysisPredictor::ZeroCopyRun() {
 #ifdef PADDLE_WITH_MKLDNN
   // TODO(intel): will refactor this code later
   // Make sure it does not conflict with the AnalysisPredictor::SetMkldnnThreadID case
-  VLOG(3) << "AnalysisPredictor::Run get_cur_thread_id="
+  VLOG(2) << "AnalysisPredictor::Run get_cur_thread_id="
           << paddle::platform::get_cur_thread_id()
           << ", mkldnn_thread_id_=" << config_.mkldnn_thread_id_ << "\n";
   if (paddle::platform::get_cur_thread_id() == 0)
@@ -632,8 +634,10 @@ bool AnalysisPredictor::ZeroCopyRun() {
   // TODO(intel): will refactor this code later
   // reset thread id to avoid confusion when thread is reused from pool again
   // mkldnn_thread_id_ = -1 is reserved for cache clearing mode only
-  if (paddle::platform::get_cur_thread_id() == -1)
+  if (paddle::platform::get_cur_thread_id() == -1) {
+    VLOG(2) << "Clear previous mkldnn thread id setting\n";
     paddle::platform::set_cur_thread_id(0);
+  }
 #endif
   return true;
 }

diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc
index a54e3d4d6d9a3..41cdc92e20d2a 100644
--- a/paddle/fluid/platform/device_context.cc
+++ b/paddle/fluid/platform/device_context.cc
@@ -425,6 +425,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,
     // 1st time to set blob in current thread
     pBlob = std::shared_ptr<KeyBlob>(new KeyBlob());
     (*pMap)[tid] = pBlob;
+    VLOG(2) << "SetBlob: tid=" << tid << ", add new tid\n";
   } else {
     pBlob = map_it->second;
   }
@@ -440,7 +440,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,
   if (key_it == pBlob->end()) {
     // tid = -1 means cache clearing mode, MKLDNN_CAP defines max blob capacity
     if ((tid == -1) && (pBlob->size() > MKLDNN_CAP)) {
-      VLOG(3) << "SetBlob: tid=" << tid << ", remove head blob "
+      VLOG(2) << "SetBlob: tid=" << tid << ", remove head blob "
               << pBlob->begin()->first << "\n";
       pBlob->erase(pBlob->begin());
     }
@@ -447,7 +448,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,
   } else {
     key_it->second = data;  // set data to existing blob
   }
-  VLOG(3) << "SetBlob: tid=" << tid << ", add blob=" << name << "\n";
+  VLOG(2) << "SetBlob: tid=" << tid << ", add blob=" << name << "\n";
   // lock will be automatically released when out of scope
   return;
 }
@@ -464,7 +465,7 @@ std::shared_ptr<void> MKLDNNDeviceContext::GetBlob(
   // Find KeyBlob for current thread firstly
   auto map_it = pMap->find(tid);
   if (map_it == pMap->end()) {
-    VLOG(3) << "GetBlob: tid=" << tid << ", miss tid\n";
+    VLOG(2) << "GetBlob: tid=" << tid << ", miss tid\n";
     return nullptr;
   }
   pBlob = map_it->second;
@@ -477,11 +478,11 @@ std::shared_ptr<void> MKLDNNDeviceContext::GetBlob(
   if (key_it == pBlob->end()) {
-    VLOG(3) << "GetBlob tid=" << tid << ", miss blob=" << name << "\n";
+    VLOG(2) << "GetBlob tid=" << tid << ", miss blob=" << name << "\n";
     return nullptr;
   }

-  VLOG(3) << "GetBlob tid=" << tid << ", get blob=" << name << "\n";
+  VLOG(2) << "GetBlob tid=" << tid << ", get blob=" << name << "\n";
   // lock will be automatically released when out of scope
   return key_it->second;
 }
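For reference, VLOG levels in glog-based code are silent by default; lowering these traces from level 3 to level 2 makes them easier to enable selectively. A minimal sketch of how such traces are surfaced, assuming a standard glog setup rather than anything Paddle-specific:

    #include <glog/logging.h>

    int main(int argc, char* argv[]) {
      google::InitGoogleLogging(argv[0]);
      FLAGS_logtostderr = true;  // print to stderr instead of log files
      FLAGS_v = 2;               // equivalent to running with --v=2 / GLOG_v=2
      VLOG(2) << "SetBlob: tid=" << 0 << ", add blob=" << "conv_p";  // printed
      VLOG(3) << "this one stays silent at v=2";
      return 0;
    }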
From 76db898aebe38b90e52e97289edc44ced873c60a Mon Sep 17 00:00:00 2001
From: luotao1
Date: Wed, 26 Jun 2019 00:09:31 +0800
Subject: [PATCH 05/21] detect model test for dynamic shape

---
 .../fluid/inference/tests/api/CMakeLists.txt  |   3 +
 .../tests/api/analyzer_detect_tester.cc       | 150 ++++++++++++++++++
 2 files changed, 153 insertions(+)
 create mode 100644 paddle/fluid/inference/tests/api/analyzer_detect_tester.cc

diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt
index 243f5cef00835..ec33df962e46e 100644
--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -157,6 +157,9 @@ if (NOT EXISTS ${MOBILENET_INSTALL_DIR})
 endif()
 inference_analysis_api_test_with_refer_result(test_analyzer_mobilenet_transpose ${MOBILENET_INSTALL_DIR} analyzer_vis_tester.cc)

+# detect
+inference_analysis_api_test_with_refer_result(test_analyzer_detect ${OCR_INSTALL_DIR} analyzer_detect_tester.cc)
+
 ### Image classification tests with fake data
 set(IMG_CLASS_TEST_APP "test_analyzer_image_classification")
 set(IMG_CLASS_TEST_APP_SRC "analyzer_image_classification_tester.cc")

diff --git a/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc b/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc
new file mode 100644
index 0000000000000..d09f1ff81a218
--- /dev/null
+++ b/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc
@@ -0,0 +1,150 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <fstream>
+#include <iostream>
+#include <string>
+#include "paddle/fluid/inference/tests/api/tester_helper.h"
+DEFINE_string(infer_shape, "", "data shape file");
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+struct Record {
+  std::vector<float> data;
+  std::vector<int32_t> shape;
+};
+
+Record ProcessALine(const std::string &line, const std::string &shape_line) {
+  VLOG(3) << "process a line";
+  std::vector<std::string> columns;
+
+  Record record;
+  std::vector<std::string> data_strs;
+  split(line, ' ', &data_strs);
+  for (auto &d : data_strs) {
+    record.data.push_back(std::stof(d));
+  }
+
+  std::vector<std::string> shape_strs;
+  split(shape_line, ' ', &shape_strs);
+  for (auto &s : shape_strs) {
+    record.shape.push_back(std::stoi(s));
+  }
+  // VLOG(3) << "data size " << record.data.size();
+  // VLOG(3) << "data shape size " << record.shape.size();
+  VLOG(2) << "data shape size " << record.shape[3];
+  return record;
+}
+
+void SetConfig(AnalysisConfig *cfg) {
+  cfg->SetModel(FLAGS_infer_model + "/model", FLAGS_infer_model + "/params");
+  cfg->DisableGpu();
+  cfg->SwitchIrDebug();
+  cfg->SwitchSpecifyInputNames(false);
+  cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
+}
+
+void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
+  std::string line;
+  std::ifstream file(FLAGS_infer_data);
+  std::string shape_line;
+  std::ifstream infer_file(FLAGS_infer_shape);
+
+  int iteration = FLAGS_test_all_data ? 1000 : 1;
+  for (int k = 0; k < iteration; k++) {
+    std::getline(file, line);
+    std::getline(infer_file, shape_line);
+    auto record = ProcessALine(line, shape_line);
+
+    PaddleTensor input;
+    input.shape = record.shape;
+    input.dtype = PaddleDType::FLOAT32;
+    size_t input_size = record.data.size() * sizeof(float);
+    input.data.Resize(input_size);
+    memcpy(input.data.data(), record.data.data(), input_size);
+    std::vector<PaddleTensor> input_slots;
+    input_slots.assign({input});
+    (*inputs).emplace_back(input_slots);
+  }
+}
+
+// Easy for profiling independently.
+// ocr, mobilenet and se_resnext50
+void profile(bool use_mkldnn = false) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+  if (use_mkldnn) {
+    cfg.EnableMKLDNN();
+    cfg.pass_builder()->AppendPass("fc_mkldnn_pass");
+  }
+  // cfg.pass_builder()->TurnOnDebug();
+  std::vector<std::vector<PaddleTensor>> outputs;
+
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInput(&input_slots_all);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
+}
+
+TEST(Analyzer_vis, profile) { profile(); }
+
+#ifdef PADDLE_WITH_MKLDNN
+TEST(Analyzer_vis, profile_mkldnn) { profile(true /* use_mkldnn */); }
+#endif
+
+// Check the fuse status
+TEST(Analyzer_vis, fuse_statis) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+  int num_ops;
+  auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
+  GetFuseStatis(predictor.get(), &num_ops);
+}
+
+// Compare result of NativeConfig and AnalysisConfig
+void compare(bool use_mkldnn = false) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+  if (use_mkldnn) {
+    cfg.EnableMKLDNN();
+    cfg.pass_builder()->AppendPass("fc_mkldnn_pass");
+  }
+
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInput(&input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
+}
+
+TEST(Analyzer_vis, compare) { compare(); }
+#ifdef PADDLE_WITH_MKLDNN
+TEST(Analyzer_vis, compare_mkldnn) { compare(true /* use_mkldnn */); }
+#endif
+
+// Compare Deterministic result
+TEST(Analyzer_vis, compare_determine) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInput(&input_slots_all);
+  CompareDeterministic(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                       input_slots_all);
+}
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
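The new tester pairs each data line with a shape line from a separate file. A small self-contained version of that parsing step, using std::istringstream instead of the split() helper from tester_helper.h:

    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    struct Record {
      std::vector<float> data;
      std::vector<int> shape;
    };

    Record ParseLine(const std::string& line, const std::string& shape_line) {
      Record r;
      std::istringstream ds(line), ss(shape_line);
      float f;
      while (ds >> f) r.data.push_back(f);   // space-separated float values
      int i;
      while (ss >> i) r.shape.push_back(i);  // space-separated dimensions
      return r;
    }

    int main() {
      auto r = ParseLine("0.5 1.5 2.5", "1 3 18 128");
      std::cout << r.data.size() << " values, " << r.shape.size() << " dims\n";
    }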
From 6a97049076230ef136f586dfdb16811a18802183 Mon Sep 17 00:00:00 2001
From: luotao1
Date: Wed, 26 Jun 2019 19:17:04 +0800
Subject: [PATCH 06/21] load input data one by one

---
 .../tests/api/analyzer_detect_tester.cc | 110 +++++++-----------
 1 file changed, 45 insertions(+), 65 deletions(-)

diff --git a/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc b/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc
index d09f1ff81a218..5ee418534f6c5 100644
--- a/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc
@@ -17,6 +17,7 @@ limitations under the License. */
 #include <iostream>
 #include <string>
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
 DEFINE_string(infer_shape, "", "data shape file");
+DEFINE_int32(sample, 1, "number of sample");

 namespace paddle {
@@ -45,7 +46,8 @@ Record ProcessALine(const std::string &line, const std::string &shape_line) {
   }
   // VLOG(3) << "data size " << record.data.size();
   // VLOG(3) << "data shape size " << record.shape.size();
-  VLOG(2) << "data shape size " << record.shape[3];
+  // VLOG(2) << "data shape size " << record.shape[3];
+  LOG(INFO) << "data shape size " << record.shape[3];
   return record;
 }

@@ -57,28 +59,19 @@ void SetConfig(AnalysisConfig *cfg) {
   cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
 }

-void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
-  std::string line;
-  std::ifstream file(FLAGS_infer_data);
-  std::string shape_line;
-  std::ifstream infer_file(FLAGS_infer_shape);
-
-  int iteration = FLAGS_test_all_data ? 1000 : 1;
-  for (int k = 0; k < iteration; k++) {
-    std::getline(file, line);
-    std::getline(infer_file, shape_line);
-    auto record = ProcessALine(line, shape_line);
-
-    PaddleTensor input;
-    input.shape = record.shape;
-    input.dtype = PaddleDType::FLOAT32;
-    size_t input_size = record.data.size() * sizeof(float);
-    input.data.Resize(input_size);
-    memcpy(input.data.data(), record.data.data(), input_size);
-    std::vector<PaddleTensor> input_slots;
-    input_slots.assign({input});
-    (*inputs).emplace_back(input_slots);
-  }
+void SetInput(std::vector<std::vector<PaddleTensor>> *inputs,
+              const std::string &line, const std::string &shape_line) {
+  auto record = ProcessALine(line, shape_line);
+
+  PaddleTensor input;
+  input.shape = record.shape;
+  input.dtype = PaddleDType::FLOAT32;
+  size_t input_size = record.data.size() * sizeof(float);
+  input.data.Resize(input_size);
+  memcpy(input.data.data(), record.data.data(), input_size);
+  std::vector<PaddleTensor> input_slots;
+  input_slots.assign({input});
+  (*inputs).emplace_back(input_slots);
 }

 // Easy for profiling independently.
@@ -92,59 +85,46 @@ void profile(bool use_mkldnn = false) {
   }
   // cfg.pass_builder()->TurnOnDebug();
   std::vector<std::vector<PaddleTensor>> outputs;
-
   std::vector<std::vector<PaddleTensor>> input_slots_all;
-  SetInput(&input_slots_all);
-  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
-                 input_slots_all, &outputs, FLAGS_num_threads);
-}

-TEST(Analyzer_vis, profile) { profile(); }
+  Timer run_timer;
+  double elapsed_time = 0;

-#ifdef PADDLE_WITH_MKLDNN
-TEST(Analyzer_vis, profile_mkldnn) { profile(true /* use_mkldnn */); }
-#endif
-
-// Check the fuse status
-TEST(Analyzer_vis, fuse_statis) {
-  AnalysisConfig cfg;
-  SetConfig(&cfg);
-  int num_ops;
+  int iterations = FLAGS_sample;
+  int num_times = FLAGS_repeat;
   auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
-  GetFuseStatis(predictor.get(), &num_ops);
-}
+  outputs.resize(iterations);
+
+  for (int j = 0; j < num_times; j++) {
+    std::ifstream file(FLAGS_infer_data);
+    std::ifstream infer_file(FLAGS_infer_shape);
+    std::string line;
+    std::string shape_line;
+
+    for (int i = 0; i < iterations; i++) {
+      std::getline(file, line);
+      std::getline(infer_file, shape_line);
+      SetInput(&input_slots_all, line, shape_line);
+
+      run_timer.tic();
+      predictor->Run(input_slots_all[i], &outputs[i], FLAGS_batch_size);
+      elapsed_time += run_timer.toc();
+    }
+    file.close();
+    infer_file.close();
+  }

-// Compare result of NativeConfig and AnalysisConfig
-void compare(bool use_mkldnn = false) {
-  AnalysisConfig cfg;
-  SetConfig(&cfg);
-  if (use_mkldnn) {
-    cfg.EnableMKLDNN();
-    cfg.pass_builder()->AppendPass("fc_mkldnn_pass");
-  }
+  auto batch_latency = elapsed_time / (iterations * num_times);
+  PrintTime(FLAGS_batch_size, num_times, FLAGS_num_threads, 0, batch_latency,
+            iterations, VarType::FP32);
+}

-  std::vector<std::vector<PaddleTensor>> input_slots_all;
-  SetInput(&input_slots_all);
-  CompareNativeAndAnalysis(
-      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
-}
+TEST(Analyzer_vis, profile) { profile(); }

-TEST(Analyzer_vis, compare) { compare(); }
 #ifdef PADDLE_WITH_MKLDNN
-TEST(Analyzer_vis, compare_mkldnn) { compare(true /* use_mkldnn */); }
+TEST(Analyzer_vis, profile_mkldnn) { profile(true /* use_mkldnn */); }
 #endif

-// Compare Deterministic result
-TEST(Analyzer_vis, compare_determine) {
-  AnalysisConfig cfg;
-  SetConfig(&cfg);
-
-  std::vector<std::vector<PaddleTensor>> input_slots_all;
-  SetInput(&input_slots_all);
-  CompareDeterministic(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
-                       input_slots_all);
-}
-
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle
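run_timer above is the Timer helper from tester_helper.h; the measurement pattern itself is just tic/toc around each Run() call, excluding input loading. An equivalent sketch with std::chrono, under the assumption that only the prediction call should be timed:

    #include <chrono>
    #include <iostream>

    int main() {
      double elapsed_ms = 0;
      const int iterations = 100;
      for (int i = 0; i < iterations; i++) {
        // ... load one input line here (excluded from the timing) ...
        auto start = std::chrono::steady_clock::now();
        // ... predictor->Run(...) would go here ...
        auto end = std::chrono::steady_clock::now();
        elapsed_ms +=
            std::chrono::duration<double, std::milli>(end - start).count();
      }
      std::cout << "avg latency: " << elapsed_ms / iterations << " ms\n";
    }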
From 1a473737a8ad6cc94d01c0b469a31e5e6d0b3ce7 Mon Sep 17 00:00:00 2001
From: luotao1
Date: Wed, 26 Jun 2019 20:06:27 +0800
Subject: [PATCH 07/21] each iteration use new threads

---
 .../tests/api/analyzer_detect_tester.cc | 22 +++++++++++++------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc b/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc
index 5ee418534f6c5..72889f9a61dea 100644
--- a/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc
@@ -95,6 +95,8 @@ void profile(bool use_mkldnn = false) {
   auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
   outputs.resize(iterations);

+  std::vector<std::thread> threads;
+
   for (int j = 0; j < num_times; j++) {
     std::ifstream file(FLAGS_infer_data);
     std::ifstream infer_file(FLAGS_infer_shape);
@@ -102,14 +104,20 @@ void profile(bool use_mkldnn = false) {
     std::string shape_line;

     for (int i = 0; i < iterations; i++) {
-      std::getline(file, line);
-      std::getline(infer_file, shape_line);
-      SetInput(&input_slots_all, line, shape_line);
-
-      run_timer.tic();
-      predictor->Run(input_slots_all[i], &outputs[i], FLAGS_batch_size);
-      elapsed_time += run_timer.toc();
+      threads.emplace_back([&, i]() {
+        std::getline(file, line);
+        std::getline(infer_file, shape_line);
+        SetInput(&input_slots_all, line, shape_line);
+
+        run_timer.tic();
+        predictor->Run(input_slots_all[i], &outputs[i], FLAGS_batch_size);
+        elapsed_time += run_timer.toc();
+      });
+      LOG(INFO) << "threads size: " << threads.size();
+      threads[0].join();
+      threads.clear();
     }

     file.close();
     infer_file.close();
   }

From d27c75705bf7bef619993f746c5b05f043e20759 Mon Sep 17 00:00:00 2001
From: luotao1
Date: Thu, 27 Jun 2019 16:13:04 +0800
Subject: [PATCH 08/21] fix input_slot_all memory leak

---
 .../fluid/inference/tests/api/analyzer_detect_tester.cc | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc b/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc
index 72889f9a61dea..7213e119eaef3 100644
--- a/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc
@@ -46,8 +46,7 @@ Record ProcessALine(const std::string &line, const std::string &shape_line) {
   }
   // VLOG(3) << "data size " << record.data.size();
   // VLOG(3) << "data shape size " << record.shape.size();
-  // VLOG(2) << "data shape size " << record.shape[3];
-  LOG(INFO) << "data shape size " << record.shape[3];
+  // LOG(INFO) << "data shape size " << record.shape[3];
   return record;
 }

@@ -81,7 +80,6 @@ void profile(bool use_mkldnn = false) {
   SetConfig(&cfg);
   if (use_mkldnn) {
     cfg.EnableMKLDNN();
-    cfg.pass_builder()->AppendPass("fc_mkldnn_pass");
   }
   // cfg.pass_builder()->TurnOnDebug();
   std::vector<std::vector<PaddleTensor>> outputs;
@@ -110,12 +108,13 @@ void profile(bool use_mkldnn = false) {
         SetInput(&input_slots_all, line, shape_line);

         run_timer.tic();
-        predictor->Run(input_slots_all[i], &outputs[i], FLAGS_batch_size);
+        predictor->Run(input_slots_all[0], &outputs[0], FLAGS_batch_size);
         elapsed_time += run_timer.toc();
       });
-      LOG(INFO) << "threads size: " << threads.size();
       threads[0].join();
       threads.clear();
+      if (i % 100 == 0) LOG(INFO) << i << " samples";
+      std::vector<std::vector<PaddleTensor>>().swap(input_slots_all);
     }

     file.close();
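The swap idiom at the end of the loop is what actually releases memory: clear() keeps a vector's capacity, while swapping with a temporary frees it. A compact illustration:

    #include <iostream>
    #include <vector>

    int main() {
      std::vector<std::vector<int>> slots(1000, std::vector<int>(1000));
      slots.clear();  // size becomes 0, but capacity (heap block) is retained
      std::cout << "capacity after clear: " << slots.capacity() << "\n";
      std::vector<std::vector<int>>().swap(slots);  // capacity released
      std::cout << "capacity after swap: " << slots.capacity() << "\n";  // 0
    }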
From 634d8c62f44ac737b2a3d2ed1ae4f609652da8de Mon Sep 17 00:00:00 2001
From: luotao1
Date: Thu, 27 Jun 2019 18:07:28 +0800
Subject: [PATCH 09/21] change KeyBlob from vector to map for speedup

---
 paddle/fluid/platform/device_context.cc | 14 +++-----------
 paddle/fluid/platform/device_context.h  |  2 +-
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc
index 6b3317ef2247f..61c9586e6b7c6 100644
--- a/paddle/fluid/platform/device_context.cc
+++ b/paddle/fluid/platform/device_context.cc
@@ -433,11 +433,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,
   }

   // Find Key in found (or newly created) KeyBlob
-  auto key_it = std::find_if(
-      pBlob->begin(), pBlob->end(),
-      [=](std::pair<std::string, std::shared_ptr<void>> const& obj) {
-        return obj.first == name;
-      });
+  auto key_it = pBlob->find(name);

   if (key_it == pBlob->end()) {
     // tid = -1 means cache clearing mode, MKLDNN_CAP defines max blob capacity
@@ -446,7 +442,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,
               << pBlob->begin()->first << "\n";
       pBlob->erase(pBlob->begin());
     }
-    pBlob->push_back(std::make_pair(name, data));
+    (*pBlob)[name] = data;
   } else {
     key_it->second = data;  // set data to existing blob
   }
@@ -473,11 +469,7 @@ std::shared_ptr<void> MKLDNNDeviceContext::GetBlob(
   pBlob = map_it->second;

   // Find Blob via name
-  auto key_it = std::find_if(
-      pBlob->begin(), pBlob->end(),
-      [=](std::pair<std::string, std::shared_ptr<void>> const& obj) {
-        return obj.first == name;
-      });
+  auto key_it = pBlob->find(name);

   if (key_it == pBlob->end()) {
     VLOG(2) << "GetBlob tid=" << tid << ", miss blob=" << name << "\n";

diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h
index cf1c29a5d9b18..0da64aea4297d 100644
--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h
@@ -378,7 +378,7 @@ struct DefaultDeviceContextType {
 #endif

 #ifdef PADDLE_WITH_MKLDNN
-using KeyBlob = std::vector<std::pair<std::string, std::shared_ptr<void>>>;
+using KeyBlob = std::unordered_map<std::string, std::shared_ptr<void>>;
 using BlobMap = std::unordered_map<int, std::shared_ptr<KeyBlob>>;

 void set_cur_thread_id(int);
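Switching KeyBlob back to std::unordered_map makes each lookup amortized O(1) instead of a linear scan. One consequence worth noting: pBlob->begin() on an unordered_map is no longer the oldest entry, so the capacity check above now evicts an arbitrary element rather than following FIFO order. A small sketch of the lookup difference:

    #include <iostream>
    #include <memory>
    #include <string>
    #include <unordered_map>

    int main() {
      std::unordered_map<std::string, std::shared_ptr<void>> cache;
      cache["conv_p"] = std::make_shared<int>(1);

      auto it = cache.find("conv_p");  // amortized O(1) hash lookup
      if (it != cache.end()) {
        std::cout << "hit: " << it->first << "\n";
      }
      // cache.begin() points at an unspecified element here, not the oldest.
    }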
From 2de7f415fb3abfb327381e3bc078d752c067b3b5 Mon Sep 17 00:00:00 2001
From: Pawel Piotrowicz
Date: Thu, 27 Jun 2019 14:08:45 +0200
Subject: [PATCH 10/21] PaddlePaddle memory leak

test=develop
---
 paddle/fluid/framework/details/op_registry.h  |  5 +++--
 paddle/fluid/framework/op_desc.cc             |  2 +-
 paddle/fluid/framework/op_info.h              | 10 ++++++----
 paddle/fluid/framework/op_proto_maker.cc      |  5 +++--
 paddle/fluid/framework/op_proto_maker.h       | 10 ++++++----
 paddle/fluid/framework/op_proto_maker_test.cc | 12 ++++++------
 6 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/paddle/fluid/framework/details/op_registry.h b/paddle/fluid/framework/details/op_registry.h
index 0f03ca51da778..519e007161c1c 100644
--- a/paddle/fluid/framework/details/op_registry.h
+++ b/paddle/fluid/framework/details/op_registry.h
@@ -14,6 +14,7 @@ limitations under the License. */

 #pragma once

+#include <memory>
 #include <string>
 #include <tuple>
 #include <type_traits>
@@ -161,8 +162,8 @@ struct OpInfoFiller {
 template <typename T>
 struct OpInfoFiller<T, kOpProtoAndCheckerMaker> {
   void operator()(const char* op_type, OpInfo* info) const {
-    info->proto_ = new proto::OpProto;
-    info->checker_ = new OpAttrChecker();
+    info->proto_ = std::make_shared<proto::OpProto>();
+    info->checker_ = std::make_shared<OpAttrChecker>();
     T maker;
     maker(info->proto_, info->checker_);
     info->proto_->set_type(op_type);

diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc
index 1ea93b7638a85..bf38bfa07e9fd 100644
--- a/paddle/fluid/framework/op_desc.cc
+++ b/paddle/fluid/framework/op_desc.cc
@@ -668,7 +668,7 @@ static void InitInferShapeFuncs() {
 void OpDesc::CheckAttrs() {
   PADDLE_ENFORCE(!Type().empty(),
                  "CheckAttr() can not be called before type is setted.");
-  auto *checker = OpInfoMap::Instance().Get(Type()).Checker();
+  auto checker = OpInfoMap::Instance().Get(Type()).Checker();
   if (checker == nullptr) {
     // checker is not configured. That operator could be generated by Paddle,
     // not by users.

diff --git a/paddle/fluid/framework/op_info.h b/paddle/fluid/framework/op_info.h
index daa72769c4957..ad21d6b7d86ed 100644
--- a/paddle/fluid/framework/op_info.h
+++ b/paddle/fluid/framework/op_info.h
@@ -15,8 +15,10 @@ limitations under the License. */
 #pragma once
 #include <functional>
 #include <map>
+#include <memory>
 #include <string>
 #include <unordered_map>
+#include <vector>

 #include "paddle/fluid/framework/attribute.h"
@@ -36,13 +38,13 @@ class InferShapeBase {
 struct OpInfo {
   OpCreator creator_;
   GradOpMakerFN grad_op_maker_;
-  proto::OpProto* proto_{nullptr};
-  OpAttrChecker* checker_{nullptr};
+  std::shared_ptr<proto::OpProto> proto_;
+  std::shared_ptr<OpAttrChecker> checker_;
   InferVarTypeFN infer_var_type_;
   InferShapeFN infer_shape_;
   InferInplaceOpFN infer_inplace_;
   InferNoNeedBufferVarsFN infer_no_need_buffer_vars_;
-
+  OpInfo() : proto_{nullptr}, checker_{nullptr} {}
   // NOTE(zjl): this flag is added to check whether
   // the grad maker is the default one.
   bool use_default_grad_op_desc_maker_{false};
@@ -70,7 +72,7 @@ struct OpInfo {
     return grad_op_maker_;
   }

-  const OpAttrChecker* Checker() const { return checker_; }
+  const std::shared_ptr<OpAttrChecker> Checker() const { return checker_; }

   const InferNoNeedBufferVarsFN& NoNeedBufferVarsInferer() const {
     return infer_no_need_buffer_vars_;

diff --git a/paddle/fluid/framework/op_proto_maker.cc b/paddle/fluid/framework/op_proto_maker.cc
index 2311614c335a5..06ffdea636578 100644
--- a/paddle/fluid/framework/op_proto_maker.cc
+++ b/paddle/fluid/framework/op_proto_maker.cc
@@ -56,8 +56,9 @@ void OpProtoAndCheckerMaker::CheckNoDuplicatedInOutAttrs() {
   }
 }

-void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto,
-                                        OpAttrChecker* attr_checker) {
+void OpProtoAndCheckerMaker::operator()(
+    std::shared_ptr<proto::OpProto> proto,
+    std::shared_ptr<OpAttrChecker> attr_checker) {
   proto_ = proto;
   op_checker_ = attr_checker;
   Make();

diff --git a/paddle/fluid/framework/op_proto_maker.h b/paddle/fluid/framework/op_proto_maker.h
index 5f3ce60e1d94e..3e415d0ba904e 100644
--- a/paddle/fluid/framework/op_proto_maker.h
+++ b/paddle/fluid/framework/op_proto_maker.h
@@ -13,7 +13,9 @@ limitations under the License. */
 #pragma once

+#include <memory>
 #include <string>
+#include <vector>
 #include "glog/logging.h"
 #include "paddle/fluid/framework/attribute.h"
 #include "paddle/fluid/framework/framework.pb.h"
@@ -49,7 +51,8 @@ class OpProtoAndCheckerMaker {
   static const char *OpNamescopeAttrName() { return "op_namescope"; }
   static const char *OpCreationCallstackAttrName() { return "op_callstack"; }

-  void operator()(proto::OpProto *proto, OpAttrChecker *attr_checker);
+  void operator()(std::shared_ptr<proto::OpProto> proto,
+                  std::shared_ptr<OpAttrChecker> attr_checker);

   virtual void Make() = 0;

@@ -99,9 +102,8 @@ class OpProtoAndCheckerMaker {
  private:
   void CheckNoDuplicatedInOutAttrs();
   void Validate();
-
-  proto::OpProto *proto_;
-  OpAttrChecker *op_checker_;
+  std::shared_ptr<proto::OpProto> proto_;
+  std::shared_ptr<OpAttrChecker> op_checker_;
   bool validated_{false};
 };
 }  // namespace framework

diff --git a/paddle/fluid/framework/op_proto_maker_test.cc b/paddle/fluid/framework/op_proto_maker_test.cc
index a8030d377fdb4..e28048137c5fc 100644
--- a/paddle/fluid/framework/op_proto_maker_test.cc
+++ b/paddle/fluid/framework/op_proto_maker_test.cc
@@ -25,10 +25,10 @@ class TestAttrProtoMaker : public paddle::framework::OpProtoAndCheckerMaker {
 };

 TEST(ProtoMaker, DuplicatedAttr) {
-  paddle::framework::proto::OpProto op_proto;
-  paddle::framework::OpAttrChecker op_checker;
+  auto op_proto = std::make_shared<paddle::framework::proto::OpProto>();
+  auto op_checker = std::make_shared<paddle::framework::OpAttrChecker>();
   TestAttrProtoMaker proto_maker;
-  ASSERT_THROW(proto_maker(&op_proto, &op_checker),
+  ASSERT_THROW(proto_maker(op_proto, op_checker),
                paddle::platform::EnforceNotMet);
 }

@@ -41,9 +41,9 @@ class TestInOutProtoMaker : public paddle::framework::OpProtoAndCheckerMaker {
 };

 TEST(ProtoMaker, DuplicatedInOut) {
-  paddle::framework::proto::OpProto op_proto;
-  paddle::framework::OpAttrChecker op_checker;
+  auto op_proto = std::make_shared<paddle::framework::proto::OpProto>();
+  auto op_checker = std::make_shared<paddle::framework::OpAttrChecker>();
   TestAttrProtoMaker proto_maker;
-  ASSERT_THROW(proto_maker(&op_proto, &op_checker),
+  ASSERT_THROW(proto_maker(op_proto, op_checker),
                paddle::platform::EnforceNotMet);
 }
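The leak fixed here is the classic raw-pointer-ownership problem: OpInfo held `new`ed proto/checker pointers that nothing ever deleted. Moving to std::shared_ptr ties the lifetime to the last user. A reduced sketch of the before/after shape of the fix (names simplified, not the actual Paddle types):

    #include <memory>

    struct Proto {};
    struct Checker {};

    // Before: raw pointers, never freed -> leaked when the registry went away.
    struct InfoLeaky {
      Proto* proto_{nullptr};
      Checker* checker_{nullptr};
    };

    // After: shared ownership, freed automatically with the last reference.
    struct Info {
      std::shared_ptr<Proto> proto_;
      std::shared_ptr<Checker> checker_;
    };

    int main() {
      Info info;
      info.proto_ = std::make_shared<Proto>();
      info.checker_ = std::make_shared<Checker>();
      // Both objects are released when `info` (and any copies) go out of scope.
    }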
From e8305ea4b0a85da2aab8f182f3bb9534e62023a3 Mon Sep 17 00:00:00 2001
From: luotao1
Date: Fri, 28 Jun 2019 00:39:50 +0800
Subject: [PATCH 11/21] use input_shape to setBlob

---
 .../fluid/inference/api/analysis_predictor.cc |  8 ++++
 .../tests/api/analyzer_detect_tester.cc       |  1 +
 paddle/fluid/platform/device_context.cc       | 44 +++++++++++++++----
 paddle/fluid/platform/device_context.h        |  5 ++-
 4 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index e839b6c2c93cf..99f764573bc1e 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -205,6 +205,14 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
           << ", mkldnn_thread_id_=" << config_.mkldnn_thread_id_ << "\n";
   if (paddle::platform::get_cur_thread_id() == 0)
     paddle::platform::set_cur_thread_id(config_.mkldnn_thread_id_);
+  if (paddle::platform::get_cur_thread_id() == -1) {
+    std::stringstream ss;
+    for (size_t i = 0; i < inputs[0].shape.size(); ++i) {
+      ss << inputs[0].shape[i] << "-";
+    }
+    VLOG(2) << "Set input shape=" << ss.str();
+    paddle::platform::set_cur_input_shape_str(ss.str());
+  }
 #endif

   VLOG(3) << "Predictor::predict";

diff --git a/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc b/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc
index 7213e119eaef3..10947d4594ae0 100644
--- a/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc
@@ -80,6 +80,7 @@ void profile(bool use_mkldnn = false) {
   SetConfig(&cfg);
   if (use_mkldnn) {
     cfg.EnableMKLDNN();
+    cfg.SetMKLDNNThreadId(-1);
   }
   // cfg.pass_builder()->TurnOnDebug();
   std::vector<std::vector<PaddleTensor>> outputs;

diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc
index 61c9586e6b7c6..9cd7fe3364b64 100644
--- a/paddle/fluid/platform/device_context.cc
+++ b/paddle/fluid/platform/device_context.cc
@@ -403,11 +403,16 @@ MKLDNNDeviceContext::MKLDNNDeviceContext(CPUPlace place)
 namespace {
 // Current thread's id.
 thread_local int cur_thread_id = 0;
+thread_local std::string cur_input_shape_str = "";
 }

 void set_cur_thread_id(int tid) { cur_thread_id = tid; }
 int get_cur_thread_id(void) { return cur_thread_id; }
+void set_cur_input_shape_str(std::string input_shape_str) {
+  cur_input_shape_str = input_shape_str;
+}
+std::string get_cur_input_shape_str(void) { return cur_input_shape_str; }
-#define MKLDNN_CAP 10000
+#define MKLDNN_CAP 10

 void MKLDNNDeviceContext::ResetBlobMap() const { p_blobmap_->clear(); }

@@ -415,6 +420,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,
                                   std::shared_ptr<void> data) const {
   BlobMap* pMap = p_blobmap_.get();
   std::shared_ptr<KeyBlob> pBlob = nullptr;
+  std::shared_ptr<Blob> blob = nullptr;

   int tid = platform::get_cur_thread_id();
@@ -438,19 +444,29 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,
     pBlob = map_it->second;
   }

+  std::string cur_input_shape_str = platform::get_cur_input_shape_str();
   // Find Key in found (or newly created) KeyBlob
-  auto key_it = pBlob->find(name);
+  auto key_it = pBlob->find(cur_input_shape_str);

   if (key_it == pBlob->end()) {
     // tid = -1 means cache clearing mode, MKLDNN_CAP defines max blob capacity
     if ((tid == -1) && (pBlob->size() > MKLDNN_CAP)) {
-      VLOG(2) << "SetBlob: tid=" << tid << ", remove head blob "
-              << pBlob->begin()->first << "\n";
-      pBlob->erase(pBlob->begin());
+      VLOG(2) << "tid=" << tid
+              << ", remove all head blob of shape: " << pBlob->begin()->first
+              << "\n";
+      pBlob->erase(pBlob->begin()->first);
     }
-    (*pBlob)[name] = data;
+    blob = std::shared_ptr<Blob>(new Blob());
+    (*pBlob)[cur_input_shape_str] = blob;
+  } else {
+    blob = key_it->second;
+  }
+
+  // Find Blob via name
+  auto blob_it = blob->find(name);
+  if (blob_it == blob->end()) {
+    (*blob)[name] = data;
   } else {
-    key_it->second = data;  // set data to existing blob
+    blob_it->second = data;  // set data to existing blob
   }
   VLOG(2) << "SetBlob: tid=" << tid << ", add blob=" << name << "\n";
   // lock will be automatically released when out of scope
@@ -461,6 +477,7 @@ std::shared_ptr<void> MKLDNNDeviceContext::GetBlob(
     const std::string& name) const {
   BlobMap* pMap = p_blobmap_.get();
   std::shared_ptr<KeyBlob> pBlob = nullptr;
+  std::shared_ptr<Blob> blob = nullptr;

   int tid = platform::get_cur_thread_id();
@@ -473,12 +490,21 @@ std::shared_ptr<void> MKLDNNDeviceContext::GetBlob(
     VLOG(2) << "GetBlob: tid=" << tid << ", miss tid\n";
     return nullptr;
   }
+  std::string cur_input_shape_str = platform::get_cur_input_shape_str();
   pBlob = map_it->second;

+  auto pBlob_it = pBlob->find(cur_input_shape_str);
+  if (pBlob_it == pBlob->end()) {
+    VLOG(2) << "GetBlob: tid=" << cur_input_shape_str
+            << ", miss input_shape_str\n";
+    return nullptr;
+  }
+  blob = pBlob_it->second;
+
   // Find Blob via name
-  auto key_it = pBlob->find(name);
+  auto key_it = blob->find(name);

-  if (key_it == pBlob->end()) {
+  if (key_it == blob->end()) {
     VLOG(2) << "GetBlob tid=" << tid << ", miss blob=" << name << "\n";
     return nullptr;
   }
diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h
index 0da64aea4297d..5571035f51090 100644
--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h
@@ -378,11 +378,14 @@ struct DefaultDeviceContextType {
 #endif

 #ifdef PADDLE_WITH_MKLDNN
-using KeyBlob = std::unordered_map<std::string, std::shared_ptr<void>>;
+using Blob = std::unordered_map<std::string, std::shared_ptr<void>>;
+using KeyBlob = std::unordered_map<std::string, std::shared_ptr<Blob>>;
 using BlobMap = std::unordered_map<int, std::shared_ptr<KeyBlob>>;

 void set_cur_thread_id(int);
 int get_cur_thread_id(void);
+void set_cur_input_shape_str(std::string input_shape_str);
+std::string get_cur_input_shape_str(void);

 class MKLDNNDeviceContext : public CPUDeviceContext {
  public:

From 725f45101f20989e6e0f3eab65026f01f2c3e9de Mon Sep 17 00:00:00 2001
From: luotao1
Date: Fri, 28 Jun 2019 13:06:23 +0800
Subject: [PATCH 12/21] refine device_context, make BlobMap etc. cleaner

---
 paddle/fluid/platform/device_context.cc | 61 +++++++++++++------------
 paddle/fluid/platform/device_context.h  | 12 +++--
 2 files changed, 42 insertions(+), 31 deletions(-)

diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc
index 9cd7fe3364b64..295b3ea53be2b 100644
--- a/paddle/fluid/platform/device_context.cc
+++ b/paddle/fluid/platform/device_context.cc
@@ -401,8 +401,12 @@ MKLDNNDeviceContext::MKLDNNDeviceContext(CPUPlace place)
 }

 namespace {
-// Current thread's id.
+// Current thread's id. -1 means cache clearing mode.
 thread_local int cur_thread_id = 0;
+// Current data input shape string.
+// - If cur_thread_id != -1, it is an empty string by default.
+// - Else, for a 4-dimension input [1, 3, 18, 128],
+//   cur_input_shape_str = 1-3-18-128- .
 thread_local std::string cur_input_shape_str = "";
 }

@@ -419,46 +423,46 @@ void MKLDNNDeviceContext::ResetBlobMap() const { p_blobmap_->clear(); }

 void MKLDNNDeviceContext::SetBlob(const std::string& name,
                                   std::shared_ptr<void> data) const {
   BlobMap* pMap = p_blobmap_.get();
+  std::shared_ptr<ShapeBlob> sBlob = nullptr;
   std::shared_ptr<KeyBlob> pBlob = nullptr;
-  std::shared_ptr<Blob> blob = nullptr;

   int tid = platform::get_cur_thread_id();

   std::lock_guard<std::mutex> lock(*p_mutex_);

-  // Find KeyBlob for current thread
+  // Find ShapeBlob for current thread
   auto map_it = pMap->find(tid);

   if (map_it == pMap->end()) {
     // 1st time to set blob in current thread
-    pBlob = std::shared_ptr<KeyBlob>(new KeyBlob());
-    (*pMap)[tid] = pBlob;
+    sBlob = std::shared_ptr<ShapeBlob>(new ShapeBlob());
+    (*pMap)[tid] = sBlob;
     VLOG(2) << "SetBlob: tid=" << tid << ", add new tid\n";
   } else {
-    pBlob = map_it->second;
+    sBlob = map_it->second;
   }

+  // Find KeyBlob for current input shape
   std::string cur_input_shape_str = platform::get_cur_input_shape_str();
-  // Find Key in found (or newly created) KeyBlob
-  auto key_it = pBlob->find(cur_input_shape_str);
+  auto key_it = sBlob->find(cur_input_shape_str);

-  if (key_it == pBlob->end()) {
-    // tid = -1 means cache clearing mode, MKLDNN_CAP defines max blob capacity
-    if ((tid == -1) && (pBlob->size() > MKLDNN_CAP)) {
+  if (key_it == sBlob->end()) {
+    // tid = -1 means cache clearing mode, MKLDNN_CAP defines max pblob capacity
+    if ((tid == -1) && (sBlob->size() > MKLDNN_CAP)) {
       VLOG(2) << "tid=" << tid
-              << ", remove all head blob of shape: " << pBlob->begin()->first
-              << "\n";
-      pBlob->erase(pBlob->begin()->first);
+              << ", remove all head blob of shape: " << sBlob->begin()->first;
+      sBlob->erase(sBlob->begin()->first);
     }
-    blob = std::shared_ptr<Blob>(new Blob());
-    (*pBlob)[cur_input_shape_str] = blob;
+    pBlob = std::shared_ptr<KeyBlob>(new KeyBlob());
+    (*sBlob)[cur_input_shape_str] = pBlob;
   } else {
-    blob = key_it->second;
+    pBlob = key_it->second;
   }

   // Find Blob via name
-  auto blob_it = blob->find(name);
-  if (blob_it == blob->end()) {
-    (*blob)[name] = data;
+  auto blob_it = pBlob->find(name);
+  if (blob_it == pBlob->end()) {
+    (*pBlob)[name] = data;
   } else {
     blob_it->second = data;  // set data to existing blob
   }
   VLOG(2) << "SetBlob: tid=" << tid << ", add blob=" << name << "\n";
   // lock will be automatically released when out of scope
   return;
 }

@@ -470,34 +474,35 @@ std::shared_ptr<void> MKLDNNDeviceContext::GetBlob(
     const std::string& name) const {
   BlobMap* pMap = p_blobmap_.get();
+  std::shared_ptr<ShapeBlob> sBlob = nullptr;
   std::shared_ptr<KeyBlob> pBlob = nullptr;
-  std::shared_ptr<Blob> blob = nullptr;

   int tid = platform::get_cur_thread_id();

   std::lock_guard<std::mutex> lock(*p_mutex_);

-  // Find KeyBlob for current thread firstly
+  // Find ShapeBlob for current thread firstly
   auto map_it = pMap->find(tid);
   if (map_it == pMap->end()) {
     VLOG(2) << "GetBlob: tid=" << tid << ", miss tid\n";
     return nullptr;
   }
   std::string cur_input_shape_str = platform::get_cur_input_shape_str();
-  pBlob = map_it->second;
+  sBlob = map_it->second;

-  auto pBlob_it = pBlob->find(cur_input_shape_str);
-  if (pBlob_it == pBlob->end()) {
+  // Find KeyBlob for current input shape secondly
+  auto sBlob_it = sBlob->find(cur_input_shape_str);
+  if (sBlob_it == sBlob->end()) {
     VLOG(2) << "GetBlob: tid=" << cur_input_shape_str
             << ", miss input_shape_str\n";
     return nullptr;
   }
-  blob = pBlob_it->second;
+  pBlob = sBlob_it->second;

   // Find Blob via name
-  auto key_it = blob->find(name);
+  auto key_it = pBlob->find(name);

-  if (key_it == blob->end()) {
+  if (key_it == pBlob->end()) {
     VLOG(2) << "GetBlob tid=" << tid << ", miss blob=" << name << "\n";
     return nullptr;
   }

diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h
index 5571035f51090..5afecb4fbf165 100644
--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h
@@ -378,9 +378,15 @@ struct DefaultDeviceContextType {
 #endif

 #ifdef PADDLE_WITH_MKLDNN
+// The following three maps are used to cache MKLDNN primitives.
+// Their relations are:
+// - BlobMap = Map<cur_thread_id, ShapeBlob>
+// - ShapeBlob = Map<cur_input_shape_str, KeyBlob>
+// - KeyBlob = Map<blob_name, blob>
+// Where:
-using Blob = std::unordered_map<std::string, std::shared_ptr<void>>;
-using KeyBlob = std::unordered_map<std::string, std::shared_ptr<Blob>>;
-using BlobMap = std::unordered_map<int, std::shared_ptr<KeyBlob>>;
+using KeyBlob = std::unordered_map<std::string, std::shared_ptr<void>>;
+using ShapeBlob = std::unordered_map<std::string, std::shared_ptr<KeyBlob>>;
+using BlobMap = std::unordered_map<int, std::shared_ptr<ShapeBlob>>;

 void set_cur_thread_id(int);
 int get_cur_thread_id(void);
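With the ShapeBlob level in place, a cached primitive is addressed by three coordinates: thread id, input shape string, and blob name. A self-contained sketch of a lookup through the same three-level structure (typedefs copied from the patch above; the lookup helper is illustrative, not the Paddle method):

    #include <iostream>
    #include <memory>
    #include <string>
    #include <unordered_map>

    using KeyBlob = std::unordered_map<std::string, std::shared_ptr<void>>;
    using ShapeBlob = std::unordered_map<std::string, std::shared_ptr<KeyBlob>>;
    using BlobMap = std::unordered_map<int, std::shared_ptr<ShapeBlob>>;

    std::shared_ptr<void> GetBlob(const BlobMap& map, int tid,
                                  const std::string& shape,
                                  const std::string& name) {
      auto t = map.find(tid);           // 1) thread id
      if (t == map.end()) return nullptr;
      auto s = t->second->find(shape);  // 2) shape string, e.g. "1-3-18-128-"
      if (s == t->second->end()) return nullptr;
      auto k = s->second->find(name);   // 3) blob name
      return k == s->second->end() ? nullptr : k->second;
    }

    int main() {
      BlobMap map;  // empty, so the lookup misses
      std::cout << (GetBlob(map, 1, "1-3-18-128-", "conv_p") == nullptr) << "\n";
    }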
@@ -621,15 +634,6 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
 
 bool AnalysisPredictor::ZeroCopyRun() {
   paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
-#ifdef PADDLE_WITH_MKLDNN
-  // TODO(intel): will refactor this code later
-  // Make sure it not conflict with AnalysisPredictor::SetMkldnnthreadid case
-  VLOG(2) << "AnalysisPredictor::Run get_cur_thread_id="
-          << paddle::platform::get_cur_thread_id()
-          << ", mkldnn_thread_id_=" << config_.mkldnn_thread_id_ << "\n";
-  if (paddle::platform::get_cur_thread_id() == 0)
-    paddle::platform::set_cur_thread_id(config_.mkldnn_thread_id_);
-#endif
   executor_->Run();
   // Fix TensorArray reuse not cleaned bug.
   tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_);
@@ -638,15 +642,6 @@ bool AnalysisPredictor::ZeroCopyRun() {
   // recover the cpu_math_library_num_threads to 1, in order to avoid thread
   // conflict when integrating it into deployment service.
   paddle::platform::SetNumThreads(1);
-#ifdef PADDLE_WITH_MKLDNN
-  // TODO(intel): will refactor this code later
-  // reset thread id to avoid confusion when thread is reused from pool again
-  // mkldnn_thread_id_ = -1 is reserved for cache clearing mode only
-  if (paddle::platform::get_cur_thread_id() == -1) {
-    VLOG(2) << "Clear previous mkldnn thread id setting\n";
-    paddle::platform::set_cur_thread_id(0);
-  }
-#endif
   return true;
 }
diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h
index b5e134ced70f8..ccfe21293465c 100644
--- a/paddle/fluid/inference/api/analysis_predictor.h
+++ b/paddle/fluid/inference/api/analysis_predictor.h
@@ -111,6 +111,11 @@ class AnalysisPredictor : public PaddlePredictor {
   template <typename T>
   void GetFetchOne(const framework::LoDTensor &fetchs,
                    PaddleTensor *output_data);
+  // Pre-process and Post-process for Mkldnn multi-thread and dynamic shape
+  // input. Used in AnalysisPredictor::Run(), do not support
+  // AnalysisPredictor::ZeroCopyRun() now.
+  void MkldnnPreRun(const std::vector<PaddleTensor> &inputs);
+  void MkldnnPostRun();
 
 #if PADDLE_WITH_TENSORRT
   // When we use Paddle-TRT INT8 engine, we need to generate calibration table
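Before patch 14 makes the shape-cache capacity configurable, it is worth restating how a lookup walks the three-level cache that patch 12 introduced. A self-contained sketch, assuming the typedefs above (FindBlob is an illustrative helper, not a function in the patches; the real logic lives in MKLDNNDeviceContext::GetBlob and takes a lock first):

    #include <memory>
    #include <string>
    #include <unordered_map>

    using KeyBlob = std::unordered_map<std::string, std::shared_ptr<void>>;
    using ShapeBlob = std::unordered_map<std::string, std::shared_ptr<KeyBlob>>;
    using BlobMap = std::unordered_map<int, std::shared_ptr<ShapeBlob>>;

    // Walks thread id -> input shape -> blob name, returning nullptr on any
    // miss, mirroring the three early returns in GetBlob above.
    std::shared_ptr<void> FindBlob(const BlobMap& cache, int tid,
                                   const std::string& shape_str,
                                   const std::string& name) {
      auto s_it = cache.find(tid);                // level 1: per-thread ShapeBlob
      if (s_it == cache.end()) return nullptr;
      auto k_it = s_it->second->find(shape_str);  // level 2: per-shape KeyBlob
      if (k_it == s_it->second->end()) return nullptr;
      auto b_it = k_it->second->find(name);       // level 3: named blob
      return b_it == k_it->second->end() ? nullptr : b_it->second;
    }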
From 266a2010863a124838add3fa865cebc60230101e Mon Sep 17 00:00:00 2001
From: luotao1
Date: Fri, 28 Jun 2019 18:40:10 +0800
Subject: [PATCH 14/21] add EnableMKLDNN(int mkldnn_input_shape_cache_size = 1) interface

---
 paddle/fluid/inference/api/analysis_config.cc     | 14 +++-----------
 paddle/fluid/inference/api/analysis_predictor.cc  | 13 ++++++++-----
 .../fluid/inference/api/paddle_analysis_config.h  |  8 ++++++--
 .../inference/tests/api/analyzer_detect_tester.cc |  3 +--
 paddle/fluid/platform/device_context.cc           | 15 +++++++++++----
 paddle/fluid/platform/device_context.h            |  2 ++
 6 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index b1221984f66b5..5797e97b97208 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -114,7 +114,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   // MKLDNN related.
   CP_MEMBER(use_mkldnn_);
   CP_MEMBER(mkldnn_enabled_op_types_);
-  CP_MEMBER(mkldnn_thread_id_);
+  CP_MEMBER(mkldnn_input_shape_cache_size_);
   // Quantization related.
   CP_MEMBER(use_mkldnn_quantizer_);
   CP_MEMBER(mkldnn_quantizer_config_);
@@ -151,9 +151,10 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   Update();
 }
 
-void AnalysisConfig::EnableMKLDNN() {
+void AnalysisConfig::EnableMKLDNN(int mkldnn_input_shape_cache_size) {
 #ifdef PADDLE_WITH_MKLDNN
   use_mkldnn_ = true;
+  mkldnn_input_shape_cache_size_ = mkldnn_input_shape_cache_size;
 #else
   LOG(ERROR) << "Please compile with MKLDNN first to use MKLDNN";
   use_mkldnn_ = false;
@@ -162,15 +163,6 @@ void AnalysisConfig::EnableMKLDNN() {
   Update();
 }
 
-void AnalysisConfig::SetMKLDNNThreadId(int id) {
-#ifdef PADDLE_WITH_MKLDNN
-  mkldnn_thread_id_ = id;
-#else
-  LOG(ERROR) << "Please compile with MKLDNN first to set MKLDNN Thread Id";
-  mkldnn_thread_id_ = 0;
-#endif
-}
-
 void AnalysisConfig::EnableMkldnnQuantizer() {
 #ifdef PADDLE_WITH_MKLDNN
   if (!mkldnn_quantizer_config_)
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 65a5d062dbb12..1067fec4f9314 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -195,13 +195,16 @@ void AnalysisPredictor::SetMkldnnThreadID(int tid) {
 
 void AnalysisPredictor::MkldnnPreRun(const std::vector<PaddleTensor> &inputs) {
   // TODO(intel, luotao): will refactor this code later
-  // Make sure it not conflict with AnalysisPredictor::SetMkldnnthreadid case
+  // Make sure it does not conflict with AnalysisPredictor::SetMkldnnThreadID case
   VLOG(2) << "AnalysisPredictor::Run get_cur_thread_id="
-          << paddle::platform::get_cur_thread_id()
-          << ", mkldnn_thread_id_=" << config_.mkldnn_thread_id_ << "\n";
-  if (paddle::platform::get_cur_thread_id() == 0)
-    paddle::platform::set_cur_thread_id(config_.mkldnn_thread_id_);
+          << paddle::platform::get_cur_thread_id();
   // -1 means cache cleaning mode.
+  if (paddle::platform::get_cur_thread_id() == 0 &&
+      config_.mkldnn_input_shape_cache_size_ > 1) {
+    paddle::platform::set_cur_thread_id(-1);
+    paddle::platform::set_cur_input_shape_cache_size(
+        config_.mkldnn_input_shape_cache_size_);
+  }
   // Set current_input_shape for caching dynamic shape.
   // Only used when batch_size=1.
   if (paddle::platform::get_cur_thread_id() == -1) {
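For reference, the per-shape cache key that MkldnnPreRun computes in the hunk above is just the dash-joined input dimensions. A small sketch under that assumption (ShapeKey is our name for it; the patch builds the string inline with a std::stringstream):

    #include <sstream>
    #include <string>
    #include <vector>

    // Dash-joined dimensions, exactly the format logged as "Set input shape=".
    std::string ShapeKey(const std::vector<int>& shape) {
      std::stringstream ss;
      for (size_t i = 0; i < shape.size(); ++i) ss << shape[i] << "-";
      return ss.str();
    }
    // e.g. ShapeKey({1, 3, 18, 128}) == "1-3-18-128-"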
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index 43fd321fa27ae..da981dbac333d 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -177,14 +177,15 @@ struct AnalysisConfig {
   bool ngraph_enabled() const { return use_ngraph_; }
 
   /** Turn on MKLDNN.
+   * And set the cache size of different input shapes for MKLDNN.
+   * Default 1 means fixed input shape, not dynamic shape.
    */
-  void EnableMKLDNN();
+  void EnableMKLDNN(int mkldnn_input_shape_cache_size = 1);
   /** A boolean state telling whether to use the MKLDNN.
    */
   bool mkldnn_enabled() const { return use_mkldnn_; }
 
   /** Set MKLDNN thread id.
    */
-  void SetMKLDNNThreadId(int id);
 
   /** Set and get the number of cpu math library threads.
    */
@@ -317,8 +318,11 @@ struct AnalysisConfig {
   std::vector<std::string> anakin_ops_filter_;
   std::map<std::string, std::string> engine_opt_info_;
 
+  // mkldnn related.
+  int mkldnn_input_shape_cache_size_{1};
   bool use_mkldnn_quantizer_{false};
   std::shared_ptr<MkldnnQuantizerConfig> mkldnn_quantizer_config_;
+
   // If the config is already used on a predictor, it becomes invalid.
   mutable bool is_valid_{true};
   // Any config can only be used with one predictor.
diff --git a/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc b/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc
index 10947d4594ae0..e496030c79895 100644
--- a/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc
@@ -79,8 +79,7 @@ void profile(bool use_mkldnn = false) {
   AnalysisConfig cfg;
   SetConfig(&cfg);
   if (use_mkldnn) {
-    cfg.EnableMKLDNN();
-    cfg.SetMKLDNNThreadId(-1);
+    cfg.EnableMKLDNN(10);
   }
   // cfg.pass_builder()->TurnOnDebug();
   std::vector<std::vector<PaddleTensor>> outputs;
diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc
index 295b3ea53be2b..f275681e97eda 100644
--- a/paddle/fluid/platform/device_context.cc
+++ b/paddle/fluid/platform/device_context.cc
@@ -408,7 +408,10 @@ thread_local int cur_thread_id = 0;
 // - Else, for a 4-dimension input [1, 3, 18, 128],
 //   cur_input_shape_str = 1-3-18-128- .
 thread_local std::string cur_input_shape_str = "";
-}
+// the cache size of different input shapes for MKLDNN.
+// Default 1 means fixed input shape, not dynamic shape.
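Patch 14 replaces the hard-coded MKLDNN_CAP with the per-thread capacity declared just below, which the SetBlob hunk further down consumes. A simplified sketch of that consumption (MaybeEvict is an illustrative name; note that std::unordered_map::begin() points at an arbitrary element, so this bounds the cache size but is not a strict FIFO, despite the "remove all head blob" log message):

    #include <memory>
    #include <string>
    #include <unordered_map>

    using KeyBlob = std::unordered_map<std::string, std::shared_ptr<void>>;
    using ShapeBlob = std::unordered_map<std::string, std::shared_ptr<KeyBlob>>;

    // In cache clearing mode (tid == -1), drop one cached shape once the
    // number of distinct input shapes exceeds the configured capacity.
    void MaybeEvict(ShapeBlob* s_blob, int tid, size_t capacity) {
      if (tid == -1 && s_blob->size() > capacity) {
        s_blob->erase(s_blob->begin());  // an arbitrary entry, not the oldest
      }
    }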
+thread_local int cur_input_shape_cache_size = 1; +} // namespace void set_cur_thread_id(int tid) { cur_thread_id = tid; } int get_cur_thread_id(void) { return cur_thread_id; } @@ -416,7 +419,10 @@ void set_cur_input_shape_str(std::string input_shape_str) { cur_input_shape_str = input_shape_str; } std::string get_cur_input_shape_str(void) { return cur_input_shape_str; } -#define MKLDNN_CAP 10 +void set_cur_input_shape_cache_size(int input_shape_cache_size) { + cur_input_shape_cache_size = input_shape_cache_size; +} +int get_cur_input_shape_cache_size(void) { return cur_input_shape_cache_size; } void MKLDNNDeviceContext::ResetBlobMap() const { p_blobmap_->clear(); } @@ -447,8 +453,9 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name, auto key_it = sBlob->find(cur_input_shape_str); if (key_it == sBlob->end()) { - // tid = -1 means cache clearing mode, MKLDNN_CAP defines max pblob capacity - if ((tid == -1) && (sBlob->size() > MKLDNN_CAP)) { + // tid = -1 means cache clearing mode, cur_input_shape_cache_size defines + // max pblob capacity + if ((tid == -1) && (sBlob->size() > cur_input_shape_cache_size)) { VLOG(2) << "tid=" << tid << ", remove all head blob of shape: " << sBlob->begin()->first; sBlob->erase(sBlob->begin()->first); diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 5afecb4fbf165..5d726119eeb1d 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -392,6 +392,8 @@ void set_cur_thread_id(int); int get_cur_thread_id(void); void set_cur_input_shape_str(std::string input_shape_str); std::string get_cur_input_shape_str(void); +void set_cur_input_shape_cache_size(int input_shape_cache_size); +int get_cur_input_shape_cache_size(void); class MKLDNNDeviceContext : public CPUDeviceContext { public: From 086f3471eeb8aa28705b346245e7dddf9acf978e Mon Sep 17 00:00:00 2001 From: luotao1 Date: Fri, 28 Jun 2019 20:24:32 +0800 Subject: [PATCH 15/21] Revert "PaddlePaddle memory leak test=develop" --- paddle/fluid/framework/details/op_registry.h | 5 ++--- paddle/fluid/framework/op_desc.cc | 2 +- paddle/fluid/framework/op_info.h | 10 ++++------ paddle/fluid/framework/op_proto_maker.cc | 5 ++--- paddle/fluid/framework/op_proto_maker.h | 10 ++++------ paddle/fluid/framework/op_proto_maker_test.cc | 12 ++++++------ 6 files changed, 19 insertions(+), 25 deletions(-) diff --git a/paddle/fluid/framework/details/op_registry.h b/paddle/fluid/framework/details/op_registry.h index 519e007161c1c..0f03ca51da778 100644 --- a/paddle/fluid/framework/details/op_registry.h +++ b/paddle/fluid/framework/details/op_registry.h @@ -14,7 +14,6 @@ limitations under the License. 
*/ #pragma once -#include #include #include #include @@ -162,8 +161,8 @@ struct OpInfoFiller { template struct OpInfoFiller { void operator()(const char* op_type, OpInfo* info) const { - info->proto_ = std::make_shared(); - info->checker_ = std::make_shared(); + info->proto_ = new proto::OpProto; + info->checker_ = new OpAttrChecker(); T maker; maker(info->proto_, info->checker_); info->proto_->set_type(op_type); diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc index bf38bfa07e9fd..1ea93b7638a85 100644 --- a/paddle/fluid/framework/op_desc.cc +++ b/paddle/fluid/framework/op_desc.cc @@ -668,7 +668,7 @@ static void InitInferShapeFuncs() { void OpDesc::CheckAttrs() { PADDLE_ENFORCE(!Type().empty(), "CheckAttr() can not be called before type is setted."); - auto checker = OpInfoMap::Instance().Get(Type()).Checker(); + auto *checker = OpInfoMap::Instance().Get(Type()).Checker(); if (checker == nullptr) { // checker is not configured. That operator could be generated by Paddle, // not by users. diff --git a/paddle/fluid/framework/op_info.h b/paddle/fluid/framework/op_info.h index ad21d6b7d86ed..daa72769c4957 100644 --- a/paddle/fluid/framework/op_info.h +++ b/paddle/fluid/framework/op_info.h @@ -15,10 +15,8 @@ limitations under the License. */ #pragma once #include #include -#include #include #include -#include #include #include "paddle/fluid/framework/attribute.h" @@ -38,13 +36,13 @@ class InferShapeBase { struct OpInfo { OpCreator creator_; GradOpMakerFN grad_op_maker_; - std::shared_ptr proto_; - std::shared_ptr checker_; + proto::OpProto* proto_{nullptr}; + OpAttrChecker* checker_{nullptr}; InferVarTypeFN infer_var_type_; InferShapeFN infer_shape_; InferInplaceOpFN infer_inplace_; InferNoNeedBufferVarsFN infer_no_need_buffer_vars_; - OpInfo() : proto_{nullptr}, checker_{nullptr} {} + // NOTE(zjl): this flag is added to check whether // the grad maker is the default one. bool use_default_grad_op_desc_maker_{false}; @@ -72,7 +70,7 @@ struct OpInfo { return grad_op_maker_; } - const std::shared_ptr Checker() const { return checker_; } + const OpAttrChecker* Checker() const { return checker_; } const InferNoNeedBufferVarsFN& NoNeedBufferVarsInferer() const { return infer_no_need_buffer_vars_; diff --git a/paddle/fluid/framework/op_proto_maker.cc b/paddle/fluid/framework/op_proto_maker.cc index 97550771f311a..27922c730471a 100644 --- a/paddle/fluid/framework/op_proto_maker.cc +++ b/paddle/fluid/framework/op_proto_maker.cc @@ -57,9 +57,8 @@ void OpProtoAndCheckerMaker::CheckNoDuplicatedInOutAttrs() { } } -void OpProtoAndCheckerMaker::operator()( - std::shared_ptr proto, - std::shared_ptr attr_checker) { +void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto, + OpAttrChecker* attr_checker) { proto_ = proto; op_checker_ = attr_checker; Make(); diff --git a/paddle/fluid/framework/op_proto_maker.h b/paddle/fluid/framework/op_proto_maker.h index e2462bc496acd..bf6528b2377dc 100644 --- a/paddle/fluid/framework/op_proto_maker.h +++ b/paddle/fluid/framework/op_proto_maker.h @@ -13,9 +13,7 @@ limitations under the License. 
*/ #pragma once -#include #include -#include #include "glog/logging.h" #include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/framework.pb.h" @@ -54,8 +52,7 @@ class OpProtoAndCheckerMaker { static const char *OpNamescopeAttrName() { return "op_namescope"; } static const char *OpCreationCallstackAttrName() { return "op_callstack"; } - void operator()(std::shared_ptr proto, - std::shared_ptr attr_checker); + void operator()(proto::OpProto *proto, OpAttrChecker *attr_checker); virtual void Make() = 0; @@ -105,8 +102,9 @@ class OpProtoAndCheckerMaker { private: void CheckNoDuplicatedInOutAttrs(); void Validate(); - std::shared_ptr proto_; - std::shared_ptr op_checker_; + + proto::OpProto *proto_; + OpAttrChecker *op_checker_; bool validated_{false}; }; } // namespace framework diff --git a/paddle/fluid/framework/op_proto_maker_test.cc b/paddle/fluid/framework/op_proto_maker_test.cc index e28048137c5fc..a8030d377fdb4 100644 --- a/paddle/fluid/framework/op_proto_maker_test.cc +++ b/paddle/fluid/framework/op_proto_maker_test.cc @@ -25,10 +25,10 @@ class TestAttrProtoMaker : public paddle::framework::OpProtoAndCheckerMaker { }; TEST(ProtoMaker, DuplicatedAttr) { - auto op_proto = std::make_shared(); - auto op_checker = std::make_shared(); + paddle::framework::proto::OpProto op_proto; + paddle::framework::OpAttrChecker op_checker; TestAttrProtoMaker proto_maker; - ASSERT_THROW(proto_maker(op_proto, op_checker), + ASSERT_THROW(proto_maker(&op_proto, &op_checker), paddle::platform::EnforceNotMet); } @@ -41,9 +41,9 @@ class TestInOutProtoMaker : public paddle::framework::OpProtoAndCheckerMaker { }; TEST(ProtoMaker, DuplicatedInOut) { - auto op_proto = std::make_shared(); - auto op_checker = std::make_shared(); + paddle::framework::proto::OpProto op_proto; + paddle::framework::OpAttrChecker op_checker; TestAttrProtoMaker proto_maker; - ASSERT_THROW(proto_maker(op_proto, op_checker), + ASSERT_THROW(proto_maker(&op_proto, &op_checker), paddle::platform::EnforceNotMet); } From 6d5a84170b2db4f2f377177299e330c78b260fbc Mon Sep 17 00:00:00 2001 From: luotao1 Date: Sat, 29 Jun 2019 10:46:22 +0800 Subject: [PATCH 16/21] make unit-test support head-checker --- paddle/fluid/inference/tests/api/CMakeLists.txt | 1 + .../inference/tests/api/analyzer_detect_tester.cc | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 1ab4f215c7152..293c13e870fda 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -157,6 +157,7 @@ inference_analysis_api_test_with_refer_result(test_analyzer_mobilenet_transpose # detect inference_analysis_api_test_with_refer_result(test_analyzer_detect ${OCR_INSTALL_DIR} analyzer_detect_tester.cc) +target_link_libraries(test_analyzer_detect tcmalloc) ### Image classification tests with fake data set(IMG_CLASS_TEST_APP "test_analyzer_image_classification") diff --git a/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc b/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc index e496030c79895..6a0d802383300 100644 --- a/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
 */
+#include
 #include
 #include
 #include
@@ -135,3 +136,17 @@ TEST(Analyzer_vis, profile_mkldnn) { profile(true /* use_mkldnn */); }
 } // namespace analysis
 } // namespace inference
 } // namespace paddle
+
+// following lines are used for pprof
+int main(int argc, char **argv) {
+  HeapLeakChecker heap_checker("test_foo");
+  FLAGS_infer_model = "third_party/inference_demo/face_model/densebox";
+  FLAGS_infer_data = "third_party/inference_demo/face_model/detect_input.txt";
+  FLAGS_infer_shape = "third_party/inference_demo/face_model/shape.txt";
+  FLAGS_paddle_num_threads = 4;
+  FLAGS_repeat = 1;
+  FLAGS_batch_size = 1;
+  FLAGS_sample = 10;
+  paddle::inference::analysis::profile(true);
+  std::cout << heap_checker.NoLeaks() << std::endl;
+}

From d91c910f96a7dfe60d1515dc98abb4998094a181 Mon Sep 17 00:00:00 2001
From: Leo Zhao
Date: Mon, 1 Jul 2019 11:12:51 +0800
Subject: [PATCH 17/21] use static variable to do cache instead of thread local in thread frequent switching case to avoid memory leak test=develop

---
 .../fluid/framework/transfer_scope_cache.cc | 25 ++++++++++++++++---
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/framework/transfer_scope_cache.cc b/paddle/fluid/framework/transfer_scope_cache.cc
index e52a8317e2113..a2b9a5e171362 100644
--- a/paddle/fluid/framework/transfer_scope_cache.cc
+++ b/paddle/fluid/framework/transfer_scope_cache.cc
@@ -17,14 +17,31 @@ namespace paddle {
 namespace framework {
 
+static std::unordered_map<size_t, Scope*>* static_transfer_data_cache = nullptr;
+static std::unordered_set<Scope*>* static_transfer_scope_cache = nullptr;
+
 std::unordered_map<size_t, Scope*>& global_transfer_data_cache() {
-  thread_local auto* x = new std::unordered_map<size_t, Scope*>;
-  return *x;
+  // if get_cur_thread_id() == -1, means not use thread local method to do cache
+  if (platform::get_cur_thread_id() == -1) {
+    if (!static_transfer_data_cache)
+      static_transfer_data_cache = new std::unordered_map<size_t, Scope*>;
+    return *static_transfer_data_cache;
+  } else {
+    thread_local auto* x = new std::unordered_map<size_t, Scope*>;
+    return *x;
+  }
 }
 
 std::unordered_set<Scope*>& global_transfer_scope_cache() {
-  thread_local auto* x = new std::unordered_set<Scope*>;
-  return *x;
+  // if get_cur_thread_id() == -1, means not use thread local method to do cache
+  if (platform::get_cur_thread_id() == -1) {
+    if (!static_transfer_scope_cache)
+      static_transfer_scope_cache = new std::unordered_set<Scope*>;
+    return *static_transfer_scope_cache;
+  } else {
+    thread_local auto* x = new std::unordered_set<Scope*>;
+    return *x;
+  }
 }
 
 Scope* TryCreateTransferScope(OpKernelType type0, OpKernelType type1,

From d6597b925642abf2704e63aff7fb62fa95684cda Mon Sep 17 00:00:00 2001
From: Leo Zhao
Date: Mon, 1 Jul 2019 11:50:53 +0800
Subject: [PATCH 18/21] use macro to control code given it is specific for mkldnn test=develop

---
 paddle/fluid/framework/transfer_scope_cache.cc | 16 ++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/framework/transfer_scope_cache.cc b/paddle/fluid/framework/transfer_scope_cache.cc
index a2b9a5e171362..74f0e8a140cb6 100644
--- a/paddle/fluid/framework/transfer_scope_cache.cc
+++ b/paddle/fluid/framework/transfer_scope_cache.cc
@@ -17,31 +17,43 @@ namespace paddle {
 namespace framework {
 
+#ifdef PADDLE_WITH_MKLDNN
 static std::unordered_map<size_t, Scope*>* static_transfer_data_cache = nullptr;
 static std::unordered_set<Scope*>* static_transfer_scope_cache = nullptr;
+#endif
 
 std::unordered_map<size_t, Scope*>& global_transfer_data_cache() {
+#ifdef PADDLE_WITH_MKLDNN
+  // if
get_cur_thread_id() == -1, means not using thread local method to do + // cache if (platform::get_cur_thread_id() == -1) { if (!static_transfer_data_cache) static_transfer_data_cache = new std::unordered_map; return *static_transfer_data_cache; } else { +#endif thread_local auto* x = new std::unordered_map; return *x; +#ifdef PADDLE_WITH_MKLDNN } +#endif } std::unordered_set& global_transfer_scope_cache() { - // if get_cur_thread_id() == -1, means not use thread local method to do cache +#ifdef PADDLE_WITH_MKLDNN + // if get_cur_thread_id() == -1, means not using thread local method to do + // cache if (platform::get_cur_thread_id() == -1) { if (!static_transfer_scope_cache) static_transfer_scope_cache = new std::unordered_set; return *static_transfer_scope_cache; } else { +#endif thread_local auto* x = new std::unordered_set; return *x; +#ifdef PADDLE_WITH_MKLDNN } +#endif } Scope* TryCreateTransferScope(OpKernelType type0, OpKernelType type1, From 5480edf3d659dc41551f853ff225a6f31d3a1d1a Mon Sep 17 00:00:00 2001 From: luotao1 Date: Mon, 8 Jul 2019 10:20:29 +0800 Subject: [PATCH 19/21] fix conflict with develop --- paddle/fluid/inference/api/analysis_predictor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 2488ad1af2ac0..be46c8d3e8587 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -194,7 +194,7 @@ void AnalysisPredictor::MkldnnPreRun(const std::vector &inputs) { if (paddle::platform::get_cur_mkldnn_session_id() == 0 && config_.mkldnn_input_shape_cache_size_ > 1) { paddle::platform::set_cur_mkldnn_session_id(-1); - paddle::platform::set_cur_input_shape_cache_size( + paddle::platform::set_cur_input_shape_cache_capacity( config_.mkldnn_input_shape_cache_size_); } // Set current_input_shape for caching dynamic shape. From 65e38651134352368dd486e515b490b1e2ad80ca Mon Sep 17 00:00:00 2001 From: Tao Luo Date: Thu, 11 Jul 2019 21:01:47 +0800 Subject: [PATCH 20/21] checkout develop paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc --- paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc index 9980a6ba48ab1..ea0abf930e7f5 100644 --- a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc @@ -162,7 +162,7 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel { auto propagation = src_md.data.data_type == mkldnn_f32 ? 
mkldnn::prop_kind::forward_training : mkldnn::prop_kind::forward_scoring; - pool_pd = + std::shared_ptr pool_pd = CreatePrimitiveDesc(src_md, dst_md, propagation, strides, padding_left_top, padding_right_bottom, ksize, pooling_type, mkldnn_engine, ceil_mode, is_test); From 330207c69869f579e2d90e2c75db65a3929493eb Mon Sep 17 00:00:00 2001 From: Tao Luo Date: Thu, 11 Jul 2019 21:04:28 +0800 Subject: [PATCH 21/21] clean detect unit-test --- .../fluid/inference/tests/api/CMakeLists.txt | 2 +- .../tests/api/analyzer_detect_tester.cc | 30 ++++++++++--------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index df727a55f07c7..7898933223da2 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -157,7 +157,7 @@ inference_analysis_api_test_with_refer_result(test_analyzer_mobilenet_transpose # detect inference_analysis_api_test_with_refer_result(test_analyzer_detect ${OCR_INSTALL_DIR} analyzer_detect_tester.cc) -target_link_libraries(test_analyzer_detect tcmalloc) +#target_link_libraries(test_analyzer_detect tcmalloc) ### Image classification tests with fake data set(IMG_CLASS_TEST_APP "test_analyzer_image_classification") diff --git a/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc b/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc index 6a0d802383300..ea68d89870fe2 100644 --- a/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include +// #include #include #include #include @@ -80,7 +80,8 @@ void profile(bool use_mkldnn = false) { AnalysisConfig cfg; SetConfig(&cfg); if (use_mkldnn) { - cfg.EnableMKLDNN(10); + cfg.EnableMKLDNN(); + cfg.SetMkldnnCacheCapacity(10); } // cfg.pass_builder()->TurnOnDebug(); std::vector> outputs; @@ -138,15 +139,16 @@ TEST(Analyzer_vis, profile_mkldnn) { profile(true /* use_mkldnn */); } } // namespace paddle // following lines are used for pprof -int main(int argc, char **argv) { - HeapLeakChecker heap_checker("test_foo"); - FLAGS_infer_model = "third_party/inference_demo/face_model/densebox"; - FLAGS_infer_data = "third_party/inference_demo/face_model/detect_input.txt"; - FLAGS_infer_shape = "third_party/inference_demo/face_model/shape.txt"; - FLAGS_paddle_num_threads = 4; - FLAGS_repeat = 1; - FLAGS_batch_size = 1; - FLAGS_sample = 10; - paddle::inference::analysis::profile(true); - std::cout << heap_checker.NoLeaks() << std::endl; -} +// int main(int argc, char **argv) { +// HeapLeakChecker heap_checker("test_foo"); +// FLAGS_infer_model = "third_party/inference_demo/face_model/densebox"; +// FLAGS_infer_data = +// "third_party/inference_demo/face_model/detect_input.txt"; +// FLAGS_infer_shape = "third_party/inference_demo/face_model/shape.txt"; +// FLAGS_paddle_num_threads = 4; +// FLAGS_repeat = 1; +// FLAGS_batch_size = 1; +// FLAGS_sample = 10; +// paddle::inference::analysis::profile(true); +// std::cout << heap_checker.NoLeaks() << std::endl; +// }
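With patch 21 applied, the user-facing recipe for dynamic-shape MKLDNN inference is the pair of calls exercised in the tester above. A hedged end-to-end sketch, assuming the AnalysisConfig API as it stands at the end of this series (the model path is a placeholder):

    #include "paddle/fluid/inference/api/paddle_inference_api.h"

    int main() {
      paddle::AnalysisConfig cfg;
      cfg.SetModel("path/to/model");   // placeholder, e.g. the densebox model
      cfg.EnableMKLDNN();              // final API takes no cache-size argument
      cfg.SetMkldnnCacheCapacity(10);  // keep at most 10 input shapes cached
      auto predictor = paddle::CreatePaddlePredictor(cfg);
      // Feed inputs via predictor->Run(...); once more than 10 distinct input
      // shapes have been seen, cached per-shape MKLDNN primitives are evicted.
      return 0;
    }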