From 2e8955bfb7b6d884ff0143e3e3a8f555c8b3bc11 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Wed, 21 Apr 2021 05:04:34 -0700
Subject: [PATCH 1/3] fix memory leak

---
 .../providers/tensorrt/tensorrt_execution_provider.cc | 10 +++++-----
 .../providers/tensorrt/tensorrt_execution_provider.h  |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
index 13d247afd37f7..c3aa8de972752 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@@ -473,7 +473,7 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
         throw std::runtime_error("Failed to create directory " + cache_path_);
       }
     }
-    runtime_ = nvinfer1::createInferRuntime(GetTensorrtLogger());
+    runtime_ = tensorrt_ptr::unique_pointer<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(GetTensorrtLogger()));
   }
 
   const std::string engine_decryption_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kDecryptionEnable);
@@ -1243,7 +1243,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<Node*>& fuse
             &engines_[context->node_name], &contexts_[context->node_name], &builders_[context->node_name],
             &networks_[context->node_name], input_info_[context->node_name], output_info_[context->node_name],
             input_shape_ranges_[context->node_name], &tensorrt_mu_, &fp16_enable_, &int8_enable_, &max_workspace_size_,
-            trt_node_name_with_precision, engine_cache_enable_, cache_path_, runtime_, nullptr,
+            trt_node_name_with_precision, engine_cache_enable_, cache_path_, &runtime_, nullptr,
             allocator_, dynamic_range_map, engine_decryption_enable_, engine_decryption_};
       *state = p.release();
       return 0;
@@ -1295,9 +1295,9 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<Node*>& fuse
         engine_file.seekg(0, std::ios::beg);
         std::unique_ptr<char[]> engine_buf{new char[engine_size]};
         engine_file.read((char*)engine_buf.get(), engine_size);
-        auto runtime_ = trt_state->runtime;
+        auto runtime = trt_state->runtime->get();
         *(trt_state->engine) = tensorrt_ptr::unique_pointer<nvinfer1::ICudaEngine>(
-            runtime_->deserializeCudaEngine(engine_buf.get(), engine_size, nullptr));
+            runtime->deserializeCudaEngine(engine_buf.get(), engine_size, nullptr));
         if (trt_state->engine == nullptr) {
           return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP Failed to Build Engine.");
         }
@@ -1326,7 +1326,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<Node*>& fuse
         // Deserialize engine
         trt_state->context->reset();
         trt_state->engine->reset();
-        *(trt_state->engine) = tensorrt_ptr::unique_pointer<nvinfer1::ICudaEngine>(trt_state->runtime->deserializeCudaEngine(engine_buf.get(), engine_size, nullptr));
+        *(trt_state->engine) = tensorrt_ptr::unique_pointer<nvinfer1::ICudaEngine>(trt_state->runtime->get()->deserializeCudaEngine(engine_buf.get(), engine_size, nullptr));
         LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] DeSerialized " + engine_cache_path;
         if (trt_state->engine == nullptr) {
           return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
index 073ee6aa275a2..1c38b7b7ca05a 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
@@ -99,7 +99,7 @@ struct TensorrtFuncState {
   std::string trt_node_name_with_precision;
   bool engine_cache_enable;
   std::string engine_cache_path;
-  nvinfer1::IRuntime* runtime = nullptr;
+  tensorrt_ptr::unique_pointer<nvinfer1::IRuntime>* runtime = nullptr;
   nvinfer1::IOptimizationProfile* trt_profile = nullptr;
   AllocatorPtr scratch_allocator;
@@ -149,7 +149,7 @@ class TensorrtExecutionProvider : public IExecutionProvider {
   bool dump_subgraphs_ = false;
   bool engine_cache_enable_ = false;
   std::string cache_path_;
-  nvinfer1::IRuntime* runtime_ = nullptr;
+  tensorrt_ptr::unique_pointer<nvinfer1::IRuntime> runtime_ = nullptr;
   OrtMutex tensorrt_mu_;
   int device_id_;
   AllocatorPtr allocator_;
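[Note on PATCH 1/3] The leak being fixed: nvinfer1::createInferRuntime() returns a heap-allocated IRuntime* that the provider previously stored raw and never released. Wrapping it in tensorrt_ptr::unique_pointer ties the runtime's lifetime to the provider. The alias's definition is not part of this diff; a minimal sketch of the destroy()-based deleter it implies (InferDeleter and the exact layout are illustrative assumptions, not text from the patch):

#include <memory>

namespace tensorrt_ptr {

// Illustrative deleter: TensorRT objects of this era are released via
// destroy(), not delete, so a plain std::unique_ptr<T> would be wrong.
struct InferDeleter {
  template <typename T>
  void operator()(T* obj) const {
    if (obj != nullptr) {
      obj->destroy();
    }
  }
};

template <typename T>
using unique_pointer = std::unique_ptr<T, InferDeleter>;

}  // namespace tensorrt_ptr

With an alias along these lines, the new constructor statement hands ownership of the created runtime to the runtime_ member, and the deleter calls destroy() exactly once when the provider is torn down.
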
From cd8d64065d616729d929797eb4cd8eda8e02f06b Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Thu, 22 Apr 2021 10:01:49 -0700
Subject: [PATCH 2/3] small refactor

---
 .../core/providers/tensorrt/tensorrt_execution_provider.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
index c3aa8de972752..c733764dcd7fd 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@@ -1326,7 +1326,8 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<Node*>& fuse
         // Deserialize engine
         trt_state->context->reset();
         trt_state->engine->reset();
-        *(trt_state->engine) = tensorrt_ptr::unique_pointer<nvinfer1::ICudaEngine>(trt_state->runtime->get()->deserializeCudaEngine(engine_buf.get(), engine_size, nullptr));
+        auto runtime = trt_state->runtime->get();
+        *(trt_state->engine) = tensorrt_ptr::unique_pointer<nvinfer1::ICudaEngine>(runtime->deserializeCudaEngine(engine_buf.get(), engine_size, nullptr));
         LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] DeSerialized " + engine_cache_path;
         if (trt_state->engine == nullptr) {
           return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
From 48c86d55b7543d8104c41190f0fb1a205d9b4702 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Thu, 22 Apr 2021 22:33:45 -0700
Subject: [PATCH 3/3] code refactor

---
 .../providers/tensorrt/tensorrt_execution_provider.cc     | 8 +++-----
 .../core/providers/tensorrt/tensorrt_execution_provider.h | 2 +-
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
index c733764dcd7fd..aa0294a4e12ff 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@@ -1243,7 +1243,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<Node*>& fuse
             &engines_[context->node_name], &contexts_[context->node_name], &builders_[context->node_name],
             &networks_[context->node_name], input_info_[context->node_name], output_info_[context->node_name],
             input_shape_ranges_[context->node_name], &tensorrt_mu_, &fp16_enable_, &int8_enable_, &max_workspace_size_,
-            trt_node_name_with_precision, engine_cache_enable_, cache_path_, &runtime_, nullptr,
+            trt_node_name_with_precision, engine_cache_enable_, cache_path_, runtime_.get(), nullptr,
             allocator_, dynamic_range_map, engine_decryption_enable_, engine_decryption_};
       *state = p.release();
       return 0;
@@ -1295,9 +1295,8 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<Node*>& fuse
         engine_file.seekg(0, std::ios::beg);
         std::unique_ptr<char[]> engine_buf{new char[engine_size]};
         engine_file.read((char*)engine_buf.get(), engine_size);
-        auto runtime = trt_state->runtime->get();
         *(trt_state->engine) = tensorrt_ptr::unique_pointer<nvinfer1::ICudaEngine>(
-            runtime->deserializeCudaEngine(engine_buf.get(), engine_size, nullptr));
+            trt_state->runtime->deserializeCudaEngine(engine_buf.get(), engine_size, nullptr));
         if (trt_state->engine == nullptr) {
           return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "TensorRT EP Failed to Build Engine.");
         }
@@ -1326,8 +1325,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<Node*>& fuse
         // Deserialize engine
         trt_state->context->reset();
         trt_state->engine->reset();
-        auto runtime = trt_state->runtime->get();
-        *(trt_state->engine) = tensorrt_ptr::unique_pointer<nvinfer1::ICudaEngine>(runtime->deserializeCudaEngine(engine_buf.get(), engine_size, nullptr));
+        *(trt_state->engine) = tensorrt_ptr::unique_pointer<nvinfer1::ICudaEngine>(trt_state->runtime->deserializeCudaEngine(engine_buf.get(), engine_size, nullptr));
         LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] DeSerialized " + engine_cache_path;
         if (trt_state->engine == nullptr) {
           return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL,
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
index 1c38b7b7ca05a..1838cc1c05f0b 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
@@ -99,7 +99,7 @@ struct TensorrtFuncState {
   std::string trt_node_name_with_precision;
   bool engine_cache_enable;
   std::string engine_cache_path;
-  tensorrt_ptr::unique_pointer<nvinfer1::IRuntime>* runtime = nullptr;
+  nvinfer1::IRuntime* runtime = nullptr;
   nvinfer1::IOptimizationProfile* trt_profile = nullptr;
   AllocatorPtr scratch_allocator;
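
[Note on the series] Taken together, the three patches land on a single ownership model: the provider owns the runtime through the smart pointer (patch 1), and after patch 3 each TensorrtFuncState borrows a plain nvinfer1::IRuntime* obtained from runtime_.get(), so per-node state never participates in ownership. A self-contained mock of that final shape, with no TensorRT dependency (FakeRuntime, InferDeleter, Provider, and FuncState are illustrative stand-ins, not code from the repository):

#include <cstdio>
#include <memory>

// Stand-in for a TensorRT-style object that must be released via destroy().
struct FakeRuntime {
  void destroy() {
    std::puts("runtime destroyed");
    delete this;
  }
};

// Deleter that routes unique_ptr cleanup through destroy().
struct InferDeleter {
  template <typename T>
  void operator()(T* obj) const {
    if (obj != nullptr) obj->destroy();
  }
};

template <typename T>
using unique_pointer = std::unique_ptr<T, InferDeleter>;

// Mirrors TensorrtFuncState after patch 3: a non-owning raw pointer.
struct FuncState {
  FakeRuntime* runtime = nullptr;
};

// Mirrors the provider after patch 3: sole owner of the runtime.
struct Provider {
  unique_pointer<FakeRuntime> runtime_{new FakeRuntime()};

  FuncState MakeState() {
    // Borrow only, analogous to passing runtime_.get() into the state.
    return FuncState{runtime_.get()};
  }
};

int main() {
  Provider provider;
  FuncState state = provider.MakeState();
  (void)state;  // states may come and go; none of them owns the runtime
  return 0;     // provider's destructor fires the deleter exactly once
}

Keeping the borrowed pointer raw in the state struct lets compute states be created and released freely without touching the runtime's lifetime, which is what the intermediate unique_pointer* indirection introduced in patch 1 was approximating.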