Skip to content

Commit

Permalink
Address CR comments
Browse files Browse the repository at this point in the history
  • Loading branch information
guoyu-wang committed Jul 27, 2021
1 parent 98dcd2e commit f2d10a3
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,13 @@ static const char* const kOrtSessionOptionsUseDeviceAllocatorForInitializers = "
static const char* const kOrtSessionOptionsConfigAllowInterOpSpinning = "session.inter_op.allow_spinning";
static const char* const kOrtSessionOptionsConfigAllowIntraOpSpinning = "session.intra_op.allow_spinning";

// Key for disable copy model bytes for ORT format
// By default we will copy the model bytes for ORT format at the time of session creation to ensure the model bytes
// Key for using model bytes directly for ORT format
// If a session is created using an input byte array that contains the ORT format model data,
// By default we will copy the model bytes at the time of session creation to ensure the model bytes
// buffer is valid.
// Setting this option to "1" will disable copy the model bytes. The caller has to guarantee that the model bytes
// are valid until the ORT session using the model bytes is destroyed.
static const char* const kOrtSessionOptionsConfigDisableCopyORTModelBytes = "session.disable_copy_ort_model_bytes";
// Setting this option to "1" will disable copying the model bytes, and the model bytes will be used directly. The caller
// has to guarantee that the model bytes are valid until the ORT session using the model bytes is destroyed.
static const char* const kOrtSessionOptionsConfigUseORTModelBytesDirectly = "session.use_ort_model_bytes_directly";

// NNAPI EP keys begin
// Note: These options should be specified prior to appending the NNAPI EP to the session options object in order for
Expand Down
6 changes: 3 additions & 3 deletions onnxruntime/core/session/inference_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1005,9 +1005,9 @@ Status InferenceSession::LoadOrtModel(const std::wstring& model_uri) {

Status InferenceSession::LoadOrtModel(const void* model_data, int model_data_len) {
return LoadOrtModel([&]() {
const auto disable_copy_ort_model_bytes =
GetSessionOptions().config_options.GetConfigOrDefault(kOrtSessionOptionsConfigDisableCopyORTModelBytes, "0");
if (disable_copy_ort_model_bytes != "1") {
const auto use_ort_model_bytes_directly =
GetSessionOptions().config_options.GetConfigOrDefault(kOrtSessionOptionsConfigUseORTModelBytesDirectly, "0");
if (use_ort_model_bytes_directly != "1") {
// copy bytes as we need them to be available when InferenceSession::Initialize is called later.
ort_format_model_bytes_data_holder_.resize(model_data_len);
std::copy_n(reinterpret_cast<const uint8_t*>(model_data), model_data_len,
Expand Down
12 changes: 7 additions & 5 deletions onnxruntime/core/session/inference_session.h
Original file line number Diff line number Diff line change
Expand Up @@ -706,15 +706,15 @@ class InferenceSession {

// View of the bytes from an ORT format model.
// If the session is started with an input byte array that contains model data, and the caller
// specify that ORT should not copy the model bytes by setting the session config option
// "session.disable_copy_ort_model_bytes" to "1"
// specifies that ORT should use the model bytes directly by setting the session config option
// "session.use_ort_model_bytes_directly" to "1"
// We use the byte array directly without copying it to reduce peak memory usage
// (Short term) This will require the user to guarantee the life time of the model data
// until the session is created.
// (Longer term) If we are going to use the memory offsets directly for initializers, the model data
// should be alive until the InferenceSession goes away.
// If the session is started with an input byte array that contains model data, and the caller does not
// specify ORT should not copy the model bytes
// specify that ORT should use the model bytes directly
// Or the session is started with a model_uri
// We store them currently in the ort_format_model_bytes_data_holder_ to make the Load + Initialize
// behave the same way as for an ONNX model, as we need some of the bytes for the Load (create the Model)
Expand All @@ -724,8 +724,10 @@ class InferenceSession {
// those into new OrtValue instances, at which point we won't free them until the InferenceSession goes away.
gsl::span<const uint8_t> ort_format_model_bytes_;

// This holds the actual model data when the session is started with a model_uri
// In case if the session is started with an input byte array contains model data, this will be empty
// This holds the actual model data
// If the session is started with an input byte array that contains model data, and the caller
// specifies that ORT should use the model bytes directly by setting the session config option
// "session.use_ort_model_bytes_directly" to "1", this will be empty
std::vector<uint8_t> ort_format_model_bytes_data_holder_;

std::shared_ptr<onnxruntime::AllocatorManager> allocator_manager_;
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/test/framework/ort_model_only_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ static void RunOrtModel(const OrtModelTestInfo& test_info) {
}

if (test_info.disable_copy_ort_buffer) {
so.config_options.AddConfigEntry(kOrtSessionOptionsConfigDisableCopyORTModelBytes, "1");
so.config_options.AddConfigEntry(kOrtSessionOptionsConfigUseORTModelBytesDirectly, "1");
}

std::vector<char> model_data;
Expand Down

0 comments on commit f2d10a3

Please sign in to comment.