Skip to content

Commit

Permalink
Address CR comments
Browse files Browse the repository at this point in the history
  • Loading branch information
guoyu-wang committed Jul 27, 2021
1 parent 98dcd2e commit f2d10a3
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,13 @@ static const char* const kOrtSessionOptionsUseDeviceAllocatorForInitializers = "
static const char* const kOrtSessionOptionsConfigAllowInterOpSpinning = "session.inter_op.allow_spinning";
static const char* const kOrtSessionOptionsConfigAllowIntraOpSpinning = "session.intra_op.allow_spinning";

// Key for disable copy model bytes for ORT format
// By default we will copy the model bytes for ORT format at the time of session creation to ensure the model bytes
// Key for using model bytes directly for ORT format
// If a session is created using an input byte array that contains the ORT format model data,
// By default we will copy the model bytes at the time of session creation to ensure the model bytes
// buffer is valid.
// Setting this option to "1" will disable copy the model bytes. The caller has to guarantee that the model bytes
// are valid until the ORT session using the model bytes is destroyed.
static const char* const kOrtSessionOptionsConfigDisableCopyORTModelBytes = "session.disable_copy_ort_model_bytes";
// Setting this option to "1" will disable copying the model bytes, and the model bytes will be used directly. The caller
// has to guarantee that the model bytes are valid until the ORT session using the model bytes is destroyed.
static const char* const kOrtSessionOptionsConfigUseORTModelBytesDirectly = "session.use_ort_model_bytes_directly";

// NNAPI EP keys begin
// Note: These options should be specified prior to appending the NNAPI EP to the session options object in order for
Expand Down
6 changes: 3 additions & 3 deletions onnxruntime/core/session/inference_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1005,9 +1005,9 @@ Status InferenceSession::LoadOrtModel(const std::wstring& model_uri) {

Status InferenceSession::LoadOrtModel(const void* model_data, int model_data_len) {
return LoadOrtModel([&]() {
const auto disable_copy_ort_model_bytes =
GetSessionOptions().config_options.GetConfigOrDefault(kOrtSessionOptionsConfigDisableCopyORTModelBytes, "0");
if (disable_copy_ort_model_bytes != "1") {
const auto use_ort_model_bytes_directly =
GetSessionOptions().config_options.GetConfigOrDefault(kOrtSessionOptionsConfigUseORTModelBytesDirectly, "0");
if (use_ort_model_bytes_directly != "1") {
// copy bytes as we need them to be available when InferenceSession::Initialize is called later.
ort_format_model_bytes_data_holder_.resize(model_data_len);
std::copy_n(reinterpret_cast<const uint8_t*>(model_data), model_data_len,
Expand Down
12 changes: 7 additions & 5 deletions onnxruntime/core/session/inference_session.h
Original file line number Diff line number Diff line change
Expand Up @@ -706,15 +706,15 @@ class InferenceSession {

// View of the bytes from an ORT format model.
// If the session is started with an input byte array that contains model data, and the caller
// specify that ORT should not copy the model bytes by setting the session config option
// "session.disable_copy_ort_model_bytes" to "1"
// specifies that ORT should use the model bytes directly by setting the session config option
// "session.use_ort_model_bytes_directly" to "1"
// We use the byte array directly without copying it to reduce peak memory usage
// (Short term) This will require the user to guarantee the life time of the model data
// until the session is created.
// (Longer term) If we are going to use the memory offsets directly for initializers, the model data
// should be alive until the InferenceSession goes away.
// If the session is started with an input byte array that contains model data, and the caller does not
// specify ORT should not copy the model bytes
// specify that ORT should use the model bytes directly
// Or the session is started with a model_uri
// We store them currently in the ort_format_model_bytes_data_holder_ to make the Load + Initialize
// behave the same way as for an ONNX model, as we need some of the bytes for the Load (create the Model)
Expand All @@ -724,8 +724,10 @@ class InferenceSession {
// those into new OrtValue instances, at which point we won't free them until the InferenceSession goes away.
gsl::span<const uint8_t> ort_format_model_bytes_;

// This holds the actual model data when the session is started with a model_uri
// In case if the session is started with an input byte array contains model data, this will be empty
// This holds the actual model data
// If the session is started with an input byte array that contains model data, and the caller
// specifies that ORT should use the model bytes directly by setting the session config option
// "session.use_ort_model_bytes_directly" to "1", this will be empty
std::vector<uint8_t> ort_format_model_bytes_data_holder_;

std::shared_ptr<onnxruntime::AllocatorManager> allocator_manager_;
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/test/framework/ort_model_only_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ static void RunOrtModel(const OrtModelTestInfo& test_info) {
}

if (test_info.disable_copy_ort_buffer) {
so.config_options.AddConfigEntry(kOrtSessionOptionsConfigDisableCopyORTModelBytes, "1");
so.config_options.AddConfigEntry(kOrtSessionOptionsConfigUseORTModelBytesDirectly, "1");
}

std::vector<char> model_data;
Expand Down

0 comments on commit f2d10a3

Please sign in to comment.