Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SetEpDynamicOptions and remove workload_type from run/session options #22282

Merged
merged 10 commits into from
Oct 10, 2024
Merged
5 changes: 4 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,8 @@
"-build/include_subdir",
"-runtime/references"
],
"C_Cpp.autoAddFileAssociations": false
"C_Cpp.autoAddFileAssociations": false,
"githubPullRequests.ignoredPullRequestBranches": [
"main"
]
}
9 changes: 9 additions & 0 deletions include/onnxruntime/core/framework/execution_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,15 @@ class IExecutionProvider {
return Status::OK();
}

/**
   Hook invoked when InferenceSession::SetEpDynamicOptions is called.
   `keys` and `values` carry the option names and their values; this base
   implementation uses neither and simply reports success.
   TODO: what is the right way of passing parameters?
*/
virtual common::Status SetEpDynamicOptions([[maybe_unused]] gsl::span<const char*> keys,
                                           [[maybe_unused]] gsl::span<const char*> values) {
  return common::Status::OK();
}

/**
Indicate whether the graph capturing mode (e.g., cuda graph) is enabled for
the provider.
Expand Down
18 changes: 18 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -4722,6 +4722,24 @@ struct OrtApi {
* \param[in] adapter OrtLoraAdapter instance
*/
ORT_API2_STATUS(RunOptionsAddActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter);

/// @}
/// \name OrtEpDynamicOptions
/// @{

/** \brief Set DynamicOptions for EPs
l31g marked this conversation as resolved.
Show resolved Hide resolved
l31g marked this conversation as resolved.
Show resolved Hide resolved
*
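* \param[in] sess Session whose execution providers receive the options
* \param[in] keys Array of `kv_len` option names, each a null-terminated string
* \param[in] values Array of `kv_len` option values, each a null-terminated string; `values[i]` pairs with `keys[i]`
* \param[in] kv_len Number of key-value pairs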
*
* \since Version xxx
* @TODO: update version number
l31g marked this conversation as resolved.
Show resolved Hide resolved
* @TODO: should it be SetExecutionProvider... instead of SetEp...?
l31g marked this conversation as resolved.
Show resolved Hide resolved
*/
ORT_API2_STATUS(SetEpDynamicOptions, _Inout_ OrtSession* sess, _In_reads_(kv_len) const char* const* keys,
_In_reads_(kv_len) const char* const* values, _In_ size_t kv_len);
l31g marked this conversation as resolved.
Show resolved Hide resolved
};

/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,3 @@ static const char* const kOrtRunOptionsConfigQnnRpcControlLatency = "qnn.rpc_con
// If the value is set to -1, cuda graph capture/replay is disabled in that run.
// User are not expected to set the value to 0 as it is reserved for internal use.
static const char* const kOrtRunOptionsConfigCudaGraphAnnotation = "gpu_graph_id";

// Specify the type of workload for this run.
// “Default”: OS determines the scheduling priority and processor performance to service this workload. [Default]
// “Efficient”: OS treats this workload as efficiency-oriented, with low scheduling priority and efficient processor performance.
static const char* const kOrtRunOptionsWorkloadType = "run.workload_type";
l31g marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -282,8 +282,3 @@ static const char* const kOrtSessionOptionsMlasGemmFastMathArm64Bfloat16 = "mlas
// Refer to MatMulNBits op schema for more details.
// If not provided, default is 4.
static const char* const kOrtSessionOptionsQDQMatMulNBitsAccuracyLevel = "session.qdq_matmulnbits_accuracy_level";

// Specify the type of workload for this session.
// “Default”: OS determines the scheduling priority and processor performance to service this workload. [Default]
// “Efficient”: OS treats this workload as efficiency-oriented, with low scheduling priority and efficient processor performance.
static const char* const kOrtSessionOptionsWorkloadType = "session.workload_type";
32 changes: 32 additions & 0 deletions onnxruntime/core/session/inference_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2475,6 +2475,38 @@
};
} // namespace

// TODO: are we going to do all the tracing and logging and telemetry for this?

Check warning on line 2478 in onnxruntime/core/session/inference_session.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Missing username in TODO; it should look like "// TODO(my_username): Stuff." [readability/todo] [2] Raw Output: onnxruntime/core/session/inference_session.cc:2478: Missing username in TODO; it should look like "// TODO(my_username): Stuff." [readability/todo] [2]
// TODO: is this the right type? How do we convert to string?

Check warning on line 2479 in onnxruntime/core/session/inference_session.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Missing username in TODO; it should look like "// TODO(my_username): Stuff." [readability/todo] [2] Raw Output: onnxruntime/core/session/inference_session.cc:2479: Missing username in TODO; it should look like "// TODO(my_username): Stuff." [readability/todo] [2]
/**
 * Forwards dynamic option key/value pairs to every execution provider held in
 * execution_providers_.
 *
 * @param keys   Option names; keys[i] pairs with values[i].
 * @param values Option values; must contain as many entries as `keys`.
 * @return FAIL if the session has not been initialized, INVALID_ARGUMENT if
 *         `keys` and `values` differ in length, otherwise the status accumulated
 *         from the providers through ORT_CHECK_AND_SET_RETVAL. Exceptions thrown
 *         along the way are converted into FAIL / RUNTIME_EXCEPTION statuses.
 */
Status InferenceSession::SetEpDynamicOptions(gsl::span<const char*> keys,
                                             gsl::span<const char*> values) {
  Status retval = Status::OK();

  ORT_TRY {
    if (!is_inited_) {
      LOGS(*session_logger_, ERROR) << "Session was not initialized";
      return Status(common::ONNXRUNTIME, common::FAIL, "Session not initialized.");
    }

    // The two spans are consumed as parallel arrays, so a length mismatch would
    // let a provider read past the end of the shorter one.
    if (keys.size() != values.size()) {
      return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
                    "SetEpDynamicOptions: keys and values must have the same number of entries.");
    }

    // Hand the options to each execution provider.
    // TODO: only call SetEpDynamicOptions for all providers in-use
    for (auto& xp : execution_providers_) {
      auto status = xp->SetEpDynamicOptions(keys, values);
      ORT_CHECK_AND_SET_RETVAL(status);
    }
  }
  ORT_CATCH(const std::exception& e) {
    ORT_HANDLE_EXCEPTION([&]() {
      retval = Status(common::ONNXRUNTIME, common::FAIL, e.what());
    });
  }
  ORT_CATCH(...) {
    // Message is split only to respect the 120-column limit; the runtime string is unchanged.
    retval = Status(common::ONNXRUNTIME, common::RUNTIME_EXCEPTION,
                    "Encountered unknown exception in SetEpDynamicOptions()");
  }

  return retval;
}

Status InferenceSession::Run(const RunOptions& run_options,
gsl::span<const std::string> feed_names, gsl::span<const OrtValue> feeds,
gsl::span<const std::string> output_names, std::vector<OrtValue>* p_fetches,
Expand Down
3 changes: 3 additions & 0 deletions onnxruntime/core/session/inference_session.h
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,9 @@ class InferenceSession {
*/
[[nodiscard]] common::Status Initialize();

/**
 * Passes dynamic option key/value pairs on to the execution providers of this session.
 * The session must already be initialized; otherwise a FAIL status is returned.
 * @param keys option names
 * @param values option values, one per entry in keys
 * @return OK on success, otherwise an error status (session not initialized, a provider
 *         reported an error, or an exception was caught while notifying the providers)
 */
[[nodiscard]] common::Status SetEpDynamicOptions(gsl::span<const char*> keys,
pranavsharma marked this conversation as resolved.
Show resolved Hide resolved
gsl::span<const char*> values);

[[nodiscard]] common::Status Run(const RunOptions& run_options, gsl::span<const std::string> feed_names,
gsl::span<const OrtValue> feeds, gsl::span<const std::string> output_names,
std::vector<OrtValue>* p_fetches,
Expand Down
23 changes: 23 additions & 0 deletions onnxruntime/core/session/onnxruntime_c_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -843,6 +843,27 @@

} // namespace

// C API entry point: forwards `kv_len` key/value string pairs to
// InferenceSession::SetEpDynamicOptions on the session behind `sess`.
//
// - kv_len == 0: nothing is forwarded and an OK status is returned.
// - kv_len != 0 with a null `keys` or `values` array: ORT_INVALID_ARGUMENT.
// - otherwise: the status returned by the session, converted with ToOrtStatus.
ORT_API_STATUS_IMPL(OrtApis::SetEpDynamicOptions, _Inout_ OrtSession* sess,
                    _In_reads_(kv_len) const char* const* keys,
                    _In_reads_(kv_len) const char* const* values, _In_ size_t kv_len) {
  API_IMPL_BEGIN
  if (kv_len == 0) {
    // TODO: how does one return OK or no_values_passed
    return ToOrtStatus(Status::OK());
  }

  if (keys == nullptr || values == nullptr) {
    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT,
                                 "SetEpDynamicOptions: keys and values must not be null when kv_len is non-zero.");
  }

  // InferenceSession::SetEpDynamicOptions takes gsl::span<const char*>, i.e. a view over
  // *mutable* pointers. A span built directly from `const char* const*` has element type
  // `const char* const` and does not convert to it (the reported build failure), so copy
  // the pointers (not the strings) into local, mutable arrays first.
  std::vector<const char*> keys_copy(keys, keys + kv_len);
  std::vector<const char*> values_copy(values, values + kv_len);

  auto* session = reinterpret_cast<::onnxruntime::InferenceSession*>(sess);
  const Status status = session->SetEpDynamicOptions(gsl::make_span(keys_copy),
                                                     gsl::make_span(values_copy));
  return ToOrtStatus(status);
  API_IMPL_END
}

ORT_API_STATUS_IMPL(OrtApis::Run, _Inout_ OrtSession* sess, _In_opt_ const OrtRunOptions* run_options,
_In_reads_(input_len) const char* const* input_names,
_In_reads_(input_len) const OrtValue* const* input, size_t input_len,
Expand Down Expand Up @@ -2785,6 +2806,8 @@
&OrtApis::CreateLoraAdapterFromArray,
&OrtApis::ReleaseLoraAdapter,
&OrtApis::RunOptionsAddActiveLoraAdapter,

&OrtApis::SetEpDynamicOptions,
};

// OrtApiBase can never change as there is no way to know what version of OrtApiBase is returned by OrtGetApiBase.
Expand Down
2 changes: 2 additions & 0 deletions onnxruntime/core/session/ort_apis.h
Original file line number Diff line number Diff line change
Expand Up @@ -531,4 +531,6 @@
ORT_API(void, ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter*);
ORT_API_STATUS_IMPL(RunOptionsAddActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter);

ORT_API_STATUS_IMPL(SetEpDynamicOptions, _Inout_ OrtSession* sess, _In_reads_(kv_len) const char* const* keys,
_In_reads_(kv_len) const char* const* values, _In_ size_t kv_len);

Check warning on line 535 in onnxruntime/core/session/ort_apis.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Do not indent within a namespace. [whitespace/indent_namespace] [4] Raw Output: onnxruntime/core/session/ort_apis.h:535: Do not indent within a namespace. [whitespace/indent_namespace] [4]
} // namespace OrtApis
Loading