microsoft · jywu-msft · Oct 10, 2024 · Oct 1, 2024 · Oct 1, 2024 · Oct 2, 2024
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -22,5 +22,8 @@
  "-build/include_subdir",
  "-runtime/references"
  ],
- "C_Cpp.autoAddFileAssociations": false
+ "C_Cpp.autoAddFileAssociations": false,
+ "githubPullRequests.ignoredPullRequestBranches": [
+ "main"
+ ]
 }
diff --git a/include/onnxruntime/core/framework/execution_provider.h b/include/onnxruntime/core/framework/execution_provider.h
@@ -214,6 +214,15 @@ class IExecutionProvider {
  return Status::OK();
  }
 
+ /**
+ Called when InferenceSession::SetEpDynamicOptions is called. This default implementation
+ ignores both spans and returns Status::OK(). TODO: what is the right way of passing parameters?
+ */
+ virtual common::Status SetEpDynamicOptions(gsl::span<const char*> /*keys*/,
+ gsl::span<const char*> /*values*/) {
+ return Status::OK();
+ }
+
  /**
  Indicate whether the graph capturing mode (e.g., cuda graph) is enabled for
  the provider.

diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -4722,6 +4722,24 @@ struct OrtApi {
  * \param[in] adapter OrtLoraAdapter instance
  */
  ORT_API2_STATUS(RunOptionsAddActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter);
+
+ /// @}
+ /// \name OrtEpDynamicOptions
+ /// @{
+
+ /** \brief Set DynamicOptions for EPs
+ *
+ * \param[in] sess Session whose execution providers receive the options
+ * \param[in] keys Array of kv_len keys, each a null-terminated string
+ * \param[in] values Array of kv_len values, each a null-terminated string
+ * \param[in] kv_len Number of key-value pairs
+ *
+ * \since Version xxx
+ * @TODO: update version number
+ * @TODO: should it be SetExecutionProvider... instead of SetEp...?
+ */
+ ORT_API2_STATUS(SetEpDynamicOptions, _Inout_ OrtSession* sess, _In_reads_(kv_len) const char* const* keys,
+ _In_reads_(kv_len) const char* const* values, _In_ size_t kv_len);
 };
 
 /*

diff --git a/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h
@@ -49,8 +49,3 @@ static const char* const kOrtRunOptionsConfigQnnRpcControlLatency = "qnn.rpc_con
 // If the value is set to -1, cuda graph capture/replay is disabled in that run.
 // User are not expected to set the value to 0 as it is reserved for internal use.
 static const char* const kOrtRunOptionsConfigCudaGraphAnnotation = "gpu_graph_id";
-
-// Specify the type of workload for this run.
-// “Default”: OS determines the scheduling priority and processor performance to service this workload. [Default]
-// “Efficient”: OS treats this workload is efficiency oriented with low scheduling priority and efficient processor performance.
-static const char* const kOrtRunOptionsWorkloadType = "run.workload_type";
diff --git a/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h b/include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
@@ -282,8 +282,3 @@ static const char* const kOrtSessionOptionsMlasGemmFastMathArm64Bfloat16 = "mlas
 // Refer to MatMulNBits op schema for more details.
 // If not provided, default is 4.
 static const char* const kOrtSessionOptionsQDQMatMulNBitsAccuracyLevel = "session.qdq_matmulnbits_accuracy_level";
-
-// Specify the type of workload for this session.
-// “Default”: OS determines the scheduling priority and processor performance to service this workload. [Default]
-// “Efficient”: OS treats this workload is efficiency oriented with low scheduling priority and efficient processor performance.
-static const char* const kOrtSessionOptionsWorkloadType = "session.workload_type";
diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc
@@ -2475,6 +2475,38 @@
 };
 } // namespace
 
+// Forwards keys/values to each execution provider. TODO: are we going to do all the tracing and logging and telemetry for this?
+// TODO: is this the right type? How do we convert to string?
+Status InferenceSession::SetEpDynamicOptions(gsl::span<const char*> keys,
+ gsl::span<const char*> values) {
+ Status retval = Status::OK();
+
+ ORT_TRY {
+ if (!is_inited_) {
+ LOGS(*session_logger_, ERROR) << "Session was not initialized";
+ return Status(common::ONNXRUNTIME, common::FAIL, "Session not initialized.");
+ }
+
+ // pass the same key/value spans to every provider in execution_providers_
+ // TODO: only call SetEpDynamicOptions for all providers in-use
+ for (auto& xp : execution_providers_) {
+ auto status = xp->SetEpDynamicOptions(keys, values);
+ ORT_CHECK_AND_SET_RETVAL(status);
+ }
+ }
+
+ ORT_CATCH(const std::exception& e) {
+ ORT_HANDLE_EXCEPTION([&]() {
+ retval = Status(common::ONNXRUNTIME, common::FAIL, e.what());
+ });
+ }
+ ORT_CATCH(...) {
+ retval = Status(common::ONNXRUNTIME, common::RUNTIME_EXCEPTION, "Encountered unknown exception in SetEpDynamicOptions()");
+ }
+
+ return retval;
+}
+
 Status InferenceSession::Run(const RunOptions& run_options,
  gsl::span<const std::string> feed_names, gsl::span<const OrtValue> feeds,
  gsl::span<const std::string> output_names, std::vector<OrtValue>* p_fetches,

diff --git a/onnxruntime/core/session/inference_session.h b/onnxruntime/core/session/inference_session.h
@@ -330,6 +330,9 @@ class InferenceSession {
  */
  [[nodiscard]] common::Status Initialize();
 
+ [[nodiscard]] common::Status SetEpDynamicOptions(gsl::span<const char*> keys,
+ gsl::span<const char*> values);
+
  [[nodiscard]] common::Status Run(const RunOptions& run_options, gsl::span<const std::string> feed_names,
  gsl::span<const OrtValue> feeds, gsl::span<const std::string> output_names,
  std::vector<OrtValue>* p_fetches,

diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc
@@ -843,6 +843,27 @@
 
 } // namespace
 
+ORT_API_STATUS_IMPL(OrtApis::SetEpDynamicOptions, _Inout_ OrtSession* sess, _In_reads_(kv_len) const char* const* keys,
+ _In_reads_(kv_len) const char* const* values, _In_ size_t kv_len) {
+ API_IMPL_BEGIN
+ auto session = reinterpret_cast<::onnxruntime::InferenceSession*>(sess);
+
+ auto keys_span = gsl::make_span(keys, kv_len);
+ auto values_span = gsl::make_span(values, kv_len);
+ // NOTE(review): these spans presumably have element type `const char* const`, but the session method takes span<const char*> - confirm this converts
+ Status status;
+
+ if (kv_len == 0) {
+ // Nothing to forward, so the session is not called. TODO: how does one return OK or no_values_passed
+ status = Status::OK();
+ } else {
+ status = session->SetEpDynamicOptions(keys_span,
+ values_span);
+ }
+ return ToOrtStatus(status);
+ API_IMPL_END
+}
+
 ORT_API_STATUS_IMPL(OrtApis::Run, _Inout_ OrtSession* sess, _In_opt_ const OrtRunOptions* run_options,
  _In_reads_(input_len) const char* const* input_names,
  _In_reads_(input_len) const OrtValue* const* input, size_t input_len,
@@ -2785,6 +2806,8 @@
  &OrtApis::CreateLoraAdapterFromArray,
  &OrtApis::ReleaseLoraAdapter,
  &OrtApis::RunOptionsAddActiveLoraAdapter,
+
+ &OrtApis::SetEpDynamicOptions,
 };
 
 // OrtApiBase can never change as there is no way to know what version of OrtApiBase is returned by OrtGetApiBase.

diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h
@@ -531,4 +531,6 @@
 ORT_API(void, ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter*);
 ORT_API_STATUS_IMPL(RunOptionsAddActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter);
 
+ORT_API_STATUS_IMPL(SetEpDynamicOptions, _Inout_ OrtSession* sess, _In_reads_(kv_len) const char* const* keys,
+ _In_reads_(kv_len) const char* const* values, _In_ size_t kv_len);
 } // namespace OrtApis