From 489b56420395bf2f24e037365b7aa410a6b0806b Mon Sep 17 00:00:00 2001
From: Hadi Temmar
Date: Thu, 6 Nov 2025 17:02:02 +0100
Subject: [PATCH 1/4] Option to specify aux streams used by TRT RTX. Necessary for CIG priority

---
 .../nv_tensorrt_rtx/nv_provider_options.h     |  6 ++++++
 .../nv_tensorrt_rtx/nv_execution_provider.cc  | 18 ++++++++++++++++++
 .../nv_tensorrt_rtx/nv_execution_provider.h   |  2 ++
 .../nv_execution_provider_info.cc             | 16 ++++++++++++++++
 .../nv_execution_provider_info.h              |  2 ++
 5 files changed, 44 insertions(+)

diff --git a/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h b/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h
index 026fc3b2dc0a0..941e0b2831702 100644
--- a/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h
+++ b/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h
@@ -7,8 +7,11 @@
  * - `kDeviceId`: Specifies the GPU device ID to use.
  * - `kHasUserComputeStream`: Indicates whether a user-provided compute stream is used.
  * - `kUserComputeStream`: Specifies the user-provided compute stream.
+ * - `kHasUserAuxStream`: Indicates whether a user-provided aux stream is used.
+ * - `kUserAuxStreams`: Specifies the user-provided aux stream.
  * - `kMaxWorkspaceSize`: Sets the maximum workspace size for GPU memory allocation.
  * - 'kMaxSharedMemSize': Sets the maximum amount of shared memory that TensorRT kernels are allowed to use
+ * - `kMaxAuxiliaryStreams`: Sets the maximum number of auxiliary streams for TensorRT execution.
  * - `kDumpSubgraphs`: Enables or disables dumping of subgraphs for debugging.
  * - `kDetailedBuildLog`: Enables or disables detailed build logs for debugging.
  * - `kProfilesMinShapes`: Specifies the minimum shapes for profiling.
@@ -24,8 +27,11 @@ namespace provider_option_names {
 constexpr const char* kDeviceId = "device_id";
 constexpr const char* kHasUserComputeStream = "has_user_compute_stream";
 constexpr const char* kUserComputeStream = "user_compute_stream";
+constexpr const char* kHasUserAuxStream = "has_user_aux_streams";
+constexpr const char* kUserAuxStreams = "user_aux_streams";
 constexpr const char* kMaxWorkspaceSize = "nv_max_workspace_size";
 constexpr const char* kMaxSharedMemSize = "nv_max_shared_mem_size";
+constexpr const char* kMaxAuxiliaryStreams = "nv_max_auxiliary_streams";
 constexpr const char* kDumpSubgraphs = "nv_dump_subgraphs";
 constexpr const char* kDetailedBuildLog = "nv_detailed_build_log";
 constexpr const char* kProfilesMinShapes = "nv_profile_min_shapes";
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
index 62210d65848d1..fd554f5d43426 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
@@ -964,6 +964,14 @@ NvExecutionProvider::NvExecutionProvider(const NvExecutionProviderInfo& info)
     stream_ = nullptr;  // Will be created in compute function
   }
 
+  if (info.has_user_aux_streams) {
+    external_aux_streams_ = true;
+    aux_streams_ = static_cast<cudaStream_t>(info.user_aux_streams);
+  } else {
+    external_aux_streams_ = false;
+    aux_streams_ = nullptr;
+  }
+
   std::string profile_min_shapes, profile_max_shapes, profile_opt_shapes;
 
   // incase the EP context is dumped the engine cache has to be enabled
@@ -2991,6 +2999,11 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& gr
     return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "NvTensorRTRTX EP select an optimization profile for the current context failed");
   }
 
+  // Set auxiliary stream if provided by user
+  if (external_aux_streams_ && aux_streams_ != nullptr) {
+    trt_context->setAuxStreams(&aux_streams_, (int32_t)auxiliary_streams_);
+  }
+
   // Check before using trt_engine
   if (trt_engine == nullptr) {
     return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "No engine is found.");
@@ -3402,6 +3415,11 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(const Gra
     }
   }
 
+  // Set auxiliary stream if provided by user
+  if (external_aux_streams_ && aux_streams_ != nullptr) {
+    trt_context->setAuxStreams(&aux_streams_, (int32_t)auxiliary_streams_);
+  }
+
   // Start CUDA graph capture with the correct stream
   // Note: We need to set the stream and start capture here because this is where we have access to the actual compute stream
   // Get the graph annotation ID that was stored during OnRunStart
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.h b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.h
index bb8f687db094f..253481450864f 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.h
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.h
@@ -349,6 +349,8 @@ class NvExecutionProvider : public IExecutionProvider {
   mutable NvExecutionProviderInfo info_;
   bool external_stream_ = false;
   cudaStream_t stream_ = nullptr;
+  bool external_aux_streams_ = false;
+  cudaStream_t aux_streams_ = nullptr;
   int max_partition_iterations_ = 1000;
   size_t min_subgraph_size_ = 1;
   size_t max_workspace_size_ = 0;
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc
index f25718114891b..1c07a0a90d6cb 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc
@@ -16,6 +16,7 @@ NvExecutionProviderInfo NvExecutionProviderInfo::FromProviderOptions(const Provi
                                                                      const ConfigOptions& session_options) {
   NvExecutionProviderInfo info{};
   void* user_compute_stream = nullptr;
+  void* user_aux_streams = nullptr;
   void* onnx_bytestream = nullptr;
   void* external_data_bytestream = nullptr;
   ORT_THROW_IF_ERROR(
@@ -41,8 +42,18 @@ NvExecutionProviderInfo NvExecutionProviderInfo::FromProviderOptions(const Provi
                 user_compute_stream = reinterpret_cast<void*>(address);
                 return Status::OK();
               })
+          .AddAssignmentToReference(nv::provider_option_names::kHasUserAuxStream, info.has_user_aux_streams)
+          .AddValueParser(
+              nv::provider_option_names::kUserAuxStreams,
+              [&user_aux_streams](const std::string& value_str) -> Status {
+                size_t address;
+                ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address));
+                user_aux_streams = reinterpret_cast<void*>(address);
+                return Status::OK();
+              })
          .AddAssignmentToReference(nv::provider_option_names::kMaxWorkspaceSize, info.max_workspace_size)
          .AddAssignmentToReference(nv::provider_option_names::kMaxSharedMemSize, info.max_shared_mem_size)
+         .AddAssignmentToReference(nv::provider_option_names::kMaxAuxiliaryStreams, info.auxiliary_streams)
          .AddAssignmentToReference(nv::provider_option_names::kDumpSubgraphs, info.dump_subgraphs)
          .AddAssignmentToReference(nv::provider_option_names::kDetailedBuildLog, info.detailed_build_log)
          .AddAssignmentToReference(nv::provider_option_names::kProfilesMinShapes, info.profile_min_shapes)
@@ -56,6 +67,8 @@ NvExecutionProviderInfo NvExecutionProviderInfo::FromProviderOptions(const Provi
 
   info.user_compute_stream = user_compute_stream;
   info.has_user_compute_stream = (user_compute_stream != nullptr);
+  info.user_aux_streams = user_aux_streams;
+  info.has_user_aux_streams = (user_aux_streams != nullptr);
   info.onnx_bytestream = onnx_bytestream;
   info.external_data_bytestream = external_data_bytestream;
 
@@ -98,8 +111,11 @@ ProviderOptions NvExecutionProviderInfo::ToProviderOptions(const NvExecutionProv
       {nv::provider_option_names::kDeviceId, MakeStringWithClassicLocale(info.device_id)},
       {nv::provider_option_names::kHasUserComputeStream, MakeStringWithClassicLocale(info.has_user_compute_stream)},
       {nv::provider_option_names::kUserComputeStream, MakeStringWithClassicLocale(reinterpret_cast<size_t>(info.user_compute_stream))},
+      {nv::provider_option_names::kHasUserAuxStream, MakeStringWithClassicLocale(info.has_user_aux_streams)},
+      {nv::provider_option_names::kUserAuxStreams, MakeStringWithClassicLocale(reinterpret_cast<size_t>(info.user_aux_streams))},
       {nv::provider_option_names::kMaxWorkspaceSize, MakeStringWithClassicLocale(info.max_workspace_size)},
       {nv::provider_option_names::kMaxSharedMemSize, MakeStringWithClassicLocale(info.max_shared_mem_size)},
+      {nv::provider_option_names::kMaxAuxiliaryStreams, MakeStringWithClassicLocale(info.auxiliary_streams)},
       {nv::provider_option_names::kDumpSubgraphs, MakeStringWithClassicLocale(info.dump_subgraphs)},
       {nv::provider_option_names::kDetailedBuildLog, MakeStringWithClassicLocale(info.detailed_build_log)},
       {nv::provider_option_names::kProfilesMinShapes, MakeStringWithClassicLocale(info.profile_min_shapes)},
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h
index 372e8196f38c2..759176e651f8d 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h
@@ -21,6 +21,8 @@ struct NvExecutionProviderInfo {
   int device_id{0};
   bool has_user_compute_stream{false};
   void* user_compute_stream{nullptr};
+  bool has_user_aux_streams{false};
+  void* user_aux_streams{nullptr};
   int max_partition_iterations{1000};
   int min_subgraph_size{1};
   size_t max_workspace_size{0};

From 98089434b9a4cb1264ff89709047cafc24b5b081 Mon Sep 17 00:00:00 2001
From: Hadi Temmar
Date: Fri, 7 Nov 2025 10:19:13 +0100
Subject: [PATCH 2/4] make sure that auxiliary_streams is greater than 0 if the user provides external aux streams

---
 .../providers/nv_tensorrt_rtx/nv_execution_provider.cc | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
index fd554f5d43426..a32ddd2a13a5e 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
@@ -965,6 +965,9 @@ NvExecutionProvider::NvExecutionProvider(const NvExecutionProviderInfo& info)
   }
 
   if (info.has_user_aux_streams) {
+    if (info.auxiliary_streams <= 0) {
+      ORT_THROW_IF_ERROR(ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "Auxiliary streams must be greater than 0 when using external auxiliary streams"));
+    }
     external_aux_streams_ = true;
     aux_streams_ = static_cast<cudaStream_t>(info.user_aux_streams);
   } else {
@@ -3001,6 +3004,9 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& gr
 
   // Set auxiliary stream if provided by user
   if (external_aux_streams_ && aux_streams_ != nullptr) {
+    if (auxiliary_streams_ <= 0) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "Auxiliary streams must be greater than 0 when using external auxiliary streams");
+    }
     trt_context->setAuxStreams(&aux_streams_, (int32_t)auxiliary_streams_);
   }
 
@@ -3417,6 +3423,9 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(const Gra
 
   // Set auxiliary stream if provided by user
   if (external_aux_streams_ && aux_streams_ != nullptr) {
+    if (auxiliary_streams_ <= 0) {
+      return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "Auxiliary streams must be greater than 0 when using external auxiliary streams");
+    }
     trt_context->setAuxStreams(&aux_streams_, (int32_t)auxiliary_streams_);
   }
 

From d30a3cbf7e862a80f6c2327d7c53f71b632f05b2 Mon Sep 17 00:00:00 2001
From: Hadi Temmar
Date: Sat, 8 Nov 2025 20:56:54 +0100
Subject: [PATCH 3/4] update aux streams to aux stream array

---
 .../nv_tensorrt_rtx/nv_provider_options.h    | 10 +++++-----
 .../nv_tensorrt_rtx/nv_execution_provider.cc |  4 ++--
 .../nv_execution_provider_info.cc            | 18 +++++++++---------
 .../nv_execution_provider_info.h             |  4 ++--
 4 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h b/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h
index 941e0b2831702..4afaf4186afd0 100644
--- a/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h
+++ b/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h
@@ -7,11 +7,11 @@
  * - `kDeviceId`: Specifies the GPU device ID to use.
  * - `kHasUserComputeStream`: Indicates whether a user-provided compute stream is used.
  * - `kUserComputeStream`: Specifies the user-provided compute stream.
- * - `kHasUserAuxStream`: Indicates whether a user-provided aux stream is used.
- * - `kUserAuxStreams`: Specifies the user-provided aux stream.
+ * - `kHasUserAuxStreamArray`: Indicates whether a user-provided aux stream is used.
+ * - `kUserAuxStreamArray`: Specifies the user-provided aux stream.
  * - `kMaxWorkspaceSize`: Sets the maximum workspace size for GPU memory allocation.
  * - 'kMaxSharedMemSize': Sets the maximum amount of shared memory that TensorRT kernels are allowed to use
- * - `kMaxAuxiliaryStreams`: Sets the maximum number of auxiliary streams for TensorRT execution.
+ * - `kMaxAuxiliaryStreams`: Sets the maximum number of auxiliary streams for TensorRT execution. When kUserAuxStreamArray is provided, this should also match the size of that array.
  * - `kDumpSubgraphs`: Enables or disables dumping of subgraphs for debugging.
  * - `kDetailedBuildLog`: Enables or disables detailed build logs for debugging.
  * - `kProfilesMinShapes`: Specifies the minimum shapes for profiling.
@@ -27,8 +27,8 @@ namespace provider_option_names {
 constexpr const char* kDeviceId = "device_id";
 constexpr const char* kHasUserComputeStream = "has_user_compute_stream";
 constexpr const char* kUserComputeStream = "user_compute_stream";
-constexpr const char* kHasUserAuxStream = "has_user_aux_streams";
-constexpr const char* kUserAuxStreams = "user_aux_streams";
+constexpr const char* kHasUserAuxStreamArray = "has_user_aux_stream_array";
+constexpr const char* kUserAuxStreamArray = "user_aux_stream_array";
 constexpr const char* kMaxWorkspaceSize = "nv_max_workspace_size";
 constexpr const char* kMaxSharedMemSize = "nv_max_shared_mem_size";
 constexpr const char* kMaxAuxiliaryStreams = "nv_max_auxiliary_streams";
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
index a32ddd2a13a5e..670185299a607 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
@@ -964,12 +964,12 @@ NvExecutionProvider::NvExecutionProvider(const NvExecutionProviderInfo& info)
     stream_ = nullptr;  // Will be created in compute function
   }
 
-  if (info.has_user_aux_streams) {
+  if (info.has_user_aux_stream_array) {
     if (info.auxiliary_streams <= 0) {
       ORT_THROW_IF_ERROR(ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "Auxiliary streams must be greater than 0 when using external auxiliary streams"));
     }
     external_aux_streams_ = true;
-    aux_streams_ = static_cast<cudaStream_t>(info.user_aux_streams);
+    aux_streams_ = static_cast<cudaStream_t>(info.user_aux_stream_array);
   } else {
     external_aux_streams_ = false;
     aux_streams_ = nullptr;
   }
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc
index 1c07a0a90d6cb..08d13817cace5 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc
@@ -16,7 +16,7 @@ NvExecutionProviderInfo NvExecutionProviderInfo::FromProviderOptions(const Provi
                                                                      const ConfigOptions& session_options) {
   NvExecutionProviderInfo info{};
   void* user_compute_stream = nullptr;
-  void* user_aux_streams = nullptr;
+  void* user_aux_stream_array = nullptr;
   void* onnx_bytestream = nullptr;
   void* external_data_bytestream = nullptr;
   ORT_THROW_IF_ERROR(
@@ -42,13 +42,13 @@ NvExecutionProviderInfo NvExecutionProviderInfo::FromProviderOptions(const Provi
                 user_compute_stream = reinterpret_cast<void*>(address);
                 return Status::OK();
               })
-          .AddAssignmentToReference(nv::provider_option_names::kHasUserAuxStream, info.has_user_aux_streams)
+          .AddAssignmentToReference(nv::provider_option_names::kHasUserAuxStreamArray, info.has_user_aux_stream_array)
           .AddValueParser(
-              nv::provider_option_names::kUserAuxStreams,
-              [&user_aux_streams](const std::string& value_str) -> Status {
+              nv::provider_option_names::kUserAuxStreamArray,
+              [&user_aux_stream_array](const std::string& value_str) -> Status {
                 size_t address;
                 ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address));
-                user_aux_streams = reinterpret_cast<void*>(address);
+                user_aux_stream_array = reinterpret_cast<void*>(address);
                 return Status::OK();
               })
           .AddAssignmentToReference(nv::provider_option_names::kMaxWorkspaceSize, info.max_workspace_size)
@@ -67,8 +67,8 @@ NvExecutionProviderInfo NvExecutionProviderInfo::FromProviderOptions(const Provi
 
   info.user_compute_stream = user_compute_stream;
   info.has_user_compute_stream = (user_compute_stream != nullptr);
-  info.user_aux_streams = user_aux_streams;
-  info.has_user_aux_streams = (user_aux_streams != nullptr);
+  info.user_aux_stream_array = user_aux_stream_array;
+  info.has_user_aux_stream_array = (user_aux_stream_array != nullptr);
   info.onnx_bytestream = onnx_bytestream;
   info.external_data_bytestream = external_data_bytestream;
 
@@ -111,8 +111,8 @@ ProviderOptions NvExecutionProviderInfo::ToProviderOptions(const NvExecutionProv
       {nv::provider_option_names::kDeviceId, MakeStringWithClassicLocale(info.device_id)},
       {nv::provider_option_names::kHasUserComputeStream, MakeStringWithClassicLocale(info.has_user_compute_stream)},
       {nv::provider_option_names::kUserComputeStream, MakeStringWithClassicLocale(reinterpret_cast<size_t>(info.user_compute_stream))},
-      {nv::provider_option_names::kHasUserAuxStream, MakeStringWithClassicLocale(info.has_user_aux_streams)},
-      {nv::provider_option_names::kUserAuxStreams, MakeStringWithClassicLocale(reinterpret_cast<size_t>(info.user_aux_streams))},
+      {nv::provider_option_names::kHasUserAuxStreamArray, MakeStringWithClassicLocale(info.has_user_aux_stream_array)},
+      {nv::provider_option_names::kUserAuxStreamArray, MakeStringWithClassicLocale(reinterpret_cast<size_t>(info.user_aux_stream_array))},
       {nv::provider_option_names::kMaxWorkspaceSize, MakeStringWithClassicLocale(info.max_workspace_size)},
       {nv::provider_option_names::kMaxSharedMemSize, MakeStringWithClassicLocale(info.max_shared_mem_size)},
       {nv::provider_option_names::kMaxAuxiliaryStreams, MakeStringWithClassicLocale(info.auxiliary_streams)},
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h
index 759176e651f8d..9caf54a4cb757 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h
@@ -21,8 +21,8 @@ struct NvExecutionProviderInfo {
   int device_id{0};
   bool has_user_compute_stream{false};
   void* user_compute_stream{nullptr};
-  bool has_user_aux_streams{false};
-  void* user_aux_streams{nullptr};
+  bool has_user_aux_stream_array{false};
+  void* user_aux_stream_array{nullptr};
   int max_partition_iterations{1000};
   int min_subgraph_size{1};
   size_t max_workspace_size{0};

From fbfaab4600219386149f06cdd7da43a0ec732d57 Mon Sep 17 00:00:00 2001
From: Hadi Temmar
Date: Thu, 13 Nov 2025 17:30:27 +0100
Subject: [PATCH 4/4] remove kHasUserAuxStreamArray

---
 .../core/providers/nv_tensorrt_rtx/nv_provider_options.h    | 2 --
 .../core/providers/nv_tensorrt_rtx/nv_execution_provider.cc | 2 +-
 .../providers/nv_tensorrt_rtx/nv_execution_provider_info.cc | 3 ---
 .../providers/nv_tensorrt_rtx/nv_execution_provider_info.h  | 1 -
 4 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h b/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h
index 4afaf4186afd0..60f03074f5a6d 100644
--- a/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h
+++ b/include/onnxruntime/core/providers/nv_tensorrt_rtx/nv_provider_options.h
@@ -7,7 +7,6 @@
  * - `kDeviceId`: Specifies the GPU device ID to use.
  * - `kHasUserComputeStream`: Indicates whether a user-provided compute stream is used.
  * - `kUserComputeStream`: Specifies the user-provided compute stream.
- * - `kHasUserAuxStreamArray`: Indicates whether a user-provided aux stream is used.
  * - `kUserAuxStreamArray`: Specifies the user-provided aux stream.
  * - `kMaxWorkspaceSize`: Sets the maximum workspace size for GPU memory allocation.
  * - 'kMaxSharedMemSize': Sets the maximum amount of shared memory that TensorRT kernels are allowed to use
@@ -27,7 +26,6 @@ namespace provider_option_names {
 constexpr const char* kDeviceId = "device_id";
 constexpr const char* kHasUserComputeStream = "has_user_compute_stream";
 constexpr const char* kUserComputeStream = "user_compute_stream";
-constexpr const char* kHasUserAuxStreamArray = "has_user_aux_stream_array";
 constexpr const char* kUserAuxStreamArray = "user_aux_stream_array";
 constexpr const char* kMaxWorkspaceSize = "nv_max_workspace_size";
 constexpr const char* kMaxSharedMemSize = "nv_max_shared_mem_size";
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
index 670185299a607..b2185d65f38db 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider.cc
@@ -964,7 +964,7 @@ NvExecutionProvider::NvExecutionProvider(const NvExecutionProviderInfo& info)
     stream_ = nullptr;  // Will be created in compute function
   }
 
-  if (info.has_user_aux_stream_array) {
+  if (info.user_aux_stream_array != nullptr) {
     if (info.auxiliary_streams <= 0) {
       ORT_THROW_IF_ERROR(ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "Auxiliary streams must be greater than 0 when using external auxiliary streams"));
     }
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc
index 08d13817cace5..295ca7d6926ff 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.cc
@@ -42,7 +42,6 @@ NvExecutionProviderInfo NvExecutionProviderInfo::FromProviderOptions(const Provi
                 user_compute_stream = reinterpret_cast<void*>(address);
                 return Status::OK();
               })
-          .AddAssignmentToReference(nv::provider_option_names::kHasUserAuxStreamArray, info.has_user_aux_stream_array)
           .AddValueParser(
               nv::provider_option_names::kUserAuxStreamArray,
               [&user_aux_stream_array](const std::string& value_str) -> Status {
@@ -68,7 +67,6 @@ NvExecutionProviderInfo NvExecutionProviderInfo::FromProviderOptions(const Provi
   info.user_compute_stream = user_compute_stream;
   info.has_user_compute_stream = (user_compute_stream != nullptr);
   info.user_aux_stream_array = user_aux_stream_array;
-  info.has_user_aux_stream_array = (user_aux_stream_array != nullptr);
   info.onnx_bytestream = onnx_bytestream;
   info.external_data_bytestream = external_data_bytestream;
 
@@ -111,7 +109,6 @@ ProviderOptions NvExecutionProviderInfo::ToProviderOptions(const NvExecutionProv
       {nv::provider_option_names::kDeviceId, MakeStringWithClassicLocale(info.device_id)},
       {nv::provider_option_names::kHasUserComputeStream, MakeStringWithClassicLocale(info.has_user_compute_stream)},
       {nv::provider_option_names::kUserComputeStream, MakeStringWithClassicLocale(reinterpret_cast<size_t>(info.user_compute_stream))},
-      {nv::provider_option_names::kHasUserAuxStreamArray, MakeStringWithClassicLocale(info.has_user_aux_stream_array)},
       {nv::provider_option_names::kUserAuxStreamArray, MakeStringWithClassicLocale(reinterpret_cast<size_t>(info.user_aux_stream_array))},
       {nv::provider_option_names::kMaxWorkspaceSize, MakeStringWithClassicLocale(info.max_workspace_size)},
       {nv::provider_option_names::kMaxSharedMemSize, MakeStringWithClassicLocale(info.max_shared_mem_size)},
diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h
index 9caf54a4cb757..26f392ad446a3 100644
--- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h
+++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_info.h
@@ -21,7 +21,6 @@ struct NvExecutionProviderInfo {
   int device_id{0};
   bool has_user_compute_stream{false};
   void* user_compute_stream{nullptr};
-  bool has_user_aux_stream_array{false};
   void* user_aux_stream_array{nullptr};
   int max_partition_iterations{1000};
   int min_subgraph_size{1};
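
Usage sketch (not part of the patch series): the snippet below illustrates how an application might hand an auxiliary stream array to the EP through the options added above. The EP registration name "NvTensorRTRTX" and the model path are assumptions for illustration; what the patch does define is that `user_aux_stream_array` carries the array's base address as a decimal string (matching the ParseStringWithClassicLocale/reinterpret_cast round-trip in the parser) and that `nv_max_auxiliary_streams` must be greater than 0 and should equal the number of streams in the array.

// Hypothetical usage sketch. Assumptions: the NV TensorRT RTX EP is registered
// under the name "NvTensorRTRTX", and "model.onnx" stands in for a real model.
#include <array>
#include <string>
#include <unordered_map>

#include <cuda_runtime_api.h>
#include <onnxruntime_cxx_api.h>

int main() {
  constexpr int kNumAuxStreams = 2;
  std::array<cudaStream_t, kNumAuxStreams> aux_streams{};
  for (auto& s : aux_streams) {
    // Create the auxiliary streams with the desired priority (e.g. for the CIG use case).
    cudaStreamCreateWithPriority(&s, cudaStreamNonBlocking, /*priority=*/-1);
  }

  Ort::Env env;
  Ort::SessionOptions session_options;

  // The array's base address is passed as a decimal string; the option parser
  // converts it back to a pointer. The stream count must be > 0 and match the array size.
  std::unordered_map<std::string, std::string> nv_options{
      {"user_aux_stream_array", std::to_string(reinterpret_cast<size_t>(aux_streams.data()))},
      {"nv_max_auxiliary_streams", std::to_string(kNumAuxStreams)},
  };
  session_options.AppendExecutionProvider("NvTensorRTRTX", nv_options);

  Ort::Session session(env, ORT_TSTR("model.onnx"), session_options);

  // ... run inference; keep aux_streams alive for the lifetime of the session ...

  for (auto& s : aux_streams) {
    cudaStreamDestroy(s);
  }
  return 0;
}

Only the raw pointer crosses the provider-option boundary, so the EP never creates or destroys the auxiliary streams; their creation, priority configuration, and cleanup remain the application's responsibility.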