Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
* - `kDeviceId`: Specifies the GPU device ID to use.
* - `kHasUserComputeStream`: Indicates whether a user-provided compute stream is used.
* - `kUserComputeStream`: Specifies the user-provided compute stream.
* - `kUserAuxStreamArray`: Specifies the user-provided array of auxiliary CUDA streams.
* - `kMaxWorkspaceSize`: Sets the maximum workspace size for GPU memory allocation.
* - `kMaxSharedMemSize`: Sets the maximum amount of shared memory that TensorRT kernels are allowed to use.
* - `kMaxAuxiliaryStreams`: Sets the maximum number of auxiliary streams for TensorRT execution. Also, should correspond to the size of kUserAuxStreamArray, if provided.
* - `kDumpSubgraphs`: Enables or disables dumping of subgraphs for debugging.
* - `kDetailedBuildLog`: Enables or disables detailed build logs for debugging.
* - `kProfilesMinShapes`: Specifies the minimum shapes for profiling.
Expand All @@ -24,8 +26,10 @@ namespace provider_option_names {
constexpr const char* kDeviceId = "device_id";
constexpr const char* kHasUserComputeStream = "has_user_compute_stream";
constexpr const char* kUserComputeStream = "user_compute_stream";
constexpr const char* kUserAuxStreamArray = "user_aux_stream_array";
constexpr const char* kMaxWorkspaceSize = "nv_max_workspace_size";
constexpr const char* kMaxSharedMemSize = "nv_max_shared_mem_size";
constexpr const char* kMaxAuxiliaryStreams = "nv_max_auxiliary_streams";
constexpr const char* kDumpSubgraphs = "nv_dump_subgraphs";
constexpr const char* kDetailedBuildLog = "nv_detailed_build_log";
constexpr const char* kProfilesMinShapes = "nv_profile_min_shapes";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -984,6 +984,17 @@ NvExecutionProvider::NvExecutionProvider(const NvExecutionProviderInfo& info)
stream_ = nullptr; // Will be created in compute function
}

// Wire up user-provided auxiliary CUDA streams (kUserAuxStreamArray provider option).
// Validation: a user-supplied array only makes sense together with a positive
// auxiliary-stream count (kMaxAuxiliaryStreams), so reject the combination otherwise.
if (info.user_aux_stream_array != nullptr) {
if(info.auxiliary_streams <= 0){
ORT_THROW_IF_ERROR(ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "Auxiliary streams must be greater than 0 when using external auxiliary streams"));
}
external_aux_streams_ = true;
// NOTE(review): the option is documented as an ARRAY of aux streams, but the base
// address is cast here to a single cudaStream_t handle. If user_aux_stream_array
// really points at a cudaStream_t[] of info.auxiliary_streams entries, this should
// be stored as cudaStream_t* (member type change) rather than reinterpreted as one
// stream handle — confirm the intended contract with the caller.
aux_streams_ = static_cast<cudaStream_t>(info.user_aux_stream_array);
} else {
// No external aux streams: TensorRT manages its own auxiliary streams.
external_aux_streams_ = false;
aux_streams_ = nullptr;
}

std::string profile_min_shapes, profile_max_shapes, profile_opt_shapes;

// incase the EP context is dumped the engine cache has to be enabled
Expand Down Expand Up @@ -3033,6 +3044,14 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphViewer& gr
return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "NvTensorRTRTX EP select an optimization profile for the current context failed");
}

// Set auxiliary stream if provided by user
if (external_aux_streams_ && aux_streams_ != nullptr) {
// Defensive re-check; presumably already enforced when the provider was constructed.
if(auxiliary_streams_ <= 0){
return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "Auxiliary streams must be greater than 0 when using external auxiliary streams");
}
// NOTE(review): &aux_streams_ is the address of ONE cudaStream_t member, yet it is
// passed as an array of auxiliary_streams_ entries; for auxiliary_streams_ > 1
// setAuxStreams would read past the single element. Looks like the member should
// hold the user's stream array pointer (cudaStream_t*) instead — verify.
// Style: prefer static_cast<int32_t>(...) over the C-style cast.
trt_context->setAuxStreams(&aux_streams_, (int32_t)auxiliary_streams_);
}

// Check before using trt_engine
if (trt_engine == nullptr) {
return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "No engine is found.");
Expand Down Expand Up @@ -3444,6 +3463,14 @@ Status NvExecutionProvider::CreateNodeComputeInfoFromPrecompiledEngine(const Gra
}
}

// Set auxiliary stream if provided by user
if (external_aux_streams_ && aux_streams_ != nullptr) {
// Guard against a zero/negative count reaching TensorRT.
if(auxiliary_streams_ <= 0){
return ORT_MAKE_STATUS(ONNXRUNTIME, EP_FAIL, "Auxiliary streams must be greater than 0 when using external auxiliary streams");
}
// NOTE(review): a pointer to a single cudaStream_t member is handed to TensorRT with
// a count of auxiliary_streams_; any count greater than 1 makes this an out-of-bounds
// read. Intended type is probably cudaStream_t* pointing at the user's array — confirm.
trt_context->setAuxStreams(&aux_streams_, (int32_t)auxiliary_streams_);
}

// Start CUDA graph capture with the correct stream
// Note: We need to set the stream and start capture here because this is where we have access to the actual compute stream
// Get the graph annotation ID that was stored during OnRunStart
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,8 @@ class NvExecutionProvider : public IExecutionProvider {
mutable NvExecutionProviderInfo info_;
bool external_stream_ = false;
cudaStream_t stream_ = nullptr;
// True when the auxiliary stream(s) are owned by the user (kUserAuxStreamArray option).
bool external_aux_streams_ = false;
// NOTE(review): stores the value of user_aux_stream_array, which is documented as an
// array of streams; a single cudaStream_t can represent only one. If multiple aux
// streams are to be supported this should likely be cudaStream_t* — confirm.
cudaStream_t aux_streams_ = nullptr;
int max_partition_iterations_ = 1000;
size_t min_subgraph_size_ = 1;
size_t max_workspace_size_ = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ NvExecutionProviderInfo NvExecutionProviderInfo::FromProviderOptions(const Provi
const ConfigOptions& session_options) {
NvExecutionProviderInfo info{};
void* user_compute_stream = nullptr;
void* user_aux_stream_array = nullptr;
void* onnx_bytestream = nullptr;
void* external_data_bytestream = nullptr;
ORT_THROW_IF_ERROR(
Expand All @@ -41,8 +42,17 @@ NvExecutionProviderInfo NvExecutionProviderInfo::FromProviderOptions(const Provi
user_compute_stream = reinterpret_cast<void*>(address);
return Status::OK();
})
.AddValueParser(
nv::provider_option_names::kUserAuxStreamArray,
[&user_aux_stream_array](const std::string& value_str) -> Status {
// The option value is the numeric address of the user's aux-stream array,
// mirroring the kUserComputeStream parser above.
size_t address;
ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address));
user_aux_stream_array = reinterpret_cast<void*>(address);
return Status::OK();
})
.AddAssignmentToReference(nv::provider_option_names::kMaxWorkspaceSize, info.max_workspace_size)
.AddAssignmentToReference(nv::provider_option_names::kMaxSharedMemSize, info.max_shared_mem_size)
.AddAssignmentToReference(nv::provider_option_names::kMaxAuxiliaryStreams, info.auxiliary_streams)
.AddAssignmentToReference(nv::provider_option_names::kDumpSubgraphs, info.dump_subgraphs)
.AddAssignmentToReference(nv::provider_option_names::kDetailedBuildLog, info.detailed_build_log)
.AddAssignmentToReference(nv::provider_option_names::kProfilesMinShapes, info.profile_min_shapes)
Expand All @@ -56,6 +66,7 @@ NvExecutionProviderInfo NvExecutionProviderInfo::FromProviderOptions(const Provi

info.user_compute_stream = user_compute_stream;
info.has_user_compute_stream = (user_compute_stream != nullptr);
info.user_aux_stream_array = user_aux_stream_array;
info.onnx_bytestream = onnx_bytestream;
info.external_data_bytestream = external_data_bytestream;

Expand Down Expand Up @@ -98,8 +109,10 @@ ProviderOptions NvExecutionProviderInfo::ToProviderOptions(const NvExecutionProv
{nv::provider_option_names::kDeviceId, MakeStringWithClassicLocale(info.device_id)},
{nv::provider_option_names::kHasUserComputeStream, MakeStringWithClassicLocale(info.has_user_compute_stream)},
{nv::provider_option_names::kUserComputeStream, MakeStringWithClassicLocale(reinterpret_cast<size_t>(info.user_compute_stream))},
{nv::provider_option_names::kUserAuxStreamArray, MakeStringWithClassicLocale(reinterpret_cast<size_t>(info.user_aux_stream_array))},
{nv::provider_option_names::kMaxWorkspaceSize, MakeStringWithClassicLocale(info.max_workspace_size)},
{nv::provider_option_names::kMaxSharedMemSize, MakeStringWithClassicLocale(info.max_shared_mem_size)},
{nv::provider_option_names::kMaxAuxiliaryStreams, MakeStringWithClassicLocale(info.auxiliary_streams)},
{nv::provider_option_names::kDumpSubgraphs, MakeStringWithClassicLocale(info.dump_subgraphs)},
{nv::provider_option_names::kDetailedBuildLog, MakeStringWithClassicLocale(info.detailed_build_log)},
{nv::provider_option_names::kProfilesMinShapes, MakeStringWithClassicLocale(info.profile_min_shapes)},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ struct NvExecutionProviderInfo {
int device_id{0};
bool has_user_compute_stream{false};
void* user_compute_stream{nullptr};
void* user_aux_stream_array{nullptr};
int max_partition_iterations{1000};
int min_subgraph_size{1};
size_t max_workspace_size{0};
Expand Down