Commit 7080bb7

Merge branch 'master' into dev/shahasad/move-systems-numerics-tensors-to-onnxruntime-single-assembly

shahasad authored Aug 15, 2019
2 parents 0505e5a + 8d12ce4 commit 7080bb7
Showing 20 changed files with 252 additions and 148 deletions.
@@ -26,7 +26,7 @@ static void UseApi()

// Optional : Create session options and set the graph optimization level for the session
SessionOptions options = new SessionOptions();
options.GraphOptimizationLevel = 2;
options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_EXTENDED;

using (var session = new InferenceSession(modelPath, options))
{
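The hunk is truncated at this point. For orientation, here is a minimal end-to-end sketch of the updated C# usage; the model path, input name, and tensor shape are placeholders, and the DenseTensor namespace is assumed to still be System.Numerics.Tensors as of this commit:

// A minimal sketch, not the sample itself: model and input are placeholders.
using System.Collections.Generic;
using System.Numerics.Tensors;   // DenseTensor<T> (assumed pre-move namespace)
using Microsoft.ML.OnnxRuntime;

class Sketch
{
    static void Main()
    {
        var options = new SessionOptions();
        options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_EXTENDED;

        using (var session = new InferenceSession("model.onnx", options))
        {
            var tensor = new DenseTensor<float>(new[] { 1, 3, 224, 224 });
            var inputs = new List<NamedOnnxValue> {
                NamedOnnxValue.CreateFromTensor("data", tensor)
            };
            var results = session.Run(inputs); // IReadOnlyCollection<NamedOnnxValue>
        }
    }
}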
2 changes: 1 addition & 1 deletion csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs
@@ -161,7 +161,7 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
public static extern IntPtr /*(OrtStatus*)*/ OrtSetSessionThreadPoolSize(IntPtr /* OrtSessionOptions* */ options, int sessionThreadPoolSize);

[DllImport(nativeLib, CharSet = charSet)]
public static extern IntPtr /*(OrtStatus*)*/ OrtSetSessionGraphOptimizationLevel(IntPtr /* OrtSessionOptions* */ options, uint graphOptimizationLevel);
public static extern IntPtr /*(OrtStatus*)*/ OrtSetSessionGraphOptimizationLevel(IntPtr /* OrtSessionOptions* */ options, GraphOptimizationLevel graphOptimizationLevel);
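A C# enum marshals as its underlying 32-bit integer by default, so this signature change stays binary-compatible with the previous uint version. A hedged sketch of calling it directly, assuming sessionOptionsHandle holds a valid OrtSessionOptions* obtained elsewhere:

// Sketch: the enum crosses the P/Invoke boundary as a plain int, matching
// the values of the C-side GraphOptimizationLevel enum.
IntPtr status = NativeMethods.OrtSetSessionGraphOptimizationLevel(
    sessionOptionsHandle, GraphOptimizationLevel.ORT_ENABLE_EXTENDED);
NativeApiStatus.VerifySuccess(status); // throws OnnxRuntimeException on failure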


///**
37 changes: 22 additions & 15 deletions csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs
@@ -8,6 +8,17 @@

namespace Microsoft.ML.OnnxRuntime
{
/// <summary>
/// TODO Add documentation about which optimizations are enabled for each value.
/// </summary>
public enum GraphOptimizationLevel
{
ORT_DISABLE_ALL = 0,
ORT_ENABLE_BASIC = 1,
ORT_ENABLE_EXTENDED = 2,
ORT_ENABLE_ALL = 99
}
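The jump to 99 for ORT_ENABLE_ALL leaves numeric room for future intermediate levels. As a sketch, the old uint levels used before this change map onto the new names as implied by the substitutions throughout this diff:

// Sketch: mapping of the legacy numeric levels replaced in this commit.
static GraphOptimizationLevel FromLegacyLevel(uint level)
{
    switch (level)
    {
        case 0: return GraphOptimizationLevel.ORT_DISABLE_ALL;
        case 1: return GraphOptimizationLevel.ORT_ENABLE_BASIC;
        case 2: return GraphOptimizationLevel.ORT_ENABLE_EXTENDED;
        default: throw new System.ArgumentOutOfRangeException(nameof(level));
    }
}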

/// <summary>
/// Holds the options for creating an InferenceSession
/// </summary>
@@ -117,7 +128,7 @@ public bool EnableMemoryPattern
}
private bool _enableMemoryPattern = true;


/// <summary>
/// Path prefix to use for output of profiling data
/// </summary>
@@ -158,7 +169,7 @@ public bool EnableProfiling
/// </summary>
public string OptimizedModelFilePath
{
get
get
{
return _optimizedModelFilePath;
}
@@ -174,7 +185,7 @@ public string OptimizedModelFilePath
private string _optimizedModelFilePath = "";



/// <summary>
/// Enables Arena allocator for the CPU memory allocations. Default is true.
/// </summary>
@@ -190,7 +201,7 @@ public bool EnableCpuMemArena
{
NativeApiStatus.VerifySuccess(NativeMethods.OrtEnableCpuMemArena(_nativePtr));
_enableCpuMemArena = true;
}
}
else if (_enableCpuMemArena && !value)
{
NativeApiStatus.VerifySuccess(NativeMethods.OrtDisableCpuMemArena(_nativePtr));
@@ -259,13 +270,9 @@ public int ThreadPoolSize


/// <summary>
/// Sets the graph optimization level for the session. Default is set to 1.
/// Sets the graph optimization level for the session. Default is set to ORT_ENABLE_BASIC.
/// </summary>
/// Available options are : 0, 1, 2
/// 0 -> Disable all optimizations
/// 1 -> Enable basic optimizations
/// 2 -> Enable all optimizations
public uint GraphOptimizationLevel
public GraphOptimizationLevel GraphOptimizationLevel
{
get
{
@@ -277,7 +284,7 @@ public uint GraphOptimizationLevel
_graphOptimizationLevel = value;
}
}
private uint _graphOptimizationLevel = 1;
private GraphOptimizationLevel _graphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_BASIC;

#endregion

@@ -298,16 +305,16 @@ private static bool CheckCudaExecutionProviderDLLs()
{
IntPtr handle = LoadLibrary(dll);
if (handle != IntPtr.Zero)
continue;
continue;
var sysdir = new StringBuilder(String.Empty, 2048);
GetSystemDirectory(sysdir, (uint)sysdir.Capacity);
throw new OnnxRuntimeException(
ErrorCode.NoSuchFile,
ErrorCode.NoSuchFile,
$"kernel32.LoadLibrary():'{dll}' not found. CUDA is required for GPU execution. " +
$". Verify it is available in the system directory={sysdir}. Else copy it to the output folder."
);
);
}
}
}
return true;
}

@@ -34,11 +34,7 @@ int main(int argc, char* argv[]) {
OrtSetSessionThreadPoolSize(session_options, 1);

// Sets graph optimization level
// Available levels are
// 0 -> To disable all optimizations
// 1 -> To enable basic optimizations (Such as redundant node removals)
// 2 -> To enable all optimizations (Includes level 1 + more complex optimizations like node fusions)
OrtSetSessionGraphOptimizationLevel(session_options, 1);
OrtSetSessionGraphOptimizationLevel(session_options, ORT_ENABLE_BASIC);

// Optionally add more execution providers via session_options
// E.g. for CUDA include cuda_provider_factory.h and uncomment the following line:
23 changes: 11 additions & 12 deletions csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
@@ -33,7 +33,7 @@ public void TestSessionOptions()
Assert.Equal("", opt.LogId);
Assert.Equal(LogLevel.Verbose, opt.LogVerbosityLevel);
Assert.Equal(0, opt.ThreadPoolSize);
Assert.Equal(1u, opt.GraphOptimizationLevel);
Assert.Equal(GraphOptimizationLevel.ORT_ENABLE_BASIC, opt.GraphOptimizationLevel);

// try setting options
opt.EnableSequentialExecution = false;
@@ -61,12 +61,11 @@ public void TestSessionOptions()
opt.ThreadPoolSize = 4;
Assert.Equal(4, opt.ThreadPoolSize);

opt.GraphOptimizationLevel = 3;
Assert.Equal(3u, opt.GraphOptimizationLevel);
opt.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_EXTENDED;
Assert.Equal(GraphOptimizationLevel.ORT_ENABLE_EXTENDED, opt.GraphOptimizationLevel);

Assert.Throws<OnnxRuntimeException>(() => { opt.ThreadPoolSize = -2; });
Assert.Throws<OnnxRuntimeException>(() => { opt.GraphOptimizationLevel = 10; });

Assert.Throws<OnnxRuntimeException>(() => { opt.GraphOptimizationLevel = (GraphOptimizationLevel)10; });
}
}
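The cast in the last assertion works because C# converts any integer to an enum without complaint; the range check only happens on the native side. A sketch of validating numeric input up front, with a hypothetical raw value:

// Sketch: reject undefined numeric levels before assignment, mirroring the
// native-side validation the test above exercises. 'raw' is hypothetical.
int raw = 10;
if (!System.Enum.IsDefined(typeof(GraphOptimizationLevel), raw))
    throw new System.ArgumentOutOfRangeException(nameof(raw));
var level = (GraphOptimizationLevel)raw;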

@@ -128,11 +127,11 @@ public void CanCreateAndDisposeSessionWithModelPath()
}

[Theory]
[InlineData(0, true)]
[InlineData(0, false)]
[InlineData(2, true)]
[InlineData(2, false)]
private void CanRunInferenceOnAModel(uint graphOptimizationLevel, bool disableSequentialExecution)
[InlineData(GraphOptimizationLevel.ORT_DISABLE_ALL, true)]
[InlineData(GraphOptimizationLevel.ORT_DISABLE_ALL, false)]
[InlineData(GraphOptimizationLevel.ORT_ENABLE_EXTENDED, true)]
[InlineData(GraphOptimizationLevel.ORT_ENABLE_EXTENDED, false)]
private void CanRunInferenceOnAModel(GraphOptimizationLevel graphOptimizationLevel, bool disableSequentialExecution)
{
string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx");

@@ -742,7 +741,7 @@ private void TestModelSerialization()
// Set the optimized model file path to assert that no exceptions are thrown.
SessionOptions options = new SessionOptions();
options.OptimizedModelFilePath = modelOutputPath;
options.GraphOptimizationLevel = 1;
options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_BASIC;
var session = new InferenceSession(modelPath, options);
Assert.NotNull(session);
Assert.True(File.Exists(modelOutputPath));
@@ -791,7 +790,7 @@ private void VerifyNativeMethodsExist()
"OrtEnableSequentialExecution","OrtDisableSequentialExecution","OrtEnableProfiling","OrtDisableProfiling",
"OrtEnableMemPattern","OrtDisableMemPattern","OrtEnableCpuMemArena","OrtDisableCpuMemArena",
"OrtSetSessionLogId","OrtSetSessionLogVerbosityLevel","OrtSetSessionThreadPoolSize","OrtSetSessionGraphOptimizationLevel",
"OrtSetOptimizedModelFilePath", "OrtSessionOptionsAppendExecutionProvider_CPU",
"OrtSetOptimizedModelFilePath", "OrtSessionOptionsAppendExecutionProvider_CPU",
"OrtCreateRunOptions", "OrtReleaseRunOptions", "OrtRunOptionsSetRunLogVerbosityLevel", "OrtRunOptionsSetRunTag",
"OrtRunOptionsGetRunLogVerbosityLevel", "OrtRunOptionsGetRunTag","OrtRunOptionsEnableTerminate", "OrtRunOptionsDisableTerminate",
"OrtCreateAllocatorInfo","OrtCreateCpuAllocatorInfo",
17 changes: 9 additions & 8 deletions csharp/tools/Microsoft.ML.OnnxRuntime.PerfTool/Program.cs
@@ -33,7 +33,7 @@ class CommandOptions
public bool ParallelExecution { get; set; } = false;

[Option('o', "optimization_level", Required = false, HelpText = "Optimization Level. Default is 1, partial optimization.")]
public uint OptimizationLevel { get; set; } = 1;
public GraphOptimizationLevel OptimizationLevel { get; set; } = GraphOptimizationLevel.ORT_ENABLE_BASIC;
}
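With the option now enum-typed, the CommandLine parser should bind the level by name rather than as a bare number; a hedged sketch of the equivalent manual parse, with a hypothetical input string:

// Sketch: manual equivalent of enum binding; falls back to the default above.
GraphOptimizationLevel level;
if (!System.Enum.TryParse("ORT_ENABLE_EXTENDED", true, out level))
    level = GraphOptimizationLevel.ORT_ENABLE_BASIC;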

class Program
@@ -42,7 +42,8 @@ public static void Main(string[] args)
{
var cmdOptions = Parser.Default.ParseArguments<CommandOptions>(args);
cmdOptions.WithParsed(
options => {
options =>
{
Run(options);
});
}
@@ -52,7 +53,7 @@ public static void Run(CommandOptions options)
string inputPath = options.InputFile;
int iteration = options.IterationCount;
bool parallelExecution = options.ParallelExecution;
uint optLevel = options.OptimizationLevel;
GraphOptimizationLevel optLevel = options.OptimizationLevel;
Console.WriteLine("Running model {0} in OnnxRuntime:", modelPath);
Console.WriteLine("input:{0}", inputPath);
Console.WriteLine("iteration count:{0}", iteration);
@@ -84,11 +85,11 @@ public static float[] LoadTensorFromFile(string filename)
return tensorData.ToArray();
}

static void RunModelOnnxRuntime(string modelPath, string inputPath, int iteration, DateTime[] timestamps, bool parallelExecution, uint optLevel)
static void RunModelOnnxRuntime(string modelPath, string inputPath, int iteration, DateTime[] timestamps, bool parallelExecution, GraphOptimizationLevel optLevel)
{
if (timestamps.Length != (int)TimingPoint.TotalCount)
{
throw new ArgumentException("Timestamps array must have "+(int)TimingPoint.TotalCount+" size");
throw new ArgumentException("Timestamps array must have " + (int)TimingPoint.TotalCount + " size");
}

timestamps[(int)TimingPoint.Start] = DateTime.Now;
@@ -108,12 +109,12 @@ static void RunModelOnnxRuntime(string modelPath, string inputPath, int iteratio
container.Add(NamedOnnxValue.CreateFromTensor<float>(name, tensor));
}



timestamps[(int)TimingPoint.InputLoaded] = DateTime.Now;

// Run the inference
for (int i=0; i < iteration; i++)
for (int i = 0; i < iteration; i++)
{
var results = session.Run(container); // results is an IReadOnlyList<NamedOnnxValue> container
Debug.Assert(results != null);
@@ -132,7 +133,7 @@ static void RunModelOnnxRuntime(string modelPath, string inputPath, int iteratio
static void PrintUsage()
{
Console.WriteLine("Usage:\n"
+"dotnet Microsoft.ML.OnnxRuntime.PerfTool <onnx-model-path> <input-file-path> <iteration-count>"
+ "dotnet Microsoft.ML.OnnxRuntime.PerfTool <onnx-model-path> <input-file-path> <iteration-count>"
);
}

9 changes: 9 additions & 0 deletions docs/execution_providers/TensorRT-ExecutionProvider.md
@@ -22,3 +22,12 @@ When using the python wheel from the ONNX Runtime build with TensorRT execution

### Using onnxruntime_perf_test
You can test the performance for your ONNX Model with the TensorRT execution provider. Use the flag `-e tensorrt` in [onnxruntime_perf_test](https://github.com/Microsoft/onnxruntime/tree/master/onnxruntime/test/perftest#onnxruntime-performance-test).

### Configuring Engine Max Batch Size and Workspace Size
By default, the TensorRT execution provider builds an ICudaEngine with max batch size = 1 and max workspace size = 1 GB. Both defaults can be overridden by setting the environment variables ORT_TENSORRT_MAX_BATCH_SIZE and ORT_TENSORRT_MAX_WORKSPACE_SIZE, e.g. on Linux:
#### Override the default max batch size to 10
export ORT_TENSORRT_MAX_BATCH_SIZE=10
#### Override the default max workspace size to 2 GB
export ORT_TENSORRT_MAX_WORKSPACE_SIZE=2147483648
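For the C# bindings touched elsewhere in this commit, the same overrides can be set in-process before the first session is created; this sketch assumes the TensorRT provider reads the process environment when it first builds an engine:

// Sketch (assumption: the variables are read at engine-build time in-process).
System.Environment.SetEnvironmentVariable("ORT_TENSORRT_MAX_BATCH_SIZE", "10");
System.Environment.SetEnvironmentVariable("ORT_TENSORRT_MAX_WORKSPACE_SIZE", "2147483648");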
14 changes: 9 additions & 5 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -234,11 +234,15 @@ ORT_API_STATUS(OrtSetSessionLogId, _Inout_ OrtSessionOptions* options, const cha
ORT_API_STATUS(OrtSetSessionLogVerbosityLevel, _Inout_ OrtSessionOptions* options, int session_log_verbosity_level);

// Set Graph optimization level.
// Available options are : 0, 1, 2.
// 0 -> Disable all optimizations
// 1 -> Enable basic optimizations
// 2 -> Enable all optimizations
ORT_API_STATUS(OrtSetSessionGraphOptimizationLevel, _Inout_ OrtSessionOptions* options, int graph_optimization_level);
// TODO Add documentation about which optimizations are enabled for each value.
typedef enum GraphOptimizationLevel {
ORT_DISABLE_ALL = 0,
ORT_ENABLE_BASIC = 1,
ORT_ENABLE_EXTENDED = 2,
ORT_ENABLE_ALL = 99
} GraphOptimizationLevel;
ORT_API_STATUS(OrtSetSessionGraphOptimizationLevel, _Inout_ OrtSessionOptions* options,
GraphOptimizationLevel graph_optimization_level);

// How many threads in the session thread pool.
ORT_API_STATUS(OrtSetSessionThreadPoolSize, _Inout_ OrtSessionOptions* options, int session_thread_pool_size);
2 changes: 1 addition & 1 deletion include/onnxruntime/core/session/onnxruntime_cxx_api.h
@@ -135,7 +135,7 @@ struct SessionOptions : Base<OrtSessionOptions> {
SessionOptions Clone() const;

SessionOptions& SetThreadPoolSize(int session_thread_pool_size);
SessionOptions& SetGraphOptimizationLevel(int graph_optimization_level);
SessionOptions& SetGraphOptimizationLevel(GraphOptimizationLevel graph_optimization_level);

SessionOptions& EnableCpuMemArena();
SessionOptions& DisableCpuMemArena();
2 changes: 1 addition & 1 deletion include/onnxruntime/core/session/onnxruntime_cxx_inline.h
@@ -138,7 +138,7 @@ inline SessionOptions& SessionOptions::SetThreadPoolSize(int session_thread_pool
return *this;
}

inline SessionOptions& SessionOptions::SetGraphOptimizationLevel(int graph_optimization_level) {
inline SessionOptions& SessionOptions::SetGraphOptimizationLevel(GraphOptimizationLevel graph_optimization_level) {
ORT_THROW_ON_ERROR(OrtSetSessionGraphOptimizationLevel(p_, graph_optimization_level));
return *this;
}
2 changes: 1 addition & 1 deletion onnxruntime/__init__.py
@@ -18,4 +18,4 @@
from onnxruntime.capi import onnxruntime_validation
onnxruntime_validation.check_distro_info()
from onnxruntime.capi.session import InferenceSession
from onnxruntime.capi._pybind_state import RunOptions, SessionOptions, set_default_logger_severity, get_device, NodeArg, ModelMetadata
from onnxruntime.capi._pybind_state import RunOptions, SessionOptions, set_default_logger_severity, get_device, NodeArg, ModelMetadata, GraphOptimizationLevel
21 changes: 17 additions & 4 deletions onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@@ -25,6 +25,12 @@ using namespace ::onnxruntime::logging;

namespace onnxruntime {

// Per TensorRT documentation, logger needs to be a singleton.
TensorrtLogger& GetTensorrtLogger() {
static TensorrtLogger trt_logger(nvinfer1::ILogger::Severity::kWARNING);
return trt_logger;
}

#define CHECK_CUDA(call) \
do { \
cudaError_t status = call; \
@@ -197,7 +203,7 @@ SubGraphCollection_t TensorrtExecutionProvider::GetSupportedList(SubGraphCollect

// Get supported node list recursively
SubGraphCollection_t parser_nodes_list;
TensorrtLogger trt_logger(nvinfer1::ILogger::Severity::kWARNING);
TensorrtLogger& trt_logger = GetTensorrtLogger();
auto trt_builder = unique_pointer<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(trt_logger));
auto trt_network = unique_pointer<nvinfer1::INetworkDefinition>(trt_builder->createNetwork());
auto trt_parser = unique_pointer<nvonnxparser::IParser>(nvonnxparser::createParser(*trt_network, trt_logger));
@@ -255,7 +261,7 @@ TensorrtExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph,

// Get supported node list
SubGraphCollection_t parser_nodes_vector;
TensorrtLogger trt_logger(nvinfer1::ILogger::Severity::kWARNING);
TensorrtLogger& trt_logger = GetTensorrtLogger();
auto trt_builder = unique_pointer<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(trt_logger));
auto trt_network = unique_pointer<nvinfer1::INetworkDefinition>(trt_builder->createNetwork());
auto trt_parser = unique_pointer<nvonnxparser::IParser>(nvonnxparser::createParser(*trt_network, trt_logger));
@@ -323,7 +329,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<onnxruntime:
model_proto.SerializeToString(&string_buf);

// Create TensorRT engine
TensorrtLogger trt_logger(nvinfer1::ILogger::Severity::kWARNING);
TensorrtLogger& trt_logger = GetTensorrtLogger();
auto trt_builder = unique_pointer<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(trt_logger));
auto trt_network = unique_pointer<nvinfer1::INetworkDefinition>(trt_builder->createNetwork());
auto trt_parser = unique_pointer<nvonnxparser::IParser>(nvonnxparser::createParser(*trt_network, trt_logger));
@@ -490,7 +496,14 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<onnxruntime:

// Run TRT inference
std::lock_guard<OrtMutex> lock(*(trt_state->tensorrt_mu_ptr));
trt_state->context->enqueue(batch_size, &buffers[0], nullptr, nullptr);
bool ret = trt_state->context->enqueue(batch_size, &buffers[0], nullptr, nullptr);
if (!ret) {
if (trt_state->context->getEngine().getMaxBatchSize() < batch_size) {
return common::Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
"TRT enqueue failed: Set ORT_TRT_MAX_BATCH_SIZE environment variable to at least " + to_string(batch_size));
}
return common::Status(common::ONNXRUNTIME, common::FAIL, "Failed to enqueue to TRT execution context.");
}

return Status::OK();
};