Commit 7080bb7

Merge branch 'master' into dev/shahasad/move-systems-numerics-tensors-to-onnxruntime-single-assembly

shahasad authored Aug 15, 2019
2 parents 0505e5a + 8d12ce4 commit 7080bb7
Showing 20 changed files with 252 additions and 148 deletions.
@@ -26,7 +26,7 @@ static void UseApi()

// Optional : Create session options and set the graph optimization level for the session
SessionOptions options = new SessionOptions();
options.GraphOptimizationLevel = 2;
options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_EXTENDED;

using (var session = new InferenceSession(modelPath, options))
{
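The hunk is truncated at this point. For orientation, here is a minimal end-to-end sketch of the updated C# usage; the model path, input name, and tensor shape are placeholders, and the DenseTensor namespace is assumed to still be System.Numerics.Tensors as of this commit:

// A minimal sketch, not the sample itself: model and input are placeholders.
using System.Collections.Generic;
using System.Numerics.Tensors;   // DenseTensor<T> (assumed pre-move namespace)
using Microsoft.ML.OnnxRuntime;

class Sketch
{
    static void Main()
    {
        var options = new SessionOptions();
        options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_EXTENDED;

        using (var session = new InferenceSession("model.onnx", options))
        {
            var tensor = new DenseTensor<float>(new[] { 1, 3, 224, 224 });
            var inputs = new List<NamedOnnxValue> {
                NamedOnnxValue.CreateFromTensor("data", tensor)
            };
            var results = session.Run(inputs); // IReadOnlyCollection<NamedOnnxValue>
        }
    }
}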
2 changes: 1 addition & 1 deletion csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs
@@ -161,7 +161,7 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
public static extern IntPtr /*(OrtStatus*)*/ OrtSetSessionThreadPoolSize(IntPtr /* OrtSessionOptions* */ options, int sessionThreadPoolSize);

[DllImport(nativeLib, CharSet = charSet)]
public static extern IntPtr /*(OrtStatus*)*/ OrtSetSessionGraphOptimizationLevel(IntPtr /* OrtSessionOptions* */ options, uint graphOptimizationLevel);
public static extern IntPtr /*(OrtStatus*)*/ OrtSetSessionGraphOptimizationLevel(IntPtr /* OrtSessionOptions* */ options, GraphOptimizationLevel graphOptimizationLevel);
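A C# enum marshals as its underlying 32-bit integer by default, so this signature change stays binary-compatible with the previous uint version. A hedged sketch of calling it directly, assuming sessionOptionsHandle holds a valid OrtSessionOptions* obtained elsewhere:

// Sketch: the enum crosses the P/Invoke boundary as a plain int, matching
// the values of the C-side GraphOptimizationLevel enum.
IntPtr status = NativeMethods.OrtSetSessionGraphOptimizationLevel(
    sessionOptionsHandle, GraphOptimizationLevel.ORT_ENABLE_EXTENDED);
NativeApiStatus.VerifySuccess(status); // throws OnnxRuntimeException on failure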


///**
37 changes: 22 additions & 15 deletions csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs
@@ -8,6 +8,17 @@

namespace Microsoft.ML.OnnxRuntime
{
/// <summary>
/// TODO Add documentation about which optimizations are enabled for each value.
/// </summary>
public enum GraphOptimizationLevel
{
ORT_DISABLE_ALL = 0,
ORT_ENABLE_BASIC = 1,
ORT_ENABLE_EXTENDED = 2,
ORT_ENABLE_ALL = 99
}
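The jump to 99 for ORT_ENABLE_ALL leaves numeric room for future intermediate levels. As a sketch, the old uint levels used before this change map onto the new names as implied by the substitutions throughout this diff:

// Sketch: mapping of the legacy numeric levels replaced in this commit.
static GraphOptimizationLevel FromLegacyLevel(uint level)
{
    switch (level)
    {
        case 0: return GraphOptimizationLevel.ORT_DISABLE_ALL;
        case 1: return GraphOptimizationLevel.ORT_ENABLE_BASIC;
        case 2: return GraphOptimizationLevel.ORT_ENABLE_EXTENDED;
        default: throw new System.ArgumentOutOfRangeException(nameof(level));
    }
}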

/// <summary>
/// Holds the options for creating an InferenceSession
/// </summary>
@@ -117,7 +128,7 @@ public bool EnableMemoryPattern
}
private bool _enableMemoryPattern = true;


/// <summary>
/// Path prefix to use for output of profiling data
/// </summary>
@@ -158,7 +169,7 @@ public bool EnableProfiling
/// </summary>
public string OptimizedModelFilePath
{
get
get
{
return _optimizedModelFilePath;
}
@@ -174,7 +185,7 @@ public string OptimizedModelFilePath
private string _optimizedModelFilePath = "";



/// <summary>
/// Enables Arena allocator for the CPU memory allocations. Default is true.
/// </summary>
@@ -190,7 +201,7 @@ public bool EnableCpuMemArena
{
NativeApiStatus.VerifySuccess(NativeMethods.OrtEnableCpuMemArena(_nativePtr));
_enableCpuMemArena = true;
}
}
else if (_enableCpuMemArena && !value)
{
NativeApiStatus.VerifySuccess(NativeMethods.OrtDisableCpuMemArena(_nativePtr));
@@ -259,13 +270,9 @@ public int ThreadPoolSize


/// <summary>
/// Sets the graph optimization level for the session. Default is set to 1.
/// Sets the graph optimization level for the session. Default is set to ORT_ENABLE_BASIC.
/// </summary>
/// Available options are : 0, 1, 2
/// 0 -> Disable all optimizations
/// 1 -> Enable basic optimizations
/// 2 -> Enable all optimizations
public uint GraphOptimizationLevel
public GraphOptimizationLevel GraphOptimizationLevel
{
get
{
@@ -277,7 +284,7 @@ public uint GraphOptimizationLevel
_graphOptimizationLevel = value;
}
}
private uint _graphOptimizationLevel = 1;
private GraphOptimizationLevel _graphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_BASIC;

#endregion

@@ -298,16 +305,16 @@ private static bool CheckCudaExecutionProviderDLLs()
{
IntPtr handle = LoadLibrary(dll);
if (handle != IntPtr.Zero)
continue;
continue;
var sysdir = new StringBuilder(String.Empty, 2048);
GetSystemDirectory(sysdir, (uint)sysdir.Capacity);
throw new OnnxRuntimeException(
ErrorCode.NoSuchFile,
ErrorCode.NoSuchFile,
$"kernel32.LoadLibrary():'{dll}' not found. CUDA is required for GPU execution. " +
$". Verify it is available in the system directory={sysdir}. Else copy it to the output folder."
);
);
}
}
}
return true;
}

@@ -34,11 +34,7 @@ int main(int argc, char* argv[]) {
OrtSetSessionThreadPoolSize(session_options, 1);

// Sets graph optimization level
// Available levels are
// 0 -> To disable all optimizations
// 1 -> To enable basic optimizations (Such as redundant node removals)
// 2 -> To enable all optimizations (Includes level 1 + more complex optimizations like node fusions)
OrtSetSessionGraphOptimizationLevel(session_options, 1);
OrtSetSessionGraphOptimizationLevel(session_options, ORT_ENABLE_BASIC);

// Optionally add more execution providers via session_options
// E.g. for CUDA include cuda_provider_factory.h and uncomment the following line:
23 changes: 11 additions & 12 deletions csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
@@ -33,7 +33,7 @@ public void TestSessionOptions()
Assert.Equal("", opt.LogId);
Assert.Equal(LogLevel.Verbose, opt.LogVerbosityLevel);
Assert.Equal(0, opt.ThreadPoolSize);
Assert.Equal(1u, opt.GraphOptimizationLevel);
Assert.Equal(GraphOptimizationLevel.ORT_ENABLE_BASIC, opt.GraphOptimizationLevel);

// try setting options
opt.EnableSequentialExecution = false;
@@ -61,12 +61,11 @@ public void TestSessionOptions()
opt.ThreadPoolSize = 4;
Assert.Equal(4, opt.ThreadPoolSize);

opt.GraphOptimizationLevel = 3;
Assert.Equal(3u, opt.GraphOptimizationLevel);
opt.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_EXTENDED;
Assert.Equal(GraphOptimizationLevel.ORT_ENABLE_EXTENDED, opt.GraphOptimizationLevel);

Assert.Throws<OnnxRuntimeException>(() => { opt.ThreadPoolSize = -2; });
Assert.Throws<OnnxRuntimeException>(() => { opt.GraphOptimizationLevel = 10; });

Assert.Throws<OnnxRuntimeException>(() => { opt.GraphOptimizationLevel = (GraphOptimizationLevel)10; });
}
}
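The cast in the last assertion works because C# converts any integer to an enum without complaint; the range check only happens on the native side. A sketch of validating numeric input up front, with a hypothetical raw value:

// Sketch: reject undefined numeric levels before assignment, mirroring the
// native-side validation the test above exercises. 'raw' is hypothetical.
int raw = 10;
if (!System.Enum.IsDefined(typeof(GraphOptimizationLevel), raw))
    throw new System.ArgumentOutOfRangeException(nameof(raw));
var level = (GraphOptimizationLevel)raw;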

@@ -128,11 +127,11 @@ public void CanCreateAndDisposeSessionWithModelPath()
}

[Theory]
[InlineData(0, true)]
[InlineData(0, false)]
[InlineData(2, true)]
[InlineData(2, false)]
private void CanRunInferenceOnAModel(uint graphOptimizationLevel, bool disableSequentialExecution)
[InlineData(GraphOptimizationLevel.ORT_DISABLE_ALL, true)]
[InlineData(GraphOptimizationLevel.ORT_DISABLE_ALL, false)]
[InlineData(GraphOptimizationLevel.ORT_ENABLE_EXTENDED, true)]
[InlineData(GraphOptimizationLevel.ORT_ENABLE_EXTENDED, false)]
private void CanRunInferenceOnAModel(GraphOptimizationLevel graphOptimizationLevel, bool disableSequentialExecution)
{
string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx");

@@ -742,7 +741,7 @@ private void TestModelSerialization()
// Set the optimized model file path to assert that no exceptions are thrown.
SessionOptions options = new SessionOptions();
options.OptimizedModelFilePath = modelOutputPath;
options.GraphOptimizationLevel = 1;
options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_BASIC;
var session = new InferenceSession(modelPath, options);
Assert.NotNull(session);
Assert.True(File.Exists(modelOutputPath));
@@ -791,7 +790,7 @@ private void VerifyNativeMethodsExist()
"OrtEnableSequentialExecution","OrtDisableSequentialExecution","OrtEnableProfiling","OrtDisableProfiling",
"OrtEnableMemPattern","OrtDisableMemPattern","OrtEnableCpuMemArena","OrtDisableCpuMemArena",
"OrtSetSessionLogId","OrtSetSessionLogVerbosityLevel","OrtSetSessionThreadPoolSize","OrtSetSessionGraphOptimizationLevel",
"OrtSetOptimizedModelFilePath", "OrtSessionOptionsAppendExecutionProvider_CPU",
"OrtSetOptimizedModelFilePath", "OrtSessionOptionsAppendExecutionProvider_CPU",
"OrtCreateRunOptions", "OrtReleaseRunOptions", "OrtRunOptionsSetRunLogVerbosityLevel", "OrtRunOptionsSetRunTag",
"OrtRunOptionsGetRunLogVerbosityLevel", "OrtRunOptionsGetRunTag","OrtRunOptionsEnableTerminate", "OrtRunOptionsDisableTerminate",
"OrtCreateAllocatorInfo","OrtCreateCpuAllocatorInfo",
17 changes: 9 additions & 8 deletions csharp/tools/Microsoft.ML.OnnxRuntime.PerfTool/Program.cs
@@ -33,7 +33,7 @@ class CommandOptions
public bool ParallelExecution { get; set; } = false;

[Option('o', "optimization_level", Required = false, HelpText = "Optimization Level. Default is 1, partial optimization.")]
public uint OptimizationLevel { get; set; } = 1;
public GraphOptimizationLevel OptimizationLevel { get; set; } = GraphOptimizationLevel.ORT_ENABLE_BASIC;
}
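With the option now enum-typed, the CommandLine parser should bind the level by name rather than as a bare number; a hedged sketch of the equivalent manual parse, with a hypothetical input string:

// Sketch: manual equivalent of enum binding; falls back to the default above.
GraphOptimizationLevel level;
if (!System.Enum.TryParse("ORT_ENABLE_EXTENDED", true, out level))
    level = GraphOptimizationLevel.ORT_ENABLE_BASIC;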

class Program
@@ -42,7 +42,8 @@ public static void Main(string[] args)
{
var cmdOptions = Parser.Default.ParseArguments<CommandOptions>(args);
cmdOptions.WithParsed(
options => {
options =>
{
Run(options);
});
}
@@ -52,7 +53,7 @@ public static void Run(CommandOptions options)
string inputPath = options.InputFile;
int iteration = options.IterationCount;
bool parallelExecution = options.ParallelExecution;
uint optLevel = options.OptimizationLevel;
GraphOptimizationLevel optLevel = options.OptimizationLevel;
Console.WriteLine("Running model {0} in OnnxRuntime:", modelPath);
Console.WriteLine("input:{0}", inputPath);
Console.WriteLine("iteration count:{0}", iteration);
@@ -84,11 +85,11 @@ public static float[] LoadTensorFromFile(string filename)
return tensorData.ToArray();
}

static void RunModelOnnxRuntime(string modelPath, string inputPath, int iteration, DateTime[] timestamps, bool parallelExecution, uint optLevel)
static void RunModelOnnxRuntime(string modelPath, string inputPath, int iteration, DateTime[] timestamps, bool parallelExecution, GraphOptimizationLevel optLevel)
{
if (timestamps.Length != (int)TimingPoint.TotalCount)
{
throw new ArgumentException("Timestamps array must have "+(int)TimingPoint.TotalCount+" size");
throw new ArgumentException("Timestamps array must have " + (int)TimingPoint.TotalCount + " size");
}

timestamps[(int)TimingPoint.Start] = DateTime.Now;
@@ -108,12 +109,12 @@ static void RunModelOnnxRuntime(string modelPath, string inputPath, int iteratio
container.Add(NamedOnnxValue.CreateFromTensor<float>(name, tensor));
}



timestamps[(int)TimingPoint.InputLoaded] = DateTime.Now;

// Run the inference
for (int i=0; i < iteration; i++)
for (int i = 0; i < iteration; i++)
{
var results = session.Run(container); // results is an IReadOnlyList<NamedOnnxValue> container
Debug.Assert(results != null);
@@ -132,7 +133,7 @@ static void RunModelOnnxRuntime(string modelPath, string inputPath, int iteratio
static void PrintUsage()
{
Console.WriteLine("Usage:\n"
+"dotnet Microsoft.ML.OnnxRuntime.PerfTool <onnx-model-path> <input-file-path> <iteration-count>"
+ "dotnet Microsoft.ML.OnnxRuntime.PerfTool <onnx-model-path> <input-file-path> <iteration-count>"
);
}

9 changes: 9 additions & 0 deletions docs/execution_providers/TensorRT-ExecutionProvider.md
@@ -22,3 +22,12 @@ When using the python wheel from the ONNX Runtime build with TensorRT execution

### Using onnxruntime_perf_test
You can test the performance for your ONNX Model with the TensorRT execution provider. Use the flag `-e tensorrt` in [onnxruntime_perf_test](https://github.com/Microsoft/onnxruntime/tree/master/onnxruntime/test/perftest#onnxruntime-performance-test).

### Configuring Engine Max Batch Size and Workspace Size
By default, the TensorRT execution provider builds an ICudaEngine with max batch size = 1 and max workspace size = 1 GB. Both defaults can be overridden by setting the environment variables ORT_TENSORRT_MAX_BATCH_SIZE and ORT_TENSORRT_MAX_WORKSPACE_SIZE, e.g. on Linux:
#### Override the default max batch size to 10
export ORT_TENSORRT_MAX_BATCH_SIZE=10
#### Override the default max workspace size to 2 GB
export ORT_TENSORRT_MAX_WORKSPACE_SIZE=2147483648
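For the C# bindings touched elsewhere in this commit, the same overrides can be set in-process before the first session is created; this sketch assumes the TensorRT provider reads the process environment when it first builds an engine:

// Sketch (assumption: the variables are read at engine-build time in-process).
System.Environment.SetEnvironmentVariable("ORT_TENSORRT_MAX_BATCH_SIZE", "10");
System.Environment.SetEnvironmentVariable("ORT_TENSORRT_MAX_WORKSPACE_SIZE", "2147483648");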
14 changes: 9 additions & 5 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -234,11 +234,15 @@ ORT_API_STATUS(OrtSetSessionLogId, _Inout_ OrtSessionOptions* options, const cha
ORT_API_STATUS(OrtSetSessionLogVerbosityLevel, _Inout_ OrtSessionOptions* options, int session_log_verbosity_level);

// Set Graph optimization level.
// Available options are : 0, 1, 2.
// 0 -> Disable all optimizations
// 1 -> Enable basic optimizations
// 2 -> Enable all optimizations
ORT_API_STATUS(OrtSetSessionGraphOptimizationLevel, _Inout_ OrtSessionOptions* options, int graph_optimization_level);
// TODO Add documentation about which optimizations are enabled for each value.
typedef enum GraphOptimizationLevel {
ORT_DISABLE_ALL = 0,
ORT_ENABLE_BASIC = 1,
ORT_ENABLE_EXTENDED = 2,
ORT_ENABLE_ALL = 99
} GraphOptimizationLevel;
ORT_API_STATUS(OrtSetSessionGraphOptimizationLevel, _Inout_ OrtSessionOptions* options,
GraphOptimizationLevel graph_optimization_level);

// How many threads in the session thread pool.
ORT_API_STATUS(OrtSetSessionThreadPoolSize, _Inout_ OrtSessionOptions* options, int session_thread_pool_size);
2 changes: 1 addition & 1 deletion include/onnxruntime/core/session/onnxruntime_cxx_api.h
@@ -135,7 +135,7 @@ struct SessionOptions : Base<OrtSessionOptions> {
SessionOptions Clone() const;

SessionOptions& SetThreadPoolSize(int session_thread_pool_size);
SessionOptions& SetGraphOptimizationLevel(int graph_optimization_level);
SessionOptions& SetGraphOptimizationLevel(GraphOptimizationLevel graph_optimization_level);

SessionOptions& EnableCpuMemArena();
SessionOptions& DisableCpuMemArena();
2 changes: 1 addition & 1 deletion include/onnxruntime/core/session/onnxruntime_cxx_inline.h
@@ -138,7 +138,7 @@ inline SessionOptions& SessionOptions::SetThreadPoolSize(int session_thread_pool
return *this;
}

inline SessionOptions& SessionOptions::SetGraphOptimizationLevel(int graph_optimization_level) {
inline SessionOptions& SessionOptions::SetGraphOptimizationLevel(GraphOptimizationLevel graph_optimization_level) {
ORT_THROW_ON_ERROR(OrtSetSessionGraphOptimizationLevel(p_, graph_optimization_level));
return *this;
}
2 changes: 1 addition & 1 deletion onnxruntime/__init__.py
@@ -18,4 +18,4 @@
from onnxruntime.capi import onnxruntime_validation
onnxruntime_validation.check_distro_info()
from onnxruntime.capi.session import InferenceSession
from onnxruntime.capi._pybind_state import RunOptions, SessionOptions, set_default_logger_severity, get_device, NodeArg, ModelMetadata
from onnxruntime.capi._pybind_state import RunOptions, SessionOptions, set_default_logger_severity, get_device, NodeArg, ModelMetadata, GraphOptimizationLevel
21 changes: 17 additions & 4 deletions onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@@ -25,6 +25,12 @@ using namespace ::onnxruntime::logging;

namespace onnxruntime {

// Per TensorRT documentation, logger needs to be a singleton.
TensorrtLogger& GetTensorrtLogger() {
static TensorrtLogger trt_logger(nvinfer1::ILogger::Severity::kWARNING);
return trt_logger;
}

#define CHECK_CUDA(call) \
do { \
cudaError_t status = call; \
@@ -197,7 +203,7 @@ SubGraphCollection_t TensorrtExecutionProvider::GetSupportedList(SubGraphCollect

// Get supported node list recursively
SubGraphCollection_t parser_nodes_list;
TensorrtLogger trt_logger(nvinfer1::ILogger::Severity::kWARNING);
TensorrtLogger& trt_logger = GetTensorrtLogger();
auto trt_builder = unique_pointer<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(trt_logger));
auto trt_network = unique_pointer<nvinfer1::INetworkDefinition>(trt_builder->createNetwork());
auto trt_parser = unique_pointer<nvonnxparser::IParser>(nvonnxparser::createParser(*trt_network, trt_logger));
@@ -255,7 +261,7 @@ TensorrtExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph,

// Get supported node list
SubGraphCollection_t parser_nodes_vector;
TensorrtLogger trt_logger(nvinfer1::ILogger::Severity::kWARNING);
TensorrtLogger& trt_logger = GetTensorrtLogger();
auto trt_builder = unique_pointer<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(trt_logger));
auto trt_network = unique_pointer<nvinfer1::INetworkDefinition>(trt_builder->createNetwork());
auto trt_parser = unique_pointer<nvonnxparser::IParser>(nvonnxparser::createParser(*trt_network, trt_logger));
@@ -323,7 +329,7 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<onnxruntime:
model_proto.SerializeToString(&string_buf);

// Create TensorRT engine
TensorrtLogger trt_logger(nvinfer1::ILogger::Severity::kWARNING);
TensorrtLogger& trt_logger = GetTensorrtLogger();
auto trt_builder = unique_pointer<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(trt_logger));
auto trt_network = unique_pointer<nvinfer1::INetworkDefinition>(trt_builder->createNetwork());
auto trt_parser = unique_pointer<nvonnxparser::IParser>(nvonnxparser::createParser(*trt_network, trt_logger));
@@ -490,7 +496,14 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<onnxruntime:

// Run TRT inference
std::lock_guard<OrtMutex> lock(*(trt_state->tensorrt_mu_ptr));
trt_state->context->enqueue(batch_size, &buffers[0], nullptr, nullptr);
bool ret = trt_state->context->enqueue(batch_size, &buffers[0], nullptr, nullptr);
if (!ret) {
if (trt_state->context->getEngine().getMaxBatchSize() < batch_size) {
return common::Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
"TRT enqueue failed: Set ORT_TRT_MAX_BATCH_SIZE environment variable to at least " + to_string(batch_size));
}
return common::Status(common::ONNXRUNTIME, common::FAIL, "Failed to enqueue to TRT execution context.");
}

return Status::OK();
};