feat(//core/ir): Implementing new internal input spec type

This commit implements the new input spec type trtorch::core::ir::Input, which encapsulates InputRange and adds the new dtype and tensor format arguments. It also changes DataType op_precision in the engine settings to std::set<nvinfer1::DataType> enabled_precisions, allowing the compiler to set more than a single precision without resorting to catch-all rules such as FP32 and Int8 without FP16. Signed-off-by: Naren Dasan <naren@narendasan.com> Signed-off-by: Naren Dasan <narens@nvidia.com>
pytorch · Jul 21, 2021 · 316df28 · 316df28
1 parent 8e67e38
commit 316df28
Show file tree

Hide file tree

Showing 22 changed files with 333 additions and 188 deletions.
diff --git a/core/compiler.cpp b/core/compiler.cpp
@@ -194,7 +194,7 @@ torch::jit::script::Module CompileGraphWithFallback(const torch::jit::script::Mo
       LOG_INFO(*g << "(LoweringGraph)\n");
 
       // segment the graph and convert segmented TensorRT block
-      auto segmented_blocks = partitioning::Partition(g, convert_cfg.input_ranges, cfg.partition_info);
+      auto segmented_blocks = partitioning::Partition(g, convert_cfg.inputs, cfg.partition_info);
       if (segmented_blocks.size() == 1 && segmented_blocks[0].target() == partitioning::SegmentedBlock::kTorch) {
         LOG_WARNING("Didn't generate any TensorRT engines, the compiler did nothing\n");
         return mod;
@@ -208,16 +208,16 @@ torch::jit::script::Module CompileGraphWithFallback(const torch::jit::script::Mo
       for (auto& seg_block : segmented_blocks) {
         std::string cur_block_target =
             seg_block.target() == partitioning::SegmentedBlock::kTensorRT ? "TensorRT" : "Torch";
-        LOG_INFO(*seg_block.g() << "(MiniGraphIn" << cur_block_target << "Block)\n");
+        LOG_INFO(*seg_block.g() << "(Sub Graph" << cur_block_target << "Block)\n");
         std::ostringstream trt_engine_id;
         trt_engine_id << reinterpret_cast<const int*>(&seg_block);
         if (seg_block.target() == partitioning::SegmentedBlock::kTensorRT) {
-          std::vector<ir::InputRange> input_ranges;
+          std::vector<ir::Input> inputs;
           for (auto& shape : seg_block.in_shape()) {
-            input_ranges.push_back(ir::InputRange(shape));
+            inputs.push_back(ir::Input(shape));
           }
           // update the input ranges for each segments
-          convert_cfg.input_ranges = input_ranges;
+          convert_cfg.inputs = inputs;
           auto engine = conversion::ConvertBlockToEngine(seg_block.block(), convert_cfg, named_params);
           auto temp_g = std::make_shared<torch::jit::Graph>();
           AddEngineToGraph(new_mod, temp_g, engine, trt_engine_id.str(), true);

diff --git a/core/compiler.h b/core/compiler.h
@@ -11,7 +11,7 @@ namespace trtorch {
 namespace core {
 
 struct CompileSpec {
-  CompileSpec(std::vector<ir::InputRange> input_ranges) : convert_info(std::move(input_ranges)) {}
+  CompileSpec(std::vector<ir::Input> inputs) : convert_info(std::move(inputs)) {}
   conversion::ConversionInfo convert_info;
   partitioning::PartitionInfo partition_info;
 };

diff --git a/core/conversion/conversion.cpp b/core/conversion/conversion.cpp
@@ -128,7 +128,7 @@ void AddLayer(ConversionCtx* ctx, const torch::jit::Node* n) {
 void AddInputs(
     ConversionCtx* ctx,
     at::ArrayRef<const torch::jit::Value*> inputs,
-    std::vector<ir::InputRange>& input_dims) {
+    std::vector<ir::Input>& input_specs) {
   std::vector<const torch::jit::Value*> input_tensors;
   for (auto in : inputs) {
     // Disregarding inputs that are not tensors
@@ -142,36 +142,33 @@ void AddInputs(
     }
   }
 
+  std::stringstream ss;
+  ss << "Input Dimension Specs: [\n";
+  for (auto i : input_specs) {
+    ss << "    " << i << ",";
+  }
+  ss << ']';
+  LOG_DEBUG(ss.str());
+
   TRTORCH_CHECK(
-      input_tensors.size() == input_dims.size(),
+      input_tensors.size() == input_specs.size(),
       "Expected dimension specifications for all input tensors"
-          << ", but found " << input_tensors.size() << " input tensors and " << input_dims.size()
+          << ", but found " << input_tensors.size() << " input tensors and " << input_specs.size()
           << " dimension specs (conversion.AddInputs)");
 
   auto profile = ctx->builder->createOptimizationProfile();
 
-  TRTORCH_CHECK(
-      ctx->input_dtypes.size() == 0 || ctx->input_dtypes.size() == input_tensors.size(),
-      "Number of input_dtypes : " << ctx->input_dtypes.size()
-                                  << " should either be 0 or equal to number of input_tensors which is "
-                                  << input_tensors.size() << " (conversion.AddInputs)");
-
-  // If the input_dtypes is not provided, assume all the input tensors to be in float32
-  if (ctx->input_dtypes.size() == 0) {
-    LOG_DEBUG("Input datatypes are not provided explicitly. Default float32 datatype is being used for all inputs");
-    ctx->input_dtypes = std::vector<nvinfer1::DataType>{input_tensors.size(), nvinfer1::DataType::kFLOAT};
-  }
-
   for (size_t i = 0; i < input_tensors.size(); i++) {
     auto in = input_tensors[i];
-    auto dims = input_dims[i];
+    auto dims = input_specs[i];
     std::string name = std::string("input_") + std::to_string(ctx->num_inputs);
     LOG_INFO(
         ctx->logger,
-        "Adding Input " << in->debugName() << " named : " << name << ", shape: " << dims.input_shape
-                        << ", dtype : " << ctx->input_dtypes[i] << " in engine (conversion.AddInputs)");
-    auto trt_in = ctx->net->addInput(name.c_str(), ctx->input_dtypes[i], dims.input_shape);
+        "Adding Input " << in->debugName() << " (named: " << name << "): " << dims << " in engine (conversion.AddInputs)");
+
+    auto trt_in = ctx->net->addInput(name.c_str(), dims.dtype, dims.input_shape);
     TRTORCH_CHECK(trt_in, "Failed to add input node: " << in->debugName() << " (conversion.AddInputs)");
+    trt_in->setAllowedFormats(1U << static_cast<int>(dims.format));
 
     profile->setDimensions(trt_in->getName(), nvinfer1::OptProfileSelector::kMIN, dims.min);
     profile->setDimensions(trt_in->getName(), nvinfer1::OptProfileSelector::kOPT, dims.opt);
@@ -191,7 +188,7 @@ void AddInputs(
 
   ctx->cfg->addOptimizationProfile(profile);
 #if NV_TENSORRT_MAJOR > 7 || (NV_TENSORRT_MAJOR == 7 && NV_TENSORRT_MINOR >= 1)
-  if (ctx->op_precision == nvinfer1::DataType::kINT8) {
+  if (ctx->enabled_precisions.find(nvinfer1::DataType::kINT8) != ctx->enabled_precisions.end()) {
     ctx->cfg->setCalibrationProfile(profile);
   }
 #endif
@@ -363,7 +360,7 @@ void ConvertBlockToNetDef(
 
   auto inputs = b->inputs();
   AddParamsToCtxValueMap(ctx, static_params);
-  AddInputs(ctx, inputs, build_info.input_ranges);
+  AddInputs(ctx, inputs, build_info.inputs);
 
   auto nodes = b->nodes();
 

diff --git a/core/conversion/conversion.h b/core/conversion/conversion.h
@@ -12,10 +12,10 @@ namespace core {
 namespace conversion {
 
 struct ConversionInfo {
-  std::vector<ir::InputRange> input_ranges;
+  std::vector<ir::Input> inputs;
   BuilderSettings engine_settings;
-  ConversionInfo(std::vector<ir::InputRange> input_ranges)
-      : input_ranges(std::move(input_ranges)), engine_settings(BuilderSettings()) {}
+  ConversionInfo(std::vector<ir::Input> inputs)
+      : inputs(std::move(inputs)), engine_settings(BuilderSettings()) {}
 };
 
 // TODO: REMOVE GRAPH AND PARAMS AND MOVE FULLY TO INLINED CONSTANTS

diff --git a/core/conversion/conversionctx/ConversionCtx.cpp b/core/conversion/conversionctx/ConversionCtx.cpp
@@ -10,8 +10,11 @@ namespace conversion {
 // clang-format off
 std::ostream& operator<<(std::ostream& os, const BuilderSettings& s) {
     os << "Settings requested for TensorRT engine:"                                        \
-       << "\n    Operating Precision: " << s.op_precision                                  \
-       << "\n    TF32 Floating Point Computation Enabled: " << !s.disable_tf32             \
+       << "\n    Enabled Precisions: ";
+       for (auto p = s.enabled_precisions.begin(); p != s.enabled_precisions.end(); ++p) {
+        os << *p << ' ';
+       }
+    os << "\n    TF32 Floating Point Computation Enabled: " << !s.disable_tf32             \
        << "\n    Truncate Long and Double: " << s.truncate_long_and_double                 \
        << "\n    Make Refittable Engine: " << s.refit                                      \
        << "\n    Debuggable Engine: " << s.debug                                           \
@@ -57,30 +60,29 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
   LOG_DEBUG(build_settings);
   cfg = builder->createBuilderConfig();
 
-  switch (settings.op_precision) {
-    case nvinfer1::DataType::kHALF:
-      TRTORCH_CHECK(builder->platformHasFastFp16(), "Requested inference in FP16 but platform does not support FP16");
-      cfg->setFlag(nvinfer1::BuilderFlag::kFP16);
-      break;
-    case nvinfer1::DataType::kINT8:
-      TRTORCH_CHECK(builder->platformHasFastInt8(), "Requested inference in INT8 but platform does not support INT8");
-      cfg->setFlag(nvinfer1::BuilderFlag::kINT8);
-      if (!settings.strict_types) {
+  for(auto p = settings.enabled_precisions.begin(); p != settings.enabled_precisions.end(); ++p) {
+    switch (*p) {
+      case nvinfer1::DataType::kHALF:
+        TRTORCH_CHECK(builder->platformHasFastFp16(), "Requested inference in FP16 but platform does not support FP16");
         cfg->setFlag(nvinfer1::BuilderFlag::kFP16);
-      }
-      TRTORCH_CHECK(
-          settings.calibrator != nullptr,
-          "Requested inference in INT8 but no calibrator provided, set the ptq_calibrator field in the CompileSpec struct with your calibrator");
-      cfg->setInt8Calibrator(settings.calibrator);
-      break;
-    case nvinfer1::DataType::kFLOAT:
-    case nvinfer1::DataType::kINT32:
-    case nvinfer1::DataType::kBOOL:
-    default:
-      break;
+        break;
+      case nvinfer1::DataType::kINT8:
+        TRTORCH_CHECK(builder->platformHasFastInt8(), "Requested inference in INT8 but platform does not support INT8");
+        cfg->setFlag(nvinfer1::BuilderFlag::kINT8);
+        TRTORCH_CHECK(
+            settings.calibrator != nullptr,
+            "Requested inference in INT8 but no calibrator provided, set the ptq_calibrator field in the CompileSpec struct with your calibrator");
+        cfg->setInt8Calibrator(settings.calibrator);
+        break;
+      case nvinfer1::DataType::kFLOAT:
+      case nvinfer1::DataType::kINT32:
+      case nvinfer1::DataType::kBOOL:
+      default:
+        break;
+    }
   }
 
-  op_precision = settings.op_precision;
+  enabled_precisions = settings.enabled_precisions;
   input_dtypes = settings.input_dtypes;
 
   if (settings.disable_tf32) {
@@ -119,7 +121,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
         static_cast<int>(settings.device.dla_core) < nbDLACores,
         "Configured DLA Core ID: " << settings.device.dla_core
                                    << " not available. Total number of available DLA Cores: " << nbDLACores);
-    TRTORCH_CHECK(settings.op_precision != nvinfer1::DataType::kFLOAT, "DLA supports only fp16 or int8 precision");
+    TRTORCH_CHECK(settings.enabled_precisions.find(nvinfer1::DataType::kFLOAT) == settings.enabled_precisions.end(), "DLA supports only fp16 or int8 precision");
     cfg->setDLACore(settings.device.dla_core);
   }
 }

diff --git a/core/conversion/conversionctx/ConversionCtx.h b/core/conversion/conversionctx/ConversionCtx.h
@@ -3,6 +3,7 @@
 #include <map>
 #include <memory>
 #include <unordered_map>
+#include <set>
 
 #include "NvInfer.h"
 #include "torch/csrc/jit/ir/ir.h"
@@ -23,7 +24,7 @@ struct Device {
 };
 
 struct BuilderSettings {
-  nvinfer1::DataType op_precision = nvinfer1::DataType::kFLOAT;
+  std::set<nvinfer1::DataType> enabled_precisions = {nvinfer1::DataType::kFLOAT};
   std::vector<nvinfer1::DataType> input_dtypes;
   bool disable_tf32 = false;
   bool refit = false;
@@ -59,7 +60,7 @@ struct ConversionCtx {
   nvinfer1::INetworkDefinition* net;
   nvinfer1::IBuilderConfig* cfg;
   std::vector<nvinfer1::DataType> input_dtypes;
-  nvinfer1::DataType op_precision;
+  std::set<nvinfer1::DataType> enabled_precisions;
   BuilderSettings settings;
   util::logging::TRTorchLogger logger;
   // Pointers to data that needs to remain alive until conversion is done

diff --git a/core/conversion/converters/impl/activation.cpp b/core/conversion/converters/impl/activation.cpp
@@ -177,7 +177,8 @@ auto acthardtanh TRTORCH_UNUSED =
                     std::string pluginName = "CustomGeluPluginDynamic";
                     nvinfer1::PluginFieldCollection fc;
                     std::vector<nvinfer1::PluginField> f;
-                    int type_id = ctx->settings.op_precision == nvinfer1::DataType::kFLOAT
+                    //REVIEW is this right?
+                    int type_id = ctx->settings.enabled_precisions.find(nvinfer1::DataType::kHALF) == ctx->settings.enabled_precisions.end()
                         ? 0
                         : 1; // Integer encoding the DataType (0: FP32, 1: FP16)
                     f.emplace_back(nvinfer1::PluginField("type_id", &type_id, nvinfer1::PluginFieldType::kINT32, 1));

diff --git a/core/ir/BUILD b/core/ir/BUILD
@@ -13,7 +13,7 @@ cc_library(
         "ir.h"
     ],
     srcs = [
-        "InputRange.cpp",
+        "Input.cpp"
     ],
     deps = [
         "@tensorrt//:nvinfer",