Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Relay/TRT Integration (whole graph only) #54

Merged
merged 22 commits into from
Jan 24, 2020
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 32 additions & 4 deletions cmake/modules/contrib/TensorRT.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,50 @@
# specific language governing permissions and limitations
# under the License.

# TensorRT Module

# TensorRT Runtime
# TensorRT Runtime
if(USE_TENSORRT)
  # Runtime support implies codegen support as well.
  set(USE_TENSORRT_CODEGEN ON)
  if(IS_DIRECTORY ${USE_TENSORRT})
    set(TENSORRT_ROOT_DIR ${USE_TENSORRT})
    message(STATUS "Custom TensorRT path: " ${TENSORRT_ROOT_DIR})
  endif()
  find_path(TENSORRT_INCLUDE_DIR NvInfer.h HINTS ${TENSORRT_ROOT_DIR} PATH_SUFFIXES include)
  find_library(TENSORRT_LIB_DIR nvinfer HINTS ${TENSORRT_ROOT_DIR} PATH_SUFFIXES lib)
  find_package_handle_standard_args(TENSORRT DEFAULT_MSG TENSORRT_INCLUDE_DIR TENSORRT_LIB_DIR)
  if(NOT TENSORRT_FOUND)
    # FATAL_ERROR is required to abort configuration; plain "ERROR" is not a
    # valid message mode and would let the build continue without TensorRT.
    message(FATAL_ERROR "Could not find TensorRT.")
  endif()
  message(STATUS "TENSORRT_LIB_DIR: " ${TENSORRT_LIB_DIR})
  include_directories(${TENSORRT_INCLUDE_DIR})
  list(APPEND TVM_RUNTIME_LINKER_LIBS ${TENSORRT_LIB_DIR})

  # NNVM TRT runtime sources (globbed once; previously duplicated under two
  # variable names, which added the same sources to RUNTIME_SRCS twice).
  file(GLOB TENSORRT_NNVM_SRCS src/contrib/subgraph/*.cc)
  list(APPEND RUNTIME_SRCS ${TENSORRT_NNVM_SRCS})

  # Relay TRT runtime sources
  file(GLOB TENSORRT_RELAY_CONTRIB_SRC src/runtime/contrib/tensorrt/*.cc)
  list(APPEND RUNTIME_SRCS ${TENSORRT_RELAY_CONTRIB_SRC})
  list(APPEND RUNTIME_SRCS src/relay/backend/contrib/tensorrt/common_utils.cc)

  # Set defines so the graph runtime sources compile in their TRT paths.
  set_source_files_properties(${RUNTIME_GRAPH_SRCS}
    PROPERTIES COMPILE_DEFINITIONS "TVM_GRAPH_RUNTIME_TENSORRT")
endif()

# TensorRT Codegen only. This can be enabled independently of USE_TENSORRT to
# enable compilation of TensorRT modules without requiring TensorRT to be
# installed. The compiled modules will only be able to be executed using a TVM
# built with USE_TENSORRT=ON.
if(USE_TENSORRT_CODEGEN)
  message(STATUS "Build with TensorRT codegen")
  # Relay TRT codegen sources
  file(GLOB TENSORRT_RELAY_CONTRIB_SRC src/relay/backend/contrib/tensorrt/*.cc)
  list(APPEND COMPILER_SRCS ${TENSORRT_RELAY_CONTRIB_SRC})
  list(APPEND COMPILER_SRCS src/runtime/contrib/tensorrt/tensorrt_module.cc)
  # If runtime is enabled also, set flag for compiler srcs
  if(USE_TENSORRT)
    set_source_files_properties(${COMPILER_SRCS}
      PROPERTIES COMPILE_DEFINITIONS "TVM_GRAPH_RUNTIME_TENSORRT")
  endif()
endif()
194 changes: 194 additions & 0 deletions python/tvm/relay/tensorrt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=invalid-name,arguments-differ,no-else-return,unused-argument,missing-docstring
"""
Relay TensorRT codegen.
"""
import tvm
from tvm import relay
from tvm.relay.expr import Call, Constant

from . import _transform
from .expr_functor import ExprMutator

def _bind_params(func, params):
    """Bind the given params to the function's arguments as Constant nodes.

    Parameters named in ``params`` that do not appear in ``func.params`` are
    silently skipped. A name shared by more than one function argument is
    ambiguous and raises ``ValueError``.
    """
    # Map each argument name to its Var; a duplicated name is recorded as
    # None so we can reject it if the caller tries to bind it.
    name_to_var = {}
    for param_var in func.params:
        hint = param_var.name_hint
        name_to_var[hint] = None if hint in name_to_var else param_var
    bindings = {}
    for k, value in params.items():
        if k not in name_to_var:
            continue
        var = name_to_var[k]
        if var is None:
            raise ValueError("Multiple args in the function have name %s" % k)
        bindings[var] = relay.expr.const(value)
    return relay.expr.bind(func, bindings)

class LegalizeLayoutTranform(ExprMutator):
    """
    Legalize Relay layout transforms to transpose ops to simplify TensorRT
    conversion.

    Rewriting layout_transform into the standard transpose op means only one
    TrtOpConverter (for transpose) is needed, instead of a nearly identical
    extra converter for layout_transform.
    """
    def visit_call(self, expr):
        visit = super().visit_call(expr)
        if expr.op == tvm.relay.op.get("layout_transform"):
            src_layout = expr.attrs['src_layout']
            dst_layout = expr.attrs['dst_layout']
            # Replace the layout_transform call itself with an equivalent
            # transpose of its input (visit.args[0]). Wrapping the transpose
            # around the layout_transform output would apply the layout
            # change twice and leave the unsupported op in the graph.
            if src_layout == "NCHW" and dst_layout == "NHWC":
                return relay.transpose(visit.args[0], axes=[0, 2, 3, 1])
            elif src_layout == "NHWC" and dst_layout == "NCHW":
                return relay.transpose(visit.args[0], axes=[0, 3, 1, 2])
            elif src_layout == "HWIO" and dst_layout == "OIHW":
                return relay.transpose(visit.args[0], axes=[3, 2, 0, 1])
            elif src_layout == "HWOI" and dst_layout == "OIHW":
                return relay.transpose(visit.args[0], axes=[2, 3, 0, 1])
            # May be unneeded.
            elif src_layout == "HWIO" and dst_layout == "IOHW":
                return relay.transpose(visit.args[0], axes=[2, 3, 0, 1])
        return visit

class RemoveDropout(ExprMutator):
    """
    Removes all nn.dropout from an expr by forwarding the dropout input past
    the TupleGetItem(…, 0) that selects its output.
    """
    def visit_tuple_getitem(self, expr):
        new_getitem = super().visit_tuple_getitem(expr)
        if new_getitem.index == 0:
            producer = new_getitem.tuple_value
            # dropout returns a tuple; index 0 is the data output, which we
            # replace with the dropout's own input.
            if isinstance(producer, Call) and producer.op.name == "nn.dropout":
                return producer.args[0]
        return new_getitem

class RemoveMultiplyByOne(ExprMutator):
    """
    Removes multiply by 1.0f. This pass when followed by
    RemoveRedundantTranspose is intended to remove a pattern of
    Transpose([1, 0]) -> Scale(1.0f) -> Transpose([1, 0]) produced by
    PyTorch's addmm operator.
    """
    def visit_call(self, expr):
        if expr.op.name == "multiply":
            # Multiply is commutative, so a scalar 1.0 constant on either
            # side makes the op an identity on the other operand.
            for i, arg in enumerate(expr.args):
                if isinstance(arg, Constant):
                    data = arg.data.asnumpy()
                    if data.shape == () and data.item() == 1.0:
                        # Visit the surviving operand so that removable
                        # patterns nested beneath it are also rewritten.
                        return self.visit(expr.args[1 - i])
        return super().visit_call(expr)

class RemoveRedundantTranspose(ExprMutator):
    """
    Removes Transpose([1, 0]) followed by Transpose([1, 0]). This pass, when
    preceded by RemoveMultiplyByOne, is intended to remove a pattern of
    Transpose([1, 0]) -> Scale(1.0f) -> Transpose([1, 0]) produced by
    PyTorch's addmm operator.
    """
    def check_axes(self, axes):
        # True only for the exact 2-D swap permutation [1, 0].
        return len(axes) == 2 and int(axes[0].value) == 1 and int(axes[1].value) == 0

    def visit_call(self, expr):
        if expr.op.name == "transpose":
            if self.check_axes(expr.attrs['axes']):
                inner = expr.args[0]
                if isinstance(inner, Call) and inner.op.name == "transpose":
                    if self.check_axes(inner.attrs['axes']):
                        # The two transposes cancel; visit what remains so
                        # nested removable patterns are also rewritten.
                        return self.visit(inner.args[0])
        return super().visit_call(expr)

def PreprocessForTrt(mod):
    """Applies passes to prepare main function for TensorRT conversion.

    Parameters
    ----------
    mod: Module
        The original module.

    Returns
    -------
    mod: Module
        The module modified for TensorRT.
    """
    # Order matters: layout legalization first, then the cleanup mutators.
    mutators = [
        LegalizeLayoutTranform(),
        RemoveDropout(),
        RemoveMultiplyByOne(),
        RemoveRedundantTranspose(),
    ]
    for mutator in mutators:
        mod['main'] = mutator.visit(mod['main'])
    return mod

def GetTrtVersion():
    """Gets the version of TensorRT that TVM is built against.

    Returns
    -------
    ret: Tuple[int]
        TensorRT version as a tuple of major, minor, and patch number. If TVM
        is not built with TensorRT, an empty tuple is returned instead.
    """
    version = _transform.GetTrtVersion()
    return tuple(int(part) for part in version)

def IsTrtRuntimeAvailable():
    """Check whether this TVM build provides the TensorRT runtime.

    Returns
    -------
    ret: bool
        True if the TRT version query is registered and reports a non-empty
        version, i.e. TVM was built with TensorRT support.
    """
    # allow_missing=True so a build without TRT returns None instead of
    # raising.
    if tvm.get_global_func("relay._transform.GetTrtVersion", True):
        return GetTrtVersion() != ()
    return False

def EnableTrt(mod, params=None, trt_version=None):
    """Converts the "main" function in the module into one that can be executed using
    TensorRT. If any of the operators are not supported by the TensorRT
    conversion, the unmodified program will be returned instead.

    Parameters
    ----------
    mod: Module
        The original module.

    params : dict of str to NDArray
        Input parameters to the graph that do not change
        during inference time. Used for constant folding.

    trt_version : Optional[Tuple[int]]
        Which version of TensorRT to target for partitioning as a tuple of
        (major, minor, patch). If not specified, will attempt to get using
        GetTrtVersion.

    Returns
    -------
    mod: Module
        The modified module which will use the TensorRT runtime if compatible.
    """
    if not trt_version:
        trt_version = GetTrtVersion()
    # If TVM wasn't built against TRT, default to target TRT 6. Since the
    # actual conversion to TRT is done at runtime, building against TRT is
    # not required for compilation.
    if not trt_version:
        trt_version = (6, 0, 1)
    assert isinstance(trt_version, (list, tuple))
    assert len(trt_version) == 3

    # Apply passes required for TRT. Order is significant: layouts are
    # converted to NCHW before the TRT-specific legalization/cleanup passes,
    # and params are bound before FoldConstant so they can be folded.
    mod = relay.transform.RemoveUnusedFunctions()(mod)
    mod = relay.transform.InferType()(mod)
    mod = relay.transform.ConvertLayout('NCHW')(mod)
    mod = PreprocessForTrt(mod)
    if params:
        # Bind params so that we can use FoldConstant.
        mod['main'] = _bind_params(mod['main'], params)
    mod = relay.transform.FoldConstant()(mod)
    # The C++ pass performs the actual partitioning for the given TRT version.
    return _transform.EnableTrt(*trt_version)(mod)
86 changes: 86 additions & 0 deletions src/relay/backend/contrib/tensorrt/codegen_tensorrt.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file src/relay/backend/contrib/tensorrt/codegen_tensorrt.cc
* \brief Implementation of TensorRT codegen APIs.
*/

#include <tvm/node/serialization.h>
#include <tvm/relay/attrs/nn.h>
#include <tvm/relay/expr_functor.h>
#include <tvm/relay/transform.h>
#include <tvm/relay/type.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/registry.h>

#include <fstream>
#include <sstream>

#include "../codegen_c/codegen_c.h"

namespace tvm {
namespace relay {
namespace contrib {

/*!
* \brief Generates a TensorRTModule from a relay expression. This "compilation"
* does not require TensorRT since the actual conversion using TensorRT APIs is
* deferred until runtime. This step simply serializes the relay program into a
* string.
*/
class TensorRTModuleCodegen : public CSourceModuleCodegenBase {
 public:
  // Serializes the Relay function body (or the single function inside a
  // module) to JSON and hands the string to the runtime-registered
  // TensorRTModule factory. No TensorRT APIs are invoked at this stage.
  runtime::Module CreateCSourceModule(const NodeRef& ref) override {
    std::string serialized_subgraph;
    if (ref->IsInstance<FunctionNode>()) {
      serialized_subgraph = SaveJSON(Downcast<Function>(ref)->body);
    } else if (ref->IsInstance<relay::ModuleNode>()) {
      relay::Module mod = Downcast<relay::Module>(ref);
      // TODO(trevmorr): support multiple functions. It is currently not
      // possible for there to be more than one TRT func, so not a problem yet.
      // NOTE(review): if a module ever holds several functions, only the last
      // one iterated is kept here — revisit before lifting that restriction.
      for (const auto& it : mod->functions) {
        serialized_subgraph = SaveJSON(Downcast<Function>(it.second)->body);
      }
    } else {
      LOG(FATAL)
          << "The input ref is expected to be a Relay function or module.";
    }
    // Delegate module creation to the registered PackedFunc so this codegen
    // step does not need to link against TensorRT itself.
    const PackedFunc* pf =
        runtime::Registry::Get("tvm.contrib.tensorrt.create");
    CHECK(pf != nullptr)
        << "tvm.contrib.tensorrt.create was not found in the registry.";
    return (*pf)(serialized_subgraph);
  }
};

/*!
* \brief The external compiler/codegen tool. It takes a Relay expression/module
* and compiles it into a runtime module.
*/
runtime::Module TrtCompiler(const NodeRef& ref) {
  // Stateless codegen object; all work happens in CreateCSourceModule.
  TensorRTModuleCodegen codegen;
  return codegen.CreateCSourceModule(ref);
}

// Registered as the external codegen hook for the "tensorrt" compiler tag.
TVM_REGISTER_API("relay.ext.tensorrt").set_body_typed(TrtCompiler);

} // namespace contrib
} // namespace relay
} // namespace tvm
46 changes: 46 additions & 0 deletions src/relay/backend/contrib/tensorrt/common_utils.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file src/relay/backend/contrib/tensorrt/common_utils.cc
* \brief Utility functions used by compilation and runtime.
*/

#include "common_utils.h"

namespace tvm {
namespace relay {
namespace contrib {

std::vector<int> GetShape(const Type& type) {
zhiics marked this conversation as resolved.
Show resolved Hide resolved
const auto* ttype = type.as<TensorTypeNode>();
CHECK(ttype);
std::vector<int> _shape;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can set vector size to be ttype->shape.size()

_shape.reserve(ttype->shape.size());
for (size_t i = 0; i < ttype->shape.size(); ++i) {
auto* val = ttype->shape[i].as<IntImm>();
CHECK(val);
_shape.push_back(val->value);
}
return _shape;
}

} // namespace contrib
} // namespace relay
} // namespace tvm
Loading