
Commit

Add TensorRT backend.
Fix merge

Fix merge and clean up logs

Add BiasAdd, Concat, padding ceil mode, and clean up code

Fix formatting and remove unused headers

Uncomment models

Fix bug with variable input, clean up

Don't split batch norm

Move TRT execution to TrtExecutor

Clean up

Clean up

Add partitioning

Implement graph_runtime execution for Relay/TRT

Fix bug in extern op

Fix compilation

Add EnableTrt pass to perform the same modification as the previous whole-graph annotator

Re-enable NNVM TRT

Remove SimplifyBatchnorm, add rules for converting ops

Fix format, remove unused tests

Enable multiple outputs

Fix multiple outputs

Fix activation lookup

Fix missing newline at EOF

Add license header. Add consistency test to models

Add method to check TRT used. Improve comments

Fix lint

Add util to check TRT version

Add if guards around TRT5.1 APIs

Add env var for workspace size, fix logger

Fix build

Add TRT versioning to EnableTrt pass

Fix build error in DLR

Fix compile for DLR

Update dmlc-core, fix copyright header, undo change to includes

Remove unused headers

Fix IsTrtCompatible visitor and move op list to constructor

Add dropout to compatible ops for CheckTrtCompatible only. Add not compatible test

Add squeeze, transpose, reshape, pad, and reduce ops. Add transpose on weights workaround

Fix formatting. Add unit tests

Support transpose on weights for conv2d and dense. Support asymmetric padding. Temp fix for 1D inputs. Add unit tests for all ops.

Support StridedSlice, AdaptivePooling approximation, Pytorch addmm fixer pass

Support (2,3,0,1) transpose on weights

Allow stride to be incomplete. Support ConstantNode -> kWeight

Fix: pass serialized graph by value in runtime. Allow inclusive count for strided pool

Add comments, disable failing test

Fix CI lint

Remove unused variables from TrtBuilder. Add more comments

Fix build for TRT4

Add GetTrtVersion(), move convert map to function, remove unneeded include, make batch_size_ and logger_ TrtBuilder members, check output existence

Use shared_ptr for converters. Add check for num outputs and inputs

Support image.resize

Make GetOpConverters return a shared_ptr

Clarify count inclusive padding weirdness

Use external codegen/runtime

Move to src/runtime/contrib/tensorrt. Add Save and Load methods for tensorrt module. Rename some classes

Require format to be tensorrt so that loader knows how to load

FoldConstants

Destroy engine and context after use. Store TRT weights from op converters. Formatting

Always apply ConvertLayout to NCHW

Clean up

Add ASF header

Change ObjectRef -> NodeRef

Fix lint

Fix pylint

Fix bug with scalar weights

Make TRT CMake more informative

Make TensorRT tests dependent on whether TRT codegen is enabled

Add serialization test.
Trevor Morris committed Jan 10, 2020
1 parent 303a471 commit aa98061
Showing 16 changed files with 3,129 additions and 3 deletions.
3 changes: 2 additions & 1 deletion cmake/config.cmake
@@ -173,7 +173,8 @@ set(USE_ROCBLAS OFF)
set(USE_SORT ON)

# Whether use TensorRT
# /path/to/tensorrt that contains include and lib dirs or ON to find
# automatically
set(USE_TENSORRT OFF)

# Whether use MKL-DNN (DNNL) codegen
19 changes: 17 additions & 2 deletions cmake/modules/contrib/TensorRT.cmake
@@ -20,17 +20,32 @@
if(USE_TENSORRT)
  if(IS_DIRECTORY ${USE_TENSORRT})
    set(TENSORRT_ROOT_DIR ${USE_TENSORRT})
    message(STATUS "Custom TensorRT path: " ${TENSORRT_ROOT_DIR})
  endif()
  find_path(TENSORRT_INCLUDE_DIR NvInfer.h HINTS ${TENSORRT_ROOT_DIR} PATH_SUFFIXES include)
  find_library(TENSORRT_LIB_DIR nvinfer HINTS ${TENSORRT_ROOT_DIR} PATH_SUFFIXES lib)
  find_package_handle_standard_args(TENSORRT DEFAULT_MSG TENSORRT_INCLUDE_DIR TENSORRT_LIB_DIR)
  if(NOT TENSORRT_FOUND)
    message(FATAL_ERROR "Could not find TensorRT.")
  endif()
  message(STATUS "TENSORRT_LIB_DIR: " ${TENSORRT_LIB_DIR})
  include_directories(${TENSORRT_INCLUDE_DIR})
  list(APPEND TVM_RUNTIME_LINKER_LIBS ${TENSORRT_LIB_DIR})

  # NNVM TRT sources
  file(GLOB TENSORRT_NNVM_SRCS src/contrib/subgraph/*.cc)
  list(APPEND RUNTIME_SRCS ${TENSORRT_NNVM_SRCS})

  # Relay TRT compiler sources
  file(GLOB TENSORRT_RELAY_COMPILER_SRCS src/relay/backend/contrib/tensorrt/*.cc)
  list(APPEND COMPILER_SRCS ${TENSORRT_RELAY_COMPILER_SRCS})

  # Relay TRT runtime sources
  file(GLOB TENSORRT_RELAY_RUNTIME_SRCS src/runtime/contrib/tensorrt/*.cc)
  list(APPEND RUNTIME_SRCS ${TENSORRT_RELAY_RUNTIME_SRCS})

  # Set defines
  set_source_files_properties(${RUNTIME_GRAPH_SRCS}
    PROPERTIES COMPILE_DEFINITIONS "TVM_GRAPH_RUNTIME_TENSORRT")
  set_source_files_properties(${COMPILER_SRCS}
    PROPERTIES COMPILE_DEFINITIONS "TVM_COMPILER_TENSORRT")
endif()
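Once TVM has been reconfigured with USE_TENSORRT and rebuilt, the simplest way to confirm that TensorRT was actually found and linked is to query the version binding introduced later in this commit. A minimal check, assuming the Python changes below are installed:

from tvm import relay

# An empty tuple means TVM was built without TensorRT support.
ver = relay.transform.GetTrtVersion()
print("TensorRT version:", ver if ver else "not available")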
10 changes: 10 additions & 0 deletions include/tvm/relay/transform.h
@@ -596,6 +596,16 @@ TVM_DLL Pass EtaExpand(bool expand_constructor, bool expand_global_var);
*/
TVM_DLL Pass PrintIR(bool show_meta_data = true);

/*!
* \brief Converts the entire Relay program into one that can be executed using
* TensorRT. If any of the operators are not supported by the TensorRT
* conversion, the unmodified program will be returned instead.
*
* \return The pass.
*/
TVM_DLL Pass EnableTrt(int trt_ver_major = 6, int trt_ver_minor = 0,
                       int trt_ver_patch = 1);

} // namespace transform

/*!
96 changes: 96 additions & 0 deletions python/tvm/relay/transform.py
@@ -1032,3 +1032,99 @@ def visit_var(self, var):
            else:
                return var
    return ChangeBatchMutator().visit(func)

def GetTrtVersion():
    """Gets the version of TensorRT that TVM is built against.

    Returns
    -------
    ret: Tuple[int]
        TensorRT version as a tuple of major, minor, and patch number. If TVM
        is not built with TensorRT, an empty tuple is returned instead.
    """
    return tuple(map(int, _transform.GetTrtVersion()))

def EnableTrt(mod, params=None, trt_version=None):
    """Converts the entire Relay program into one that can be executed using
    TensorRT. If any of the operators are not supported by the TensorRT
    conversion, the unmodified program will be returned instead.

    Parameters
    ----------
    mod: Module
        The original module.

    params : dict of str to NDArray
        Input parameters to the graph that do not change
        during inference time. Used for constant folding.

    trt_version : Optional[Tuple[int]]
        Which version of TensorRT to target for partitioning as a tuple of
        (major, minor, patch). If not specified, it is obtained from
        GetTrtVersion().

    Returns
    -------
    ret: Module
        The converted module if the conversion succeeded; otherwise the
        original, unmodified module.
    """
    def _bind_params(func, params):
        """Bind the params to the expression."""
        name_dict = {}
        for arg in func.params:
            name = arg.name_hint
            if name in name_dict:
                name_dict[name] = None
            else:
                name_dict[name] = arg
        bind_dict = {}
        for k, v in params.items():
            if k not in name_dict:
                continue
            arg = name_dict[k]
            if arg is None:
                raise ValueError("Multiple args in the function have name %s" % k)
            bind_dict[arg] = relay.expr.const(v)
        return relay.expr.bind(func, bind_dict)

    def legalize_layout_transform(attrs, inputs, types_):
        data = inputs[0]
        src_layout = attrs['src_layout']
        dst_layout = attrs['dst_layout']
        if src_layout == "NCHW" and dst_layout == "NHWC":
            return relay.transpose(data, axes=[0, 2, 3, 1])
        elif src_layout == "NHWC" and dst_layout == "NCHW":
            return relay.transpose(data, axes=[0, 3, 1, 2])
        elif src_layout == "HWIO" and dst_layout == "OIHW":
            return relay.transpose(data, axes=[3, 2, 0, 1])
        elif src_layout == "HWOI" and dst_layout == "OIHW":
            return relay.transpose(data, axes=[2, 3, 0, 1])
        # may be unneeded
        elif src_layout == "HWIO" and dst_layout == "IOHW":
            return relay.transpose(data, axes=[2, 3, 0, 1])
        return None

    if not trt_version:
        trt_version = GetTrtVersion()
        # If TVM wasn't built against TRT, default to TRT 6.
        if not trt_version:
            trt_version = (6, 0, 1)
    if not isinstance(trt_version, (list, tuple)):
        raise TypeError("trt_version is expected to be a list or tuple.")
    if len(trt_version) != 3:
        raise TypeError("trt_version is expected to contain 3 elements.")

    # Apply layout transform to NCHW.
    mod = relay.transform.RemoveUnusedFunctions()(mod)
    mod = relay.transform.InferType()(mod)
    mod = relay.transform.ConvertLayout('NCHW')(mod)
    from tvm.relay.testing.temp_op_attr import TempOpAttr
    with TempOpAttr("layout_transform", "FTVMLegalize", legalize_layout_transform):
        mod = relay.transform.Legalize()(mod)

    if params:
        # Bind params so that we can use FoldConstant.
        mod['main'] = _bind_params(mod['main'], params)
    return _transform.EnableTrt(*trt_version)(mod)
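Together, GetTrtVersion and EnableTrt form the user-facing entry point added by this commit. A minimal end-to-end sketch (the ResNet-18 workload, input shape, and CUDA target are illustrative assumptions, not part of this diff):

from tvm import relay
from tvm.relay.testing import resnet

mod, params = resnet.get_workload(num_layers=18, batch_size=1)
# Convert the whole program for TensorRT; falls back to the original
# module if any operator is unsupported.
mod = relay.transform.EnableTrt(mod, params)
with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(mod, target="cuda", params=params)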
77 changes: 77 additions & 0 deletions src/relay/backend/contrib/tensorrt/codegen.cc
@@ -0,0 +1,77 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file src/relay/backend/contrib/tensorrt/codegen.cc
* \brief Implementation of TensorRT codegen APIs.
*/

#include <tvm/node/serialization.h>
#include <tvm/relay/attrs/nn.h>
#include <tvm/relay/expr_functor.h>
#include <tvm/relay/transform.h>
#include <tvm/relay/type.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/registry.h>

#include <fstream>
#include <sstream>

#include "../../../../runtime/contrib/tensorrt/tensorrt_module.h"
#include "../codegen_c/codegen_c.h"

namespace tvm {
namespace relay {
namespace contrib {

class TrtModuleCodegen : public CSourceModuleCodegenBase {
 public:
  runtime::Module CreateCSourceModule(const NodeRef& ref) override {
    std::string serialized_subgraph;
    if (ref->IsInstance<FunctionNode>()) {
      serialized_subgraph = SaveJSON(Downcast<Function>(ref)->body);
    } else if (ref->IsInstance<relay::ModuleNode>()) {
      relay::Module mod = Downcast<relay::Module>(ref);
      // TODO(trevmorr): support multiple functions. It is currently not
      // possible for there to be more than one TRT func, so not a problem yet.
      for (const auto& it : mod->functions) {
        serialized_subgraph = SaveJSON(Downcast<Function>(it.second)->body);
      }
    } else {
      LOG(FATAL) << "The input ref is expected to be a Relay function or module";
    }
    return runtime::TensorRTModuleCreate(serialized_subgraph);
  }
};

/*!
* \brief The external compiler/codegen tool. It takes a Relay expression/module
* and compiles it into a runtime module.
*/
runtime::Module TrtCompiler(const NodeRef& ref) {
  TrtModuleCodegen tensorrt;
  return tensorrt.CreateCSourceModule(ref);
}

TVM_REGISTER_API("relay.ext.tensorrt").set_body_typed(TrtCompiler);

} // namespace contrib
} // namespace relay
} // namespace tvm
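The serialized subgraph produced above is loaded back by the TensorRT runtime module (under src/runtime/contrib/tensorrt, not expanded on this page), and execution goes through the ordinary graph runtime. A hedged sketch continuing from the build example earlier; the input name "data" and shape follow the ResNet workload assumed there:

import numpy as np
import tvm
from tvm.contrib import graph_runtime

ctx = tvm.gpu(0)
m = graph_runtime.create(graph, lib, ctx)
m.set_input(**params)
m.set_input("data", np.random.uniform(size=(1, 3, 224, 224)).astype("float32"))
m.run()
out = m.get_output(0).asnumpy()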
