Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Relay/TRT Integration (whole graph only) #54

Merged
merged 22 commits into from
Jan 24, 2020
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 32 additions & 4 deletions cmake/modules/contrib/TensorRT.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,50 @@
# specific language governing permissions and limitations
# under the License.

# TensorRT Module

# TensorRT Runtime
# TensorRT Runtime
if(USE_TENSORRT)
  # Runtime support implies codegen support as well.
  set(USE_TENSORRT_CODEGEN ON)
  if(IS_DIRECTORY ${USE_TENSORRT})
    set(TENSORRT_ROOT_DIR ${USE_TENSORRT})
    message(STATUS "Custom TensorRT path: " ${TENSORRT_ROOT_DIR})
  endif()
  find_path(TENSORRT_INCLUDE_DIR NvInfer.h HINTS ${TENSORRT_ROOT_DIR} PATH_SUFFIXES include)
  find_library(TENSORRT_LIB_DIR nvinfer HINTS ${TENSORRT_ROOT_DIR} PATH_SUFFIXES lib)
  find_package_handle_standard_args(TENSORRT DEFAULT_MSG TENSORRT_INCLUDE_DIR TENSORRT_LIB_DIR)
  if(NOT TENSORRT_FOUND)
    # FATAL_ERROR is required to abort configuration; plain "ERROR" is not a
    # valid message mode and would let the build continue without TensorRT.
    message(FATAL_ERROR "Could not find TensorRT.")
  endif()
  message(STATUS "TENSORRT_LIB_DIR: " ${TENSORRT_LIB_DIR})
  include_directories(${TENSORRT_INCLUDE_DIR})
  list(APPEND TVM_RUNTIME_LINKER_LIBS ${TENSORRT_LIB_DIR})

  # NNVM TRT runtime sources (globbed once; previously duplicated under two
  # variable names, which added the same sources to RUNTIME_SRCS twice).
  file(GLOB TENSORRT_NNVM_SRCS src/contrib/subgraph/*.cc)
  list(APPEND RUNTIME_SRCS ${TENSORRT_NNVM_SRCS})

  # Relay TRT runtime sources
  file(GLOB TENSORRT_RELAY_CONTRIB_SRC src/runtime/contrib/tensorrt/*.cc)
  list(APPEND RUNTIME_SRCS ${TENSORRT_RELAY_CONTRIB_SRC})
  list(APPEND RUNTIME_SRCS src/relay/backend/contrib/tensorrt/common_utils.cc)

  # Set defines so the graph runtime sources compile in their TRT paths.
  set_source_files_properties(${RUNTIME_GRAPH_SRCS}
    PROPERTIES COMPILE_DEFINITIONS "TVM_GRAPH_RUNTIME_TENSORRT")
endif()

# TensorRT Codegen only. This can be enabled independently of USE_TENSORRT to
# enable compilation of TensorRT modules without requiring TensorRT to be
# installed. The compiled modules will only be able to be executed using a TVM
# built with USE_TENSORRT=ON.
if(USE_TENSORRT_CODEGEN)
  message(STATUS "Build with TensorRT codegen")
  # Relay TRT codegen sources
  file(GLOB TENSORRT_RELAY_CONTRIB_SRC src/relay/backend/contrib/tensorrt/*.cc)
  list(APPEND COMPILER_SRCS ${TENSORRT_RELAY_CONTRIB_SRC})
  list(APPEND COMPILER_SRCS src/runtime/contrib/tensorrt/tensorrt_module.cc)
  # If runtime is enabled also, set flag for compiler srcs
  if(USE_TENSORRT)
    set_source_files_properties(${COMPILER_SRCS}
      PROPERTIES COMPILE_DEFINITIONS "TVM_GRAPH_RUNTIME_TENSORRT")
  endif()
endif()
194 changes: 194 additions & 0 deletions python/tvm/relay/tensorrt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=invalid-name,arguments-differ,no-else-return,unused-argument,missing-docstring
"""
Relay TensorRT codegen.
"""
import tvm
from tvm import relay
from tvm.relay.expr import Call, Constant

from . import _transform
from .expr_functor import ExprMutator

def _bind_params(func, params):
    """Bind the given params to the function's arguments as Constant nodes.

    Parameters named in ``params`` that do not appear in ``func.params`` are
    silently skipped. A name shared by more than one function argument is
    ambiguous and raises ``ValueError``.
    """
    # Map each argument name to its Var; a duplicated name is recorded as
    # None so we can reject it if the caller tries to bind it.
    name_to_var = {}
    for param_var in func.params:
        hint = param_var.name_hint
        name_to_var[hint] = None if hint in name_to_var else param_var
    bindings = {}
    for k, value in params.items():
        if k not in name_to_var:
            continue
        var = name_to_var[k]
        if var is None:
            raise ValueError("Multiple args in the function have name %s" % k)
        bindings[var] = relay.expr.const(value)
    return relay.expr.bind(func, bindings)

class LegalizeLayoutTranform(ExprMutator):
    """
    Legalize Relay layout transforms to transpose ops to simplify TensorRT
    conversion.

    Rewriting layout_transform into the standard transpose op means only one
    TrtOpConverter (for transpose) is needed, instead of a nearly identical
    extra converter for layout_transform.
    """
    def visit_call(self, expr):
        visit = super().visit_call(expr)
        if expr.op == tvm.relay.op.get("layout_transform"):
            src_layout = expr.attrs['src_layout']
            dst_layout = expr.attrs['dst_layout']
            # Replace the layout_transform call itself with an equivalent
            # transpose of its input (visit.args[0]). Wrapping the transpose
            # around the layout_transform output would apply the layout
            # change twice and leave the unsupported op in the graph.
            if src_layout == "NCHW" and dst_layout == "NHWC":
                return relay.transpose(visit.args[0], axes=[0, 2, 3, 1])
            elif src_layout == "NHWC" and dst_layout == "NCHW":
                return relay.transpose(visit.args[0], axes=[0, 3, 1, 2])
            elif src_layout == "HWIO" and dst_layout == "OIHW":
                return relay.transpose(visit.args[0], axes=[3, 2, 0, 1])
            elif src_layout == "HWOI" and dst_layout == "OIHW":
                return relay.transpose(visit.args[0], axes=[2, 3, 0, 1])
            # May be unneeded.
            elif src_layout == "HWIO" and dst_layout == "IOHW":
                return relay.transpose(visit.args[0], axes=[2, 3, 0, 1])
        return visit

class RemoveDropout(ExprMutator):
    """
    Removes all nn.dropout from an expr by forwarding the dropout input past
    the TupleGetItem(…, 0) that selects its output.
    """
    def visit_tuple_getitem(self, expr):
        new_getitem = super().visit_tuple_getitem(expr)
        if new_getitem.index == 0:
            producer = new_getitem.tuple_value
            # dropout returns a tuple; index 0 is the data output, which we
            # replace with the dropout's own input.
            if isinstance(producer, Call) and producer.op.name == "nn.dropout":
                return producer.args[0]
        return new_getitem

class RemoveMultiplyByOne(ExprMutator):
    """
    Removes multiply by 1.0f. This pass when followed by
    RemoveRedundantTranspose is intended to remove a pattern of
    Transpose([1, 0]) -> Scale(1.0f) -> Transpose([1, 0]) produced by
    PyTorch's addmm operator.
    """
    def visit_call(self, expr):
        if expr.op.name == "multiply":
            # Multiply is commutative, so a scalar 1.0 constant on either
            # side makes the op an identity on the other operand.
            for i, arg in enumerate(expr.args):
                if isinstance(arg, Constant):
                    data = arg.data.asnumpy()
                    if data.shape == () and data.item() == 1.0:
                        # Visit the surviving operand so that removable
                        # patterns nested beneath it are also rewritten.
                        return self.visit(expr.args[1 - i])
        return super().visit_call(expr)

class RemoveRedundantTranspose(ExprMutator):
    """
    Removes Transpose([1, 0]) followed by Transpose([1, 0]). This pass, when
    preceded by RemoveMultiplyByOne, is intended to remove a pattern of
    Transpose([1, 0]) -> Scale(1.0f) -> Transpose([1, 0]) produced by
    PyTorch's addmm operator.
    """
    def check_axes(self, axes):
        # True only for the exact 2-D swap permutation [1, 0].
        return len(axes) == 2 and int(axes[0].value) == 1 and int(axes[1].value) == 0

    def visit_call(self, expr):
        if expr.op.name == "transpose":
            if self.check_axes(expr.attrs['axes']):
                inner = expr.args[0]
                if isinstance(inner, Call) and inner.op.name == "transpose":
                    if self.check_axes(inner.attrs['axes']):
                        # The two transposes cancel; visit what remains so
                        # nested removable patterns are also rewritten.
                        return self.visit(inner.args[0])
        return super().visit_call(expr)

def PreprocessForTrt(mod):
    """Applies passes to prepare main function for TensorRT conversion.

    Parameters
    ----------
    mod: Module
        The original module.

    Returns
    -------
    mod: Module
        The module modified for TensorRT.
    """
    # Order matters: layout legalization first, then the cleanup mutators.
    mutators = [
        LegalizeLayoutTranform(),
        RemoveDropout(),
        RemoveMultiplyByOne(),
        RemoveRedundantTranspose(),
    ]
    for mutator in mutators:
        mod['main'] = mutator.visit(mod['main'])
    return mod

def GetTrtVersion():
    """Gets the version of TensorRT that TVM is built against.

    Returns
    -------
    ret: Tuple[int]
        TensorRT version as a tuple of major, minor, and patch number. If TVM
        is not built with TensorRT, an empty tuple is returned instead.
    """
    version = _transform.GetTrtVersion()
    return tuple(int(part) for part in version)

def IsTrtRuntimeAvailable():
    """Check whether this TVM build provides the TensorRT runtime.

    Returns
    -------
    ret: bool
        True if the TRT version query is registered and reports a non-empty
        version, i.e. TVM was built with TensorRT support.
    """
    # allow_missing=True so a build without TRT returns None instead of
    # raising.
    if tvm.get_global_func("relay._transform.GetTrtVersion", True):
        return GetTrtVersion() != ()
    return False

def EnableTrt(mod, params=None, trt_version=None):
    """Converts the "main" function in the module into one that can be executed using
    TensorRT. If any of the operators are not supported by the TensorRT
    conversion, the unmodified program will be returned instead.

    Parameters
    ----------
    mod: Module
        The original module.

    params : dict of str to NDArray
        Input parameters to the graph that do not change
        during inference time. Used for constant folding.

    trt_version : Optional[Tuple[int]]
        Which version of TensorRT to target for partitioning as a tuple of
        (major, minor, patch). If not specified, will attempt to get using
        GetTrtVersion.

    Returns
    -------
    mod: Module
        The modified module which will use the TensorRT runtime if compatible.
    """
    if not trt_version:
        trt_version = GetTrtVersion()
    # If TVM wasn't built against TRT, default to target TRT 6. Since the
    # actual conversion to TRT is done at runtime, building against TRT is
    # not required for compilation.
    if not trt_version:
        trt_version = (6, 0, 1)
    assert isinstance(trt_version, (list, tuple))
    assert len(trt_version) == 3

    # Apply passes required for TRT. Order is significant: layouts are
    # converted to NCHW before the TRT-specific legalization/cleanup passes,
    # and params are bound before FoldConstant so they can be folded.
    mod = relay.transform.RemoveUnusedFunctions()(mod)
    mod = relay.transform.InferType()(mod)
    mod = relay.transform.ConvertLayout('NCHW')(mod)
    mod = PreprocessForTrt(mod)
    if params:
        # Bind params so that we can use FoldConstant.
        mod['main'] = _bind_params(mod['main'], params)
    mod = relay.transform.FoldConstant()(mod)
    # The C++ pass performs the actual partitioning for the given TRT version.
    return _transform.EnableTrt(*trt_version)(mod)
86 changes: 86 additions & 0 deletions src/relay/backend/contrib/tensorrt/codegen_tensorrt.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file src/relay/backend/contrib/tensorrt/codegen_tensorrt.cc
* \brief Implementation of TensorRT codegen APIs.
*/

#include <tvm/node/serialization.h>
#include <tvm/relay/attrs/nn.h>
#include <tvm/relay/expr_functor.h>
#include <tvm/relay/transform.h>
#include <tvm/relay/type.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/registry.h>

#include <fstream>
#include <sstream>

#include "../codegen_c/codegen_c.h"

namespace tvm {
namespace relay {
namespace contrib {

/*!
* \brief Generates a TensorRTModule from a relay expression. This "compilation"
* does not require TensorRT since the actual conversion using TensorRT APIs is
* deferred until runtime. This step simply serializes the relay program into a
* string.
*/
class TensorRTModuleCodegen : public CSourceModuleCodegenBase {
 public:
  // Serializes the Relay function body (or the single function inside a
  // module) to JSON and hands the string to the runtime-registered
  // TensorRTModule factory. No TensorRT APIs are invoked at this stage.
  runtime::Module CreateCSourceModule(const NodeRef& ref) override {
    std::string serialized_subgraph;
    if (ref->IsInstance<FunctionNode>()) {
      serialized_subgraph = SaveJSON(Downcast<Function>(ref)->body);
    } else if (ref->IsInstance<relay::ModuleNode>()) {
      relay::Module mod = Downcast<relay::Module>(ref);
      // TODO(trevmorr): support multiple functions. It is currently not
      // possible for there to be more than one TRT func, so not a problem yet.
      // NOTE(review): if a module ever holds several functions, only the last
      // one iterated is kept here — revisit before lifting that restriction.
      for (const auto& it : mod->functions) {
        serialized_subgraph = SaveJSON(Downcast<Function>(it.second)->body);
      }
    } else {
      LOG(FATAL)
          << "The input ref is expected to be a Relay function or module.";
    }
    // Delegate module creation to the registered PackedFunc so this codegen
    // step does not need to link against TensorRT itself.
    const PackedFunc* pf =
        runtime::Registry::Get("tvm.contrib.tensorrt.create");
    CHECK(pf != nullptr)
        << "tvm.contrib.tensorrt.create was not found in the registry.";
    return (*pf)(serialized_subgraph);
  }
};

/*!
* \brief The external compiler/codegen tool. It takes a Relay expression/module
* and compiles it into a runtime module.
*/
runtime::Module TrtCompiler(const NodeRef& ref) {
  // Stateless codegen object; all work happens in CreateCSourceModule.
  TensorRTModuleCodegen codegen;
  return codegen.CreateCSourceModule(ref);
}

// Registered as the external codegen hook for the "tensorrt" compiler tag.
TVM_REGISTER_API("relay.ext.tensorrt").set_body_typed(TrtCompiler);

} // namespace contrib
} // namespace relay
} // namespace tvm
46 changes: 46 additions & 0 deletions src/relay/backend/contrib/tensorrt/common_utils.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file src/relay/backend/contrib/tensorrt/common_utils.cc
* \brief Utility functions used by compilation and runtime.
*/

#include "common_utils.h"

namespace tvm {
namespace relay {
namespace contrib {

std::vector<int> GetShape(const Type& type) {
zhiics marked this conversation as resolved.
Show resolved Hide resolved
const auto* ttype = type.as<TensorTypeNode>();
CHECK(ttype);
std::vector<int> _shape;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can set vector size to be ttype->shape.size()

_shape.reserve(ttype->shape.size());
for (size_t i = 0; i < ttype->shape.size(); ++i) {
auto* val = ttype->shape[i].as<IntImm>();
CHECK(val);
_shape.push_back(val->value);
}
return _shape;
}

} // namespace contrib
} // namespace relay
} // namespace tvm
Loading