[BYOC][TensorRT] Fix dynamic batching when use_implicit_batch=False #8461

Merged
13 changes: 11 additions & 2 deletions src/runtime/contrib/tensorrt/tensorrt_builder.cc
@@ -163,10 +163,19 @@ TensorRTEngineAndContext TensorRTBuilder::BuildEngine() {
  auto profile = builder_->createOptimizationProfile();
  for (int i = 0; i < network_->getNbInputs(); ++i) {
    auto name = network_->getInput(i)->getName();
-   auto dims = network_->getInput(i)->getDimensions();
-   profile->setDimensions(name, nvinfer1::OptProfileSelector::kMIN, dims);
+   const uint32_t entry_id = entry_id_map_[name];
+   std::vector<int64_t> shape(data_entry_[entry_id]->shape,
+                              data_entry_[entry_id]->shape + data_entry_[entry_id]->ndim);
+   auto dims = VectorToTrtDims(shape);
+
    profile->setDimensions(name, nvinfer1::OptProfileSelector::kOPT, dims);
    profile->setDimensions(name, nvinfer1::OptProfileSelector::kMAX, dims);
+   // Set minimum batch size to 1 when dynamic batching is used.
+   if (network_->getInput(i)->getDimensions().nbDims >= 1 &&
+       network_->getInput(i)->getDimensions().d[0] == -1) {
+     dims.d[0] = 1;
+   }
+   profile->setDimensions(name, nvinfer1::OptProfileSelector::kMIN, dims);
  }
  config_->addOptimizationProfile(profile);
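The builder change above derives the optimization profile from the shape of the input data actually bound at build time: kOPT and kMAX keep that shape, while kMIN relaxes a dynamic (-1) batch dimension to 1, so a single engine can serve any batch size between 1 and the build-time batch. Below is a minimal standalone sketch of the same profile setup, assuming TensorRT 8.x with an explicit-batch network; the input name "x", the shapes, and the batch size 7 are hypothetical, and this is not TVM code:

#include <NvInfer.h>
#include <iostream>

namespace {
class Logger : public nvinfer1::ILogger {
  void log(Severity severity, const char* msg) noexcept override {
    if (severity <= Severity::kWARNING) std::cout << msg << std::endl;
  }
};
}  // namespace

int main() {
  Logger logger;
  auto* builder = nvinfer1::createInferBuilder(logger);
  const auto flags =
      1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
  auto* network = builder->createNetworkV2(flags);

  // Declare the batch dimension as dynamic (-1) in the network definition.
  network->addInput("x", nvinfer1::DataType::kFLOAT, nvinfer1::Dims4{-1, 32, 8, 8});

  auto* config = builder->createBuilderConfig();
  auto* profile = builder->createOptimizationProfile();

  // kOPT/kMAX mirror the batch size seen at build time (7 here, hypothetically);
  // kMIN relaxes the dynamic batch dimension to 1 so smaller batches stay valid.
  nvinfer1::Dims4 build_dims{7, 32, 8, 8};
  profile->setDimensions("x", nvinfer1::OptProfileSelector::kOPT, build_dims);
  profile->setDimensions("x", nvinfer1::OptProfileSelector::kMAX, build_dims);
  nvinfer1::Dims4 min_dims = build_dims;
  min_dims.d[0] = 1;
  profile->setDimensions("x", nvinfer1::OptProfileSelector::kMIN, min_dims);
  config->addOptimizationProfile(profile);

  // Engine construction (buildSerializedNetwork etc.) would follow; omitted here.
  delete config;
  delete network;
  delete builder;
  return 0;
}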
8 changes: 7 additions & 1 deletion src/runtime/contrib/tensorrt/tensorrt_runtime.cc
@@ -140,6 +140,12 @@ class TensorRTRuntime : public JSONRuntimeBase {
    const std::string name = nodes_[nid].GetOpName() + "_" + std::to_string(j);
    int binding_index = engine->getBindingIndex(name.c_str());
    ICHECK_NE(binding_index, -1);
+   if (!use_implicit_batch_) {
+     std::vector<int64_t> shape(data_entry_[eid]->shape,
+                                data_entry_[eid]->shape + data_entry_[eid]->ndim);
+     auto dims = VectorToTrtDims(shape);
+     ICHECK(context->setBindingDimensions(binding_index, dims));
+   }
    if (data_entry_[eid]->device.device_type == kDLCUDA) {
      bindings[binding_index] = data_entry_[eid]->data;
    } else {

Review comment (Contributor), attached to the setBindingDimensions line:
setBindingDimensions is not available in JP 4.2 TRT 5.0.6
error: ‘class nvinfer1::IExecutionContext’ has no member named ‘setBindingDimensions’
@@ -300,7 +306,7 @@ class TensorRTRuntime : public JSONRuntimeBase {
    helper.DeclareField("inputs", &engine_and_context.inputs);
    helper.DeclareField("outputs", &engine_and_context.outputs);
    helper.ReadAllFields(&reader);
-   const int batch_size = 1;
+   const int batch_size = GetBatchSize();
    trt_engine_cache_[std::make_pair(symbol_name_, batch_size)] = engine_and_context;
    return true;
  }
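On the review comment above: setBindingDimensions() arrived with the dynamic-shape API in TensorRT 6, so JetPack 4.2 (TensorRT 5.0.6) cannot compile the call. One plausible way to keep older toolchains building — a sketch only, not the resolution adopted by this PR — is to guard the call on the version macro from NvInferVersion.h; the helper name here is hypothetical:

#include <NvInfer.h>
#include <stdexcept>

// Hypothetical helper: bind the actual runtime input shape for explicit-batch
// engines, compiled out on TensorRT releases that predate dynamic shapes.
void SetRuntimeInputShape(nvinfer1::IExecutionContext* context, int binding_index,
                          const nvinfer1::Dims& dims) {
#if NV_TENSORRT_MAJOR >= 6
  // setBindingDimensions() returns false if the shape falls outside the
  // engine's optimization profile (e.g. a batch larger than the kMAX batch).
  if (!context->setBindingDimensions(binding_index, dims)) {
    throw std::runtime_error("Input shape is outside the optimization profile.");
  }
#else
  // TensorRT 5 and earlier only support implicit batch; nothing to set.
  (void)context;
  (void)binding_index;
  (void)dims;
#endif
}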
54 changes: 28 additions & 26 deletions tests/python/contrib/test_tensorrt.py
@@ -1251,33 +1251,35 @@ def test_tensorrt_dynamic_batch_conv():
    x_data = np.ones([max(batches_to_test)] + list(x_shape)[1:]).astype("float32")
    k_shape = (16, 32, 3, 3)
    params = {"kernel": np.random.uniform(-1, 1, k_shape).astype("float32")}
-   result_arr = [{"cuda": {}, "llvm": {}} for _ in range(len(batches_to_test))]
-   for use_trt in [True, False]:
-       x = relay.var("x", shape=x_shape, dtype="float32")
-       kernel = relay.var("kernel", shape=k_shape, dtype="float32")
-       out = relay.nn.conv2d(x, kernel, channels=16, kernel_size=(3, 3), groups=1)
-       f = relay.Function([x, kernel], out)
-       mod = tvm.IRModule()
-       mod["main"] = f
-       if use_trt:
-           mod, _ = tensorrt.partition_for_tensorrt(mod, params)
-
-   if not skip_runtime_test():
-       for target in ["llvm", "cuda"]:
-           with relay.build_config(opt_level=3):
-               relay_exec = relay.create_executor(
-                   "vm", mod=mod, device=tvm.cpu(0), target="llvm"
-               )
-
-               for i, batch_size in enumerate(batches_to_test):
-                   result_arr[i][target][use_trt] = relay_exec.evaluate()(
-                       x_data[:batch_size, ...], **params
-                   )
-
-   if not skip_runtime_test():
-       for i in range(len(batches_to_test)):
-           for target in ["llvm", "cuda"]:
-               assert_result_dict_holds(result_arr[i][target])
+   for use_implicit_batch in [True, False]:
+       result_arr = [{"cuda": {}, "llvm": {}} for _ in range(len(batches_to_test))]
+       for use_trt in [True, False]:
+           x = relay.var("x", shape=x_shape, dtype="float32")
+           kernel = relay.var("kernel", shape=k_shape, dtype="float32")
+           out = relay.nn.conv2d(x, kernel, channels=16, kernel_size=(3, 3), groups=1)
+           f = relay.Function([x, kernel], out)
+           mod = tvm.IRModule()
+           mod["main"] = f
+           if use_trt:
+               mod, config = tensorrt.partition_for_tensorrt(
+                   mod, params, use_implicit_batch=use_implicit_batch
+               )
+           if not skip_runtime_test():
+               for target in ["llvm", "cuda"]:
+                   with tvm.transform.PassContext(
+                       opt_level=3, config={"relay.ext.tensorrt.options": config}
+                   ):
+                       relay_exec = relay.create_executor(
+                           "vm", mod=mod, device=tvm.device(target), target=target
+                       )
+                       for i, batch_size in enumerate(batches_to_test):
+                           result_arr[i][target][use_trt] = relay_exec.evaluate()(
+                               x_data[:batch_size, ...], **params
+                           )
+       if not skip_runtime_test():
+           for i in range(len(batches_to_test)):
+               for target in ["llvm", "cuda"]:
+                   assert_result_dict_holds(result_arr[i][target])


def test_maskrcnn_resnet50() -> None: