diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 81c68a65576ca..c62e568349126 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1173,6 +1173,7 @@ USE_TRT_CONVERTER(conv2d_transpose); USE_TRT_CONVERTER(leaky_relu); USE_TRT_CONVERTER(shuffle_channel); USE_TRT_CONVERTER(swish); +USE_TRT_CONVERTER(group_norm); USE_TRT_CONVERTER(instance_norm); USE_TRT_CONVERTER(layer_norm); USE_TRT_CONVERTER(gelu); diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 26d6b9c9015c2..f9586ca1701f7 100644 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -1,7 +1,7 @@ # Add TRT tests nv_library(tensorrt_converter SRCS matmul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc - batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc + batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc group_norm_op.cc pad_op.cc split_op.cc prelu_op.cc leaky_relu_op.cc gelu_op.cc layer_norm_op.cc multihead_matmul_op.cc shuffle_channel_op.cc swish_op.cc instance_norm_op.cc stack_op.cc transpose_op.cc flatten_op.cc emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc clip_op.cc diff --git a/paddle/fluid/inference/tensorrt/convert/concat_op.cc b/paddle/fluid/inference/tensorrt/convert/concat_op.cc index 5ecf192338877..0e107e3c8af93 100644 --- a/paddle/fluid/inference/tensorrt/convert/concat_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/concat_op.cc @@ -34,7 +34,7 @@ class ConcatOpConverter : public OpConverter { public: void operator()(const framework::proto::OpDesc& op, const framework::Scope& scope, bool test_mode) override { - VLOG(3) << "convert a fluid mul op to tensorrt mul layer without bias"; + VLOG(3) << "convert a paddle concat op to tensorrt concat layer"; framework::OpDesc op_desc(op, nullptr); // Declare inputs @@ -43,11 +43,6 @@ class ConcatOpConverter : public OpConverter { itensors.push_back(engine_->GetITensor(input_name)); } int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis")); - PADDLE_ENFORCE_GT(axis, 0, platform::errors::InvalidArgument( - "The axis attr of Concat" - " op should be larger than 0 for trt. " - "But received %d.", - axis)); auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Concatenation, itensors.data(), itensors.size()); diff --git a/paddle/fluid/inference/tensorrt/convert/group_norm_op.cc b/paddle/fluid/inference/tensorrt/convert/group_norm_op.cc new file mode 100644 index 0000000000000..7ce9d3be77dc5 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/group_norm_op.cc @@ -0,0 +1,122 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at +http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" + +namespace paddle { +namespace framework { +class Scope; +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + +namespace paddle { +namespace inference { +namespace tensorrt { + +class GroupNormOpConverter : public OpConverter { + public: + void operator()(const framework::proto::OpDesc& op, + const framework::Scope& scope, bool test_mode) override { + VLOG(3) << "convert a fluid group_norm op"; + + framework::OpDesc op_desc(op, nullptr); + + auto* input_itensor = engine_->GetITensor(op_desc.Input("X").front()); + + int groups = BOOST_GET_CONST(int, op_desc.GetAttr("groups")); + float epsilon = BOOST_GET_CONST(float, op_desc.GetAttr("epsilon")); + + std::string scale_name = op_desc.Input("Scale").front(); + std::string bias_name = op_desc.Input("Bias").front(); + + // get the presistable var's data + auto get_persistable_data = [&](const std::string& var_name, + framework::DDim* dims) -> float* { + auto* temp_var = scope.FindVar(var_name); + auto* temp_tensor = temp_var->GetMutable(); + (*dims) = temp_tensor->dims(); + + auto* temp_data = engine_->GetWeightCPUData(var_name, temp_tensor, false); + return temp_data; + }; + + framework::DDim scale_dims; + framework::DDim bias_dims; + float* scale_data = get_persistable_data(scale_name, &scale_dims); + float* bias_data = get_persistable_data(bias_name, &bias_dims); + + int64_t scale_numel = framework::product(scale_dims); + int64_t bias_numel = framework::product(bias_dims); + + TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT, + static_cast(scale_data), + static_cast(scale_numel)}; + TensorRTEngine::Weight bias_weights{nvinfer1::DataType::kFLOAT, + static_cast(bias_data), + static_cast(bias_numel)}; + + nvinfer1::Dims scale_nv_dims; + nvinfer1::Dims bias_nv_dims; + scale_nv_dims.nbDims = scale_dims.size(); + bias_nv_dims.nbDims = bias_dims.size(); + for (int i = 0; i < scale_dims.size(); i++) { + scale_nv_dims.d[i] = scale_dims.at(i); + } + for (int i = 0; i < bias_dims.size(); i++) { + bias_nv_dims.d[i] = bias_dims.at(i); + } + + auto* scale_layer = TRT_ENGINE_ADD_LAYER(engine_, Constant, scale_nv_dims, + scale_weights.get()); + auto* bias_layer = TRT_ENGINE_ADD_LAYER(engine_, Constant, bias_nv_dims, + bias_weights.get()); + + std::vector plugin_inputs; + plugin_inputs.emplace_back(input_itensor); + plugin_inputs.emplace_back(scale_layer->getOutput(0)); + plugin_inputs.emplace_back(bias_layer->getOutput(0)); + + const std::vector fields{ + {"eps", &epsilon, nvinfer1::PluginFieldType::kFLOAT32, 1}, + {"num_groups", &groups, nvinfer1::PluginFieldType::kINT32, 1}, + }; + + nvinfer1::PluginFieldCollection* plugin_collections = + static_cast( + malloc(sizeof(*plugin_collections) + + fields.size() * sizeof(nvinfer1::PluginField))); + plugin_collections->nbFields = static_cast(fields.size()); + plugin_collections->fields = fields.data(); + + auto creator = + GetPluginRegistry()->getPluginCreator("GroupNormalizationPlugin", "1"); + auto group_norm_plugin = + creator->createPlugin("GroupNormalizationPlugin", plugin_collections); + free(plugin_collections); + + auto group_norm_plugin_layer = engine_->network()->addPluginV2( + plugin_inputs.data(), plugin_inputs.size(), *group_norm_plugin); + + auto output_name = op_desc.Output("Y")[0]; + RreplenishLayerAndOutput(group_norm_plugin_layer, "group_norm", + {output_name}, test_mode); + } +}; + +} // namespace tensorrt +} // namespace inference +} // namespace paddle + +REGISTER_TRT_OP_CONVERTER(group_norm, GroupNormOpConverter); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index d0c9d01872ced..6226c40d66ede 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -42,6 +42,9 @@ struct SimpleOpTypeSetTeller : public Teller { teller_set.insert("multihead_matmul"); teller_set.insert("skip_layernorm"); teller_set.insert("slice"); +#endif +#if IS_TRT_VERSION_GE(7130) + teller_set.insert("group_norm"); #endif } @@ -150,6 +153,21 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8, } } } + if (op_type == "group_norm") { + bool has_attrs = (desc.HasAttr("epsilon") && desc.HasAttr("groups")); + if (has_attrs == false) return false; + + auto registry = GetPluginRegistry(); + if (registry == nullptr) return false; + } + if (op_type == "concat") { + if (!desc.HasAttr("axis")) { + return false; + } else { + int axis = BOOST_GET_CONST(int, desc.GetAttr("axis")); + if (axis <= 0) return false; + } + } if (op_type == "transpose2" || op_type == "transpose") { if (!desc.HasAttr("axis")) { return false; diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_group_norm_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_group_norm_op.py new file mode 100644 index 0000000000000..85bd625413c86 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_group_norm_op.py @@ -0,0 +1,78 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import PassVersionChecker +from paddle.fluid.core import AnalysisConfig + + +class TRTGroupNormTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 512, 12, 12], dtype="float32") + relu_out = fluid.layers.relu(data) + relu6_out = fluid.layers.relu6(relu_out) + tanh_out = fluid.layers.tanh(relu6_out) + conv_out = fluid.layers.conv2d( + input=tanh_out, + num_filters=512, + filter_size=3, + groups=1, + padding=[1, 1], + bias_attr=False, + act=None) + out = self.append_group_norm(conv_out) + + self.feeds = { + "data": np.random.random([1, 512, 12, 12]).astype("float32"), + } + self.enable_trt = True + self.trt_parameters = TRTGroupNormTest.TensorRTParam( + 1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False) + self.dynamic_shape_params = TRTGroupNormTest.DynamicShapeParam({ + 'data': [1, 512, 12, 12] + }, {'data': [1, 512, 12, 12]}, {'data': [1, 512, 12, 12]}, False) + self.fetch_list = [out] + + def append_group_norm(self, data): + param_attr = fluid.ParamAttr( + name='group_norm_scale', + initializer=fluid.initializer.Constant(value=1.0)) + bias_attr = fluid.ParamAttr( + name='group_norm_bias', + initializer=fluid.initializer.Constant(value=0.0)) + return fluid.layers.group_norm( + data, + groups=32, + epsilon=0.000009999999747378752, + param_attr=param_attr, + bias_attr=bias_attr) + + def test_check_output(self): + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option(use_gpu) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) + + +if __name__ == "__main__": + unittest.main()