diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc index 9cb7c8c9e8b10c..a2b4e2f92b040b 100644 --- a/paddle/fluid/framework/op_desc.cc +++ b/paddle/fluid/framework/op_desc.cc @@ -582,6 +582,10 @@ bool OpDesc::HasOutput(const std::string &name) const { return outputs_.find(name) != outputs_.end(); } +bool OpDesc::HasInput(const std::string &name) const { + return inputs_.find(name) != inputs_.end(); +} + std::vector<std::string> OpDesc::OutputArgumentNames() const { std::vector<std::string> retv; for (auto &ipt : this->outputs_) { diff --git a/paddle/fluid/framework/op_desc.h b/paddle/fluid/framework/op_desc.h index 6fab6f06cb0b22..d6ce42d5c6519d 100644 --- a/paddle/fluid/framework/op_desc.h +++ b/paddle/fluid/framework/op_desc.h @@ -76,6 +76,8 @@ class OpDesc { bool HasOutput(const std::string &name) const; + bool HasInput(const std::string &name) const; + std::vector<std::string> OutputArgumentNames() const; void SetOutput(const std::string &param_name, diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index ccda587530bfdc..9110d63533ba4a 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -2419,6 +2419,9 @@ USE_TRT_CONVERTER(batch_norm); USE_TRT_CONVERTER(concat); USE_TRT_CONVERTER(dropout); USE_TRT_CONVERTER(pad); +#if IS_TRT_VERSION_GE(8200) +USE_TRT_CONVERTER(pad3d); +#endif USE_TRT_CONVERTER(hard_sigmoid); USE_TRT_CONVERTER(hard_swish); USE_TRT_CONVERTER(split); diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 487e8c9a78a049..0855cd4eb84ef2 100755 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -16,6 +16,7 @@ list( concat_op.cc dropout_op.cc group_norm_op.cc + pad3d_op.cc pad_op.cc split_op.cc square_op.cc diff --git a/paddle/fluid/inference/tensorrt/convert/pad3d_op.cc 
b/paddle/fluid/inference/tensorrt/convert/pad3d_op.cc new file mode 100644 index 00000000000000..efb4012b00ab05 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/pad3d_op.cc @@ -0,0 +1,183 @@ +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace tensorrt { + +/* + * Pad3dOp. + */ +class Pad3dOpConverter : public OpConverter { + public: + void operator()(const framework::proto::OpDesc& op, + const framework::Scope& scope, + bool test_mode) override { +#if IS_TRT_VERSION_GE(8200) + VLOG(3) << "convert a pad3d op to tensorrt pad3d layer"; + + framework::OpDesc op_desc(op, nullptr); + + // Declare inputs + auto* input = engine_->GetITensor(op_desc.Input("X")[0]); + + nvinfer1::ITensor* paddings; + if (op_desc.HasInput("Paddings") && op_desc.Input("Paddings").size() > 0) { + paddings = engine_->GetITensor(op_desc.Input("Paddings")[0]); + } else { + std::vector<int> paddings_v = + PADDLE_GET_CONST(std::vector<int>, op_desc.GetAttr("paddings")); + paddings = Add1DConstantLayer(paddings_v); + } + + float value{0.F}; + if (op_desc.HasAttr("value")) { + value = PADDLE_GET_CONST(float, op_desc.GetAttr("value")); + } + + std::string padding_mode = "constant"; + if (op_desc.HasAttr("mode")) { + padding_mode = PADDLE_GET_CONST(std::string, op_desc.GetAttr("mode")); + } + + const int input_dim = 
input->getDimensions().nbDims; + const int pad_size = paddings->getDimensions().d[0]; + PADDLE_ENFORCE_EQ(input_dim * 2 - 4, + pad_size, + phi::errors::InvalidArgument( + "Expected paddings size is %d, but received %d.", + input_dim * 2 - 4, + pad_size)); + // convert paddle pad to tensorrt pad + std::vector<int> shuffle_index{4, 2, 0, 5, 3, 1}; + std::vector<nvinfer1::ITensor*> shuffle_inputs; + for (int i = 0; i < pad_size; i++) { + shuffle_inputs.push_back(GetEleTensorOfShape(paddings, shuffle_index[i])); + } + paddings = Concat(shuffle_inputs); + auto* pre_zeros = Add1DConstantLayer(std::vector<int>(2, 0)); + auto start_slice1 = nvinfer1::Dims{1, { 0 }}; + auto start_slice2 = nvinfer1::Dims{1, { 3 }}; + auto size_slice = nvinfer1::Dims{1, { 3 }}; + auto stride_slice = nvinfer1::Dims{1, { 1 }}; + + auto* pre_pad = + TRT_ENGINE_ADD_LAYER( + engine_, Slice, *paddings, start_slice1, size_slice, stride_slice) + ->getOutput(0); + pre_pad = Concat(std::vector<nvinfer1::ITensor*>{pre_zeros, pre_pad}); + auto* post_pad = + TRT_ENGINE_ADD_LAYER( + engine_, Slice, *paddings, start_slice2, size_slice, stride_slice) + ->getOutput(0); + post_pad = Concat(std::vector<nvinfer1::ITensor*>{pre_zeros, post_pad}); + + std::vector<int> zeros_v(input_dim, 0); + auto const zeros = Add1DConstantLayer(zeros_v); + + nvinfer1::ITensor* start{}; + nvinfer1::ITensor* size{}; + // elementwise add zeros and pre_pad + start = TRT_ENGINE_ADD_LAYER(engine_, + ElementWise, + *zeros, + *pre_pad, + nvinfer1::ElementWiseOperation::kSUB) + ->getOutput(0); + + auto const total_padding = + TRT_ENGINE_ADD_LAYER(engine_, + ElementWise, + *pre_pad, + *post_pad, + nvinfer1::ElementWiseOperation::kSUM) + ->getOutput(0); + + auto* input_shape = Shape(input); + size = TRT_ENGINE_ADD_LAYER(engine_, + ElementWise, + *input_shape, + *total_padding, + nvinfer1::ElementWiseOperation::kSUM) + ->getOutput(0); + // add slice layer + nvinfer1::Dims stride; + stride.nbDims = input_dim; + std::fill_n(stride.d, input_dim, 1); + auto const& dummy = stride; + auto* slice_layer = 
TRT_ENGINE_ADD_LAYER(engine_, + Slice, + *const_cast<nvinfer1::ITensor*>(input), + dummy, + dummy, + stride); + slice_layer->setInput(1, *start); + slice_layer->setInput(2, *size); + if (padding_mode == "constant") { +#if IS_TRT_VERSION_GE(8500) + slice_layer->setMode(nvinfer1::SampleMode::kFILL); +#else + slice_layer->setMode(nvinfer1::SliceMode::kFILL); +#endif + if (value != 0.F) { + nvinfer1::ITensor* fill_value = nullptr; + switch (input->getType()) { + case nvinfer1::DataType::kFLOAT: + case nvinfer1::DataType::kHALF: + case nvinfer1::DataType::kINT8: { + fill_value = Add1DConstantLayer(value); + break; + } + default: { + int value_int = static_cast<int>(value); + fill_value = Add1DConstantLayer(value_int); + break; + } + } + slice_layer->setInput(4, *fill_value); + } + } else if (padding_mode == "reflect") { +#if IS_TRT_VERSION_GE(8500) + slice_layer->setMode(nvinfer1::SampleMode::kREFLECT); +#else + slice_layer->setMode(nvinfer1::SliceMode::kREFLECT); +#endif + } else if (padding_mode == "replicate") { +#if IS_TRT_VERSION_GE(8500) + slice_layer->setMode(nvinfer1::SampleMode::kCLAMP); +#else + slice_layer->setMode(nvinfer1::SliceMode::kCLAMP); +#endif + } else { + PADDLE_THROW(paddle::platform::errors::Fatal("Unsupported mode: %s", + padding_mode)); + } + + auto output_name = op_desc.Output("Out")[0]; + RreplenishLayerAndOutput(slice_layer, "pad3d", {output_name}, test_mode); + +#else + VLOG(3) << "pad3d is not supported when TensorRT < 8.2"; +#endif + } +}; + +} // namespace tensorrt +} // namespace inference +} // namespace paddle + +REGISTER_TRT_OP_CONVERTER(pad3d, Pad3dOpConverter); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index d2894bfbb2d2f5..d09a77058049d6 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -1748,6 +1748,35 @@ struct SimpleOpTypeSetTeller : public Teller { } } + if (op_type == "pad3d") { +#if !IS_TRT_VERSION_GE(8200) + VLOG(3) << "pad3d 
is not supported when TensorRT < 8.2"; + return false; +#endif + if (!with_dynamic_shape) { + VLOG(3) << "pad3d is not supported static shape"; + return false; + } + if (!desc.HasAttr("paddings") && !desc.HasInput("Paddings")) { + return false; + } + if (desc.HasAttr("mode")) { + std::string mode = PADDLE_GET_CONST(std::string, desc.GetAttr("mode")); + if (mode != "constant" && mode != "reflect" && mode != "replicate") { + VLOG(3) << "The pad3d layer of TRT only support " + "constant/reflect/replicate mode."; + return false; + } + } + if (desc.HasAttr("data_format")) { + std::string data_format = + PADDLE_GET_CONST(std::string, desc.GetAttr("data_format")); + if (data_format != "NCDHW") { + VLOG(3) << "The pad3d layer of TRT only support NCDHW data format."; + return false; + } + } + } if (op_type == "swish") { auto* block = desc.Block(); if (block == nullptr) { @@ -1764,7 +1793,6 @@ struct SimpleOpTypeSetTeller : public Teller { return false; } } - if (op_type == "prelu") { if (desc.Input("X").size() != 1) { VLOG(3) << "Invalid input X's size of prelu TRT converter. 
" @@ -2694,6 +2722,7 @@ struct SimpleOpTypeSetTeller : public Teller { "batch_norm", "concat", "tanh", + "pad3d", "pad", "elementwise_add", "elementwise_sub", @@ -2849,6 +2878,7 @@ struct SimpleOpTypeSetTeller : public Teller { "batch_norm", "concat", "tanh", + "pad3d", "pad", "elementwise_add", "elementwise_sub", @@ -2974,14 +3004,6 @@ struct GenericPluginTeller : public Teller { if (!desc.HasAttr("iou_aware") && !desc.HasAttr("iou_aware_factor")) return false; } - if (op_type == "pad3d") { - auto pad3d_inputs = desc.Inputs(); - if (pad3d_inputs.find("Paddings") != pad3d_inputs.end()) { - if (desc.Input("Paddings").size() >= 1) { - return false; - } - } - } if (use_no_calib_int8) { return false; } else { diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_pad3d.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_pad3d.py index 2a5087c03e61c5..df0426b23493fb 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_pad3d.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_pad3d.py @@ -23,59 +23,195 @@ import paddle.inference as paddle_infer -class TrtConvertPad3d(TrtLayerAutoScanTest): +class TrtConvertPad3dTensorPadding(TrtLayerAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: + valid_version = (8, 2, 0) + compile_version = paddle_infer.get_trt_compile_version() + runtime_version = paddle_infer.get_trt_runtime_version() + self.assertTrue(compile_version == runtime_version) + if compile_version < valid_version: + return False return True def sample_program_configs(self): def generate_input1(): - return np.ones([1, 1, 3, 64, 64]).astype(np.float32) + shape = [6, 6, 6, 64, 64] + return np.random.uniform(low=0.1, high=1.0, size=shape).astype( + np.float32 + ) - for value in [True, False]: + def generate_paddings(p): + return np.array(p).astype(np.int32) + + for value in [0, 1.5, 2, 2.5, 3]: for paddings in [ [0, 0, 0, 0, 1, 1], - [0, 0, 1, 2, 3, 
4], + [0, 0, 1, 2, 1, 2], [1, 1, 1, 1, 1, 1], [0, 0, -1, -1, 1, 1], ]: - dics = [{"value": value, "paddings": paddings}, {}] - - ops_config = [ - { - "op_type": "pad3d", - "op_inputs": {"X": ["input_data"]}, - "op_outputs": {"Out": ["output_data"]}, - "op_attrs": dics[0], + for pad_mode in ['constant', 'reflect', 'replicate']: + dics = [ + { + "value": value, + "data_format": "NCDHW", + "mode": pad_mode, + "paddings": [], + }, + {}, + ] + ops_config = [ + { + "op_type": "pad3d", + "op_inputs": { + "X": ["input_data"], + "Paddings": ["input_paddings"], + }, + "op_outputs": {"Out": ["output_data"]}, + "op_attrs": dics[0], + } + ] + ops = self.generate_op_config(ops_config) + inputs = { + "input_data": TensorConfig( + data_gen=partial(generate_input1) + ) } - ] - ops = self.generate_op_config(ops_config) - for i in range(10): program_config = ProgramConfig( ops=ops, - weights={}, - inputs={ - "input_data": TensorConfig( - data_gen=partial(generate_input1) - ), + weights={ + "input_paddings": TensorConfig( + data_gen=partial(generate_paddings, paddings) + ) }, + inputs=inputs, outputs=["output_data"], ) + yield program_config - yield program_config + def sample_predictor_configs( + self, program_config + ) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): + self.dynamic_shape.min_input_shape = { + "input_data": [6, 6, 6, 64, 64], + } + self.dynamic_shape.max_input_shape = { + "input_data": [8, 8, 8, 66, 66], + } + self.dynamic_shape.opt_input_shape = { + "input_data": [6, 6, 6, 64, 64], + } + + def clear_dynamic_shape(): + self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.min_input_shape = {} + self.dynamic_shape.opt_input_shape = {} + + def generate_trt_nodes_num(attrs, dynamic_shape): + if dynamic_shape: + return 1, 2 + return 0, 3 + + attrs = [ + program_config.ops[i].attrs for i in range(len(program_config.ops)) + ] + + clear_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield 
self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), 1e-3 + + # for dynamic_shape + generate_dynamic_shape(attrs) + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), 1e-3 + + def test(self): + self.run_test() + + +class TrtConvertPad3dListPadding(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: + valid_version = (8, 2, 0) + compile_version = paddle_infer.get_trt_compile_version() + runtime_version = paddle_infer.get_trt_runtime_version() + self.assertTrue(compile_version == runtime_version) + if compile_version < valid_version: + return False + return True + + def sample_program_configs(self): + def generate_input1(): + shape = [6, 6, 6, 64, 64] + return np.random.uniform(low=0.1, high=1.0, size=shape).astype( + np.float32 + ) + + for value in [0, 1.1, 2.3, 3]: + for paddings in [ + [0, 0, 0, 0, 1, 1], + [0, 0, 1, 2, 1, 2], + [1, 1, 1, 1, 1, 1], + [0, 0, -1, -1, 1, 1], + ]: + for pad_mode in ['constant', 'reflect', 'replicate']: + dics = [ + { + "value": value, + "data_format": "NCDHW", + "mode": pad_mode, + "paddings": paddings, + }, + {}, + ] + ops_config = [ + { + "op_type": "pad3d", + "op_inputs": {"X": ["input_data"]}, + "op_outputs": {"Out": ["output_data"]}, + "op_attrs": dics[0], + } + ] + ops = self.generate_op_config(ops_config) + inputs = { + "input_data": TensorConfig( + data_gen=partial(generate_input1) + ) + } + + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs=inputs, + outputs=["output_data"], + ) + yield program_config def sample_predictor_configs( self, program_config ) -> 
(paddle_infer.Config, List[int], float): def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = { - "input_data": [1, 1, 3, 64, 64] + "input_data": [6, 6, 6, 64, 64], } self.dynamic_shape.max_input_shape = { - "input_data": [1, 1, 3, 64, 64] + "input_data": [8, 8, 8, 66, 66], } self.dynamic_shape.opt_input_shape = { - "input_data": [1, 1, 3, 64, 64] + "input_data": [6, 6, 6, 64, 64], } def clear_dynamic_shape(): @@ -83,23 +219,35 @@ def clear_dynamic_shape(): self.dynamic_shape.min_input_shape = {} self.dynamic_shape.opt_input_shape = {} + def generate_trt_nodes_num(attrs, dynamic_shape): + if dynamic_shape: + return 1, 2 + return 0, 3 + attrs = [ program_config.ops[i].attrs for i in range(len(program_config.ops)) ] - # for static_shape clear_dynamic_shape() self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), (0, 3), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), (0, 3), 1e-3 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), 1e-3 # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), (1, 2), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), (1, 2), 1e-3 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), 1e-3 def test(self): self.run_test()