diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index f30e2c560b57ff..9d4e7a086b14a7 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -2954,6 +2954,7 @@ USE_TRT_CONVERTER(cumsum)
 USE_TRT_CONVERTER(assign)
 USE_TRT_CONVERTER(unbind)
 USE_TRT_CONVERTER(flip)
+USE_TRT_CONVERTER(share_data)
 #if IS_TRT_VERSION_GE(8522)
 USE_TRT_CONVERTER(flash_multihead_matmul)
 USE_TRT_CONVERTER(cross_multihead_matmul)
diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
index 2471c365e29ed9..206b2f5a6a2fdb 100755
--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -111,7 +111,8 @@ list(
   assign_op.cc
   flip_op.cc
   quantize_linear_op.cc
-  dequantize_linear_op.cc)
+  dequantize_linear_op.cc
+  share_data_op.cc)
 
 if(${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 7)
   list(APPEND CONVERT_FILES emb_eltwise_layernorm.cc
diff --git a/paddle/fluid/inference/tensorrt/convert/share_data_op.cc b/paddle/fluid/inference/tensorrt/convert/share_data_op.cc
new file mode 100644
index 00000000000000..644eeda8d102f1
--- /dev/null
+++ b/paddle/fluid/inference/tensorrt/convert/share_data_op.cc
@@ -0,0 +1,40 @@
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+class ShareDataOpConverter : public OpConverter {
+ public:
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope,
+                  bool test_mode) override {
+    VLOG(3) << "convert a share_data op to TensorRT";
+    framework::OpDesc op_desc(op, nullptr);
+    auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
+    // share_data merely aliases its input, so an Identity layer suffices.
+    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Identity, *input);
+    auto output_name = op_desc.Output("Out")[0];
+    RreplenishLayerAndOutput(layer, "share_data", {output_name}, test_mode);
+  }
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(share_data, ShareDataOpConverter);
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index b44c58379ca732..ca3e40a987223c 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -2918,7 +2918,8 @@ struct SimpleOpTypeSetTeller : public Teller {
       "assign",
       "flip",
       "quantize_linear",
-      "dequantize_linear"};
+      "dequantize_linear",
+      "share_data"};
 
   std::unordered_set<std::string> teller_set{
       "matrix_multiply",
@@ -3086,7 +3087,8 @@ struct SimpleOpTypeSetTeller : public Teller {
       "assign",
       "flip",
       "quantize_linear",
-      "dequantize_linear"};
+      "dequantize_linear",
+      "share_data"};
 };
 
 struct GenericPluginTeller : public Teller {
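Reviewer note: with the converter registered and both teller sets updated, the TensorRT subgraph pass will offload `share_data` whenever the engine is enabled. Below is a minimal sketch of exercising the new path end to end. The model file names are hypothetical placeholders; any saved inference model whose graph contains `share_data` will do, and TensorRT >= 8.4 must be available (matching the version gate in the test that follows). `min_subgraph_size=1` is chosen so that even a tiny share_data-only subgraph is handed to TensorRT.

import numpy as np
import paddle.inference as paddle_infer

# Hypothetical paths; substitute a real saved inference model.
config = paddle_infer.Config("model.pdmodel", "model.pdiparams")
config.enable_use_gpu(256, 0)
config.enable_tensorrt_engine(
    workspace_size=1 << 30,
    max_batch_size=1,
    min_subgraph_size=1,  # admit small subgraphs, e.g. a lone share_data
    precision_mode=paddle_infer.PrecisionType.Float32,
    use_static=False,
    use_calib_mode=False,
)
predictor = paddle_infer.create_predictor(config)

# Feed a dummy input, run, and fetch the result.
handle = predictor.get_input_handle(predictor.get_input_names()[0])
handle.copy_from_cpu(np.ones([1, 3, 64, 64], dtype=np.float32))
predictor.run()
result = predictor.get_output_handle(predictor.get_output_names()[0]).copy_to_cpu()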
diff --git a/test/ir/inference/test_trt_convert_share_data.py b/test/ir/inference/test_trt_convert_share_data.py
new file mode 100644
index 00000000000000..168ef72b6e590b
--- /dev/null
+++ b/test/ir/inference/test_trt_convert_share_data.py
@@ -0,0 +1,158 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from functools import partial
+from typing import List
+
+import numpy as np
+from program_config import ProgramConfig, TensorConfig
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest
+
+import paddle.inference as paddle_infer
+
+
+class TrtConvertShareDataTest(TrtLayerAutoScanTest):
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        # share_data conversion requires TensorRT >= 8.4; the (major, minor,
+        # patch) tuple is encoded as major * 1000 + minor * 100 + patch * 10.
+        compile_version = paddle_infer.get_trt_compile_version()
+        runtime_version = paddle_infer.get_trt_runtime_version()
+        if (
+            compile_version[0] * 1000
+            + compile_version[1] * 100
+            + compile_version[2] * 10
+            < 8400
+        ):
+            return False
+        if (
+            runtime_version[0] * 1000
+            + runtime_version[1] * 100
+            + runtime_version[2] * 10
+            < 8400
+        ):
+            return False
+        return True
+
+    def sample_program_configs(self):
+        def generate_input(dtype):
+            if self.dims == 1:
+                return np.ones([1]).astype(dtype)
+            else:
+                return np.ones([1, 3, 64, 64]).astype(dtype)
+
+        for dims in [1, 4]:
+            self.dims = dims
+            for dtype in [
+                np.int32,
+                np.float32,
+                np.int64,
+            ]:
+                ops_config = [
+                    {
+                        "op_type": "share_data",
+                        "op_inputs": {"X": ["input_data"]},
+                        "op_outputs": {"Out": ["output_data0"]},
+                        "op_attrs": {},
+                    },
+                    {
+                        "op_type": "share_data",
+                        "op_inputs": {"X": ["output_data0"]},
+                        "op_outputs": {"Out": ["output_data1"]},
+                        "op_attrs": {},
+                    },
+                ]
+
+                ops = self.generate_op_config(ops_config)
+
+                program_config = ProgramConfig(
+                    ops=ops,
+                    weights={},
+                    inputs={
+                        "input_data": TensorConfig(
+                            data_gen=partial(generate_input, dtype)
+                        )
+                    },
+                    outputs=["output_data1"],
+                )
+
+                yield program_config
+
+    def sample_predictor_configs(
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            if self.dims == 1:
+                self.dynamic_shape.min_input_shape = {"input_data": [1]}
+                self.dynamic_shape.max_input_shape = {"input_data": [1]}
+                self.dynamic_shape.opt_input_shape = {"input_data": [1]}
+            else:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [1, 3, 64, 64]
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [1, 3, 64, 64]
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [1, 3, 64, 64]
+                }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            # A 1-D input cannot enter TensorRT in static-shape mode: expect 0
+            # engines, with all 4 ops (feed, 2x share_data, fetch) in Paddle.
+            if not dynamic_shape and self.dims == 1:
+                return 0, 4
+            return 1, 2
+
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        program_config.set_input_type(np.float32)
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False
+        ), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        program_config.set_input_type(np.float16)
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False
+        ), 1e-2
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        program_config.set_input_type(np.float32)
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True
+        ), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        program_config.set_input_type(np.float16)
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True
+        ), 1e-2
+
+    def test(self):
+        self.run_test()
+
+
+if __name__ == "__main__":
+    unittest.main()
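A note on the `< 8400` gate in `is_program_valid` above: the test encodes a TensorRT version x.y.z as x*1000 + y*100 + z*10, the same scheme the C++ side uses in `IS_TRT_VERSION_GE(8522)` (which, with a trailing build digit, reads as TensorRT >= 8.5.2.2). A short worked example, assuming `get_trt_compile_version()` returns a (major, minor, patch) tuple:

import paddle.inference as paddle_infer

# e.g. (8, 4, 1) on a TensorRT 8.4.1 build
major, minor, patch = paddle_infer.get_trt_compile_version()
encoded = major * 1000 + minor * 100 + patch * 10  # 8.4.1 -> 8410
print(encoded >= 8400)  # True on TensorRT 8.4+, so the test cases run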