From 9eaf02981832ee771ad821d1eaae3702edccd6cf Mon Sep 17 00:00:00 2001 From: andsonder Date: Tue, 28 Feb 2023 18:19:53 +0800 Subject: [PATCH 01/17] update codes about temporal_shift --- .../fluid/inference/api/analysis_predictor.cc | 1 + .../inference/tensorrt/convert/CMakeLists.txt | 3 +- .../tensorrt/convert/temporal_shift_op.cc | 89 +++++++++++++++++++ paddle/fluid/inference/tensorrt/op_teller.cc | 6 +- 4 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index e8888940a99ac9..437808696a8871 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -2490,6 +2490,7 @@ USE_TRT_CONVERTER(mish); USE_TRT_CONVERTER(deformable_conv); USE_TRT_CONVERTER(pool3d) USE_TRT_CONVERTER(square); +USE_TRT_CONVERTER(temporal_shift); // unary op USE_TRT_CONVERTER(exp); USE_TRT_CONVERTER(log); diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index de91a0493b6946..271563380f3746 100755 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -100,7 +100,8 @@ list( elementwiseadd_transpose_op.cc skip_groupnorm_act_op.cc preln_groupnorm_act_op.cc - expand_v2_op.cc) + expand_v2_op.cc + temporal_shift_op.cc) if(${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 7) list(APPEND CONVERT_FILES emb_eltwise_layernorm.cc diff --git a/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc b/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc new file mode 100644 index 00000000000000..184136eccb512c --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc @@ -0,0 +1,89 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" + +namespace paddle { +namespace framework { +class Scope; + +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + +namespace paddle { +namespace inference { +namespace tensorrt { + +/* + * TemporalShiftOp. + */ +class TemporalShiftOpConverter : public OpConverter { + public: + void operator()(const framework::proto::OpDesc& op, + const framework::Scope& scope, + bool test_mode) override { + VLOG(3) << "convert a fluid transpose op to tensorrt tranpose layer"; + + framework::OpDesc op_desc(op, nullptr); + // Declare inputs + auto* input = engine_->GetITensor(op_desc.Input("X")[0]); + const float shift_ratio = PADDLE_GET_CONST(float, op_desc.GetAttr("shift_ratio")); + const int T = PADDLE_GET_CONST(int, op_desc.GetAttr("seg_num")); + + const auto& input_dims = input->getDimensions(); + int NT = input_dims.d[0]; + int C = input_dims.d[1]; + int H = input_dims.d[2]; + int W = input_dims.d[3]; + int N = NT / T; + + // Reshape input to [N,T,C,H,W] + auto reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); + reshape_layer->setReshapeDimensions(nvinfer1::Dims5(N, T, C, H, W)); + input = reshape_layer->getOutput(0); + + // Pad input + auto* pad_layer = TRT_ENGINE_ADD_LAYER(engine_, Padding, *input, nvinfer1::Dims4(0, 0, 1, 1), nvinfer1::Dims4(0, 0, 1, 1)); + input = pad_layer->getOutput(0); + + // Slice input + int slice_size = static_cast(C * shift_ratio); + auto slice1_layer = TRT_ENGINE_ADD_LAYER(engine_, Slice, *input, nvinfer1::Dims3(0, 0, 0), nvinfer1::Dims3(N, T, slice_size), nvinfer1::Dims3(1, 1, 1)); + auto slice2_layer = TRT_ENGINE_ADD_LAYER(engine_, Slice, *input, nvinfer1::Dims3(0, 2, slice_size), nvinfer1::Dims3(N, T, slice_size), nvinfer1::Dims3(1, 1, 1)); + auto slice3_layer = TRT_ENGINE_ADD_LAYER(engine_, Slice, *input, nvinfer1::Dims3(0, 1, slice_size * 2), nvinfer1::Dims3(N, T, C - slice_size * 2), nvinfer1::Dims3(1, 1, 1)); + + // Concatenate slices along the third dimension (C) + auto concat_layer = TRT_ENGINE_ADD_LAYER(engine_, Concatenation, &slice1_layer->getOutput(0), 3); + concat_layer->setInput(1, slice2_layer->getOutput(0)); + concat_layer->setInput(2, slice3_layer->getOutput(0)); + concat_layer->setAxis(2); + + // Reshape output to [N*T,C,H,W] + auto reshape_layer2 = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *concat_layer->getOutput(0)); + reshape_layer2->setReshapeDimensions(nvinfer1::Dims4(NT, C, H, W)); + + // Set output + auto output_name = op_desc.Output("Out")[0]; + RreplenishLayerAndOutput(reshape_layer2, "temporal_shift", {output_name}, test_mode); + } +}; + +} // namespace tensorrt +} // namespace inference +} // namespace paddle + +REGISTER_TRT_OP_CONVERTER(temporal_shift, TemporalShiftOp); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 029665bd111315..cf67b97074a475 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -2701,7 +2701,8 @@ struct SimpleOpTypeSetTeller : public Teller { "expand_v2", "fuse_eleadd_transpose", "skip_groupnorm_act", - "preln_groupnorm_act"}; + "preln_groupnorm_act", + "temporal_shift"}; std::unordered_set teller_set{ "mul", @@ -2853,7 +2854,8 @@ struct SimpleOpTypeSetTeller : public Teller { "expand_v2", "fuse_eleadd_transpose", "skip_groupnorm_act", - "preln_groupnorm_act"}; + "preln_groupnorm_act", + "temporal_shift"}; }; struct GenericPluginTeller : public Teller { From 62528288509705d61ad9f217e61cd278aef0e5af Mon Sep 17 00:00:00 2001 From: andsonder Date: Tue, 28 Feb 2023 21:33:00 +0800 Subject: [PATCH 02/17] update codes about temporal_shift --- .../tensorrt/convert/temporal_shift_op.cc | 50 ++++++-- .../test_trt_convert_temporal_shift.py | 117 ++++++++++++++++++ 2 files changed, 155 insertions(+), 12 deletions(-) create mode 100755 python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py diff --git a/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc b/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc index 184136eccb512c..562c7d4d6acca7 100644 --- a/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc @@ -53,32 +53,58 @@ class TemporalShiftOpConverter : public OpConverter { // Reshape input to [N,T,C,H,W] auto reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); - reshape_layer->setReshapeDimensions(nvinfer1::Dims5(N, T, C, H, W)); + nvinfer1::Dims reshape_dims{5, {N, T, C, H, W}}; + reshape_layer->setReshapeDimensions(reshape_dims); input = reshape_layer->getOutput(0); // Pad input - auto* pad_layer = TRT_ENGINE_ADD_LAYER(engine_, Padding, *input, nvinfer1::Dims4(0, 0, 1, 1), nvinfer1::Dims4(0, 0, 1, 1)); + auto* pad_layer = TRT_ENGINE_ADD_LAYER(engine_, + PaddingNd, + *input, + nvinfer1::Dims4(0, 1, 0, 0), + nvinfer1::Dims4(0, 1, 0, 0)); input = pad_layer->getOutput(0); // Slice input - int slice_size = static_cast(C * shift_ratio); - auto slice1_layer = TRT_ENGINE_ADD_LAYER(engine_, Slice, *input, nvinfer1::Dims3(0, 0, 0), nvinfer1::Dims3(N, T, slice_size), nvinfer1::Dims3(1, 1, 1)); - auto slice2_layer = TRT_ENGINE_ADD_LAYER(engine_, Slice, *input, nvinfer1::Dims3(0, 2, slice_size), nvinfer1::Dims3(N, T, slice_size), nvinfer1::Dims3(1, 1, 1)); - auto slice3_layer = TRT_ENGINE_ADD_LAYER(engine_, Slice, *input, nvinfer1::Dims3(0, 1, slice_size * 2), nvinfer1::Dims3(N, T, C - slice_size * 2), nvinfer1::Dims3(1, 1, 1)); + int slice_c = int(C * shift_ratio); + int slice_c2 = int(C * shift_ratio * 2); + auto* slice1_layer = TRT_ENGINE_ADD_LAYER(engine_, + Slice, + *pad_layer->getOutput(0), + nvinfer1::Dims3{0, 0, 0}, + nvinfer1::Dims3{T, slice_c, H}, + nvinfer1::Dims3{1, 1, 1}); + auto* slice2_layer = TRT_ENGINE_ADD_LAYER(engine_, + Slice, + *pad_layer->getOutput(0), + nvinfer1::Dims3{0, 2, 0}, + nvinfer1::Dims3{T, slice_c2 - slice_c, H}, + nvinfer1::Dims3{1, 1, 1}); + auto* slice3_layer = TRT_ENGINE_ADD_LAYER(engine_, + Slice, + *pad_layer->getOutput(0), + nvinfer1::Dims3{0, 1, 0}, + nvinfer1::Dims3{T, C - slice_c2, H}, + nvinfer1::Dims3{1, 1, 1}); // Concatenate slices along the third dimension (C) - auto concat_layer = TRT_ENGINE_ADD_LAYER(engine_, Concatenation, &slice1_layer->getOutput(0), 3); - concat_layer->setInput(1, slice2_layer->getOutput(0)); - concat_layer->setInput(2, slice3_layer->getOutput(0)); + nvinfer1::ITensor* concat_inputs[3] = {slice1_layer->getOutput(0), + slice2_layer->getOutput(0), + slice3_layer->getOutput(0)}; + auto* concat_layer = TRT_ENGINE_ADD_LAYER(engine_, + Concatenation, + concat_inputs, 3); concat_layer->setAxis(2); // Reshape output to [N*T,C,H,W] - auto reshape_layer2 = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *concat_layer->getOutput(0)); - reshape_layer2->setReshapeDimensions(nvinfer1::Dims4(NT, C, H, W)); + nvinfer1::Dims output_shape{4, {N * T, C, H, W}}; + auto* reshape_layer2 = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *concat_layer->getOutput(0)); + reshape_layer2->setReshapeDimensions(output_shape); // Set output auto output_name = op_desc.Output("Out")[0]; RreplenishLayerAndOutput(reshape_layer2, "temporal_shift", {output_name}, test_mode); + } }; @@ -86,4 +112,4 @@ class TemporalShiftOpConverter : public OpConverter { } // namespace inference } // namespace paddle -REGISTER_TRT_OP_CONVERTER(temporal_shift, TemporalShiftOp); +REGISTER_TRT_OP_CONVERTER(temporal_shift, TemporalShiftOpConverter); diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py new file mode 100755 index 00000000000000..4a1132f203c31f --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py @@ -0,0 +1,117 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from functools import partial +from typing import Any, Dict, List + +import numpy as np +from program_config import ProgramConfig, TensorConfig +from trt_layer_auto_scan_test import TrtLayerAutoScanTest + +import paddle.inference as paddle_infer + + +class TrtConvertTemporalShiftTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: + return True + + def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]]): + T = attrs[0]['seg_num'] + NT = 3 * T + return np.random.random([NT, 4, 32, 32]).astype(np.float32) + + for shift_value in [0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50]: + for T in range(2, 5): + dics = [{"shift_ratio": shift_value, "seg_num": T}, {}] + + ops_config = [ + { + "op_type": "temporal_shift", + "op_inputs": {"X": ["input_data"]}, + "op_outputs": {"Out": ["temporal_shift_output_data"]}, + "op_attrs": dics[0], + } + ] + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "input_data": TensorConfig( + data_gen=partial(generate_input1, dics) + ) + }, + outputs=["temporal_shift_output_data"], + ) + + yield program_config + + def sample_predictor_configs( + self, program_config + ) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): + T = attrs[0]['seg_num'] + self.dynamic_shape.min_input_shape = { + "input_data": [1 * T, 3, 32, 32] + } + self.dynamic_shape.max_input_shape = { + "input_data": [3 * T, 3, 64, 64] + } + self.dynamic_shape.opt_input_shape = { + "input_data": [1 * T, 3, 64, 64] + } + + def clear_dynamic_shape(): + self.dynamic_shape.min_input_shape = {} + self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.opt_input_shape = {} + + def generate_trt_nodes_num(attrs, dynamic_shape): + return 1, 2 + + attrs = [ + program_config.ops[i].attrs for i in range(len(program_config.ops)) + ] + + # for static_shape + clear_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), (1e-3, 1e-3) + + # for dynamic_shape + generate_dynamic_shape(attrs) + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), (1e-3, 1e-3) + + def test(self): + self.run_test() + + +if __name__ == "__main__": + unittest.main() From 0fd93b456c2397c36c266b9052f3351d98ecc3ec Mon Sep 17 00:00:00 2001 From: andsonder Date: Wed, 1 Mar 2023 17:03:55 +0800 Subject: [PATCH 03/17] fix error about padding --- .../tensorrt/convert/temporal_shift_op.cc | 116 ++++++++++++------ .../test_trt_convert_temporal_shift.py | 70 +++++------ 2 files changed, 109 insertions(+), 77 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc b/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc index 562c7d4d6acca7..b2562594965a99 100644 --- a/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc @@ -37,73 +37,117 @@ class TemporalShiftOpConverter : public OpConverter { const framework::Scope& scope, bool test_mode) override { VLOG(3) << "convert a fluid transpose op to tensorrt tranpose layer"; - framework::OpDesc op_desc(op, nullptr); // Declare inputs auto* input = engine_->GetITensor(op_desc.Input("X")[0]); + const float shift_ratio = PADDLE_GET_CONST(float, op_desc.GetAttr("shift_ratio")); const int T = PADDLE_GET_CONST(int, op_desc.GetAttr("seg_num")); - const auto& input_dims = input->getDimensions(); - int NT = input_dims.d[0]; - int C = input_dims.d[1]; - int H = input_dims.d[2]; - int W = input_dims.d[3]; - int N = NT / T; + auto input_dims = input->getDimensions(); - // Reshape input to [N,T,C,H,W] + const int NT = input_dims.d[0]; + const int C = input_dims.d[1]; + const int H = input_dims.d[2]; + const int W = input_dims.d[3]; + const int N = NT / T; + + // Reshape input to [N,C,H,W,T] auto reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); - nvinfer1::Dims reshape_dims{5, {N, T, C, H, W}}; + nvinfer1::Dims reshape_dims{5, {N, C, H, W, T}}; reshape_layer->setReshapeDimensions(reshape_dims); - input = reshape_layer->getOutput(0); // Pad input auto* pad_layer = TRT_ENGINE_ADD_LAYER(engine_, - PaddingNd, - *input, - nvinfer1::Dims4(0, 1, 0, 0), - nvinfer1::Dims4(0, 1, 0, 0)); - input = pad_layer->getOutput(0); + Padding, + *reshape_layer->getOutput(0), + nvinfer1::DimsHW{0, 1}, + nvinfer1::DimsHW{0, 1}); + + // Reshape input to [N,T,C,H,W] + auto reshape_layer2 = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *pad_layer->getOutput(0)); + nvinfer1::Dims reshape_dims2{5, {N, T + 2, C, H, W}}; + reshape_layer2->setReshapeDimensions(reshape_dims2); + + // print pad_layer->getOutput(0)->getDimensions() + auto pad_dims = pad_layer->getOutput(0)->getDimensions(); + int dims = pad_dims.nbDims; + for (int i = 0; i < dims; ++i) { + std::cout << pad_dims.d[i] << " "; + } + std::cout << std::endl; // Slice input +// int slice_c = int(C * shift_ratio); +// int slice_c2 = int(C * shift_ratio * 2); +// +// auto* slice1_layer = TRT_ENGINE_ADD_LAYER(engine_, +// Slice, +// *pad_layer->getOutput(0), +// nvinfer1::Dims{5, {0, 0, 0, 0, 0}}, +// nvinfer1::Dims{5, {N, slice_c, H, W, T}}, +// nvinfer1::Dims{5, {1, 1, 1, 1, 1}}); +// auto* slice2_layer = TRT_ENGINE_ADD_LAYER(engine_, +// Slice, +// *pad_layer->getOutput(0), +// nvinfer1::Dims{5, {0, slice_c, 0, 0, 2}}, +// nvinfer1::Dims{5, {N, slice_c, H, W, T}}, +// nvinfer1::Dims{5, {1, 1, 1, 1, 1}}); +// auto* slice3_layer = TRT_ENGINE_ADD_LAYER(engine_, +// Slice, +// *pad_layer->getOutput(0), +// nvinfer1::Dims{5, {0, slice_c2, 0, 0, 1}}, +// nvinfer1::Dims{5, {N, C - slice_c2, H, W, T}}, +// nvinfer1::Dims{5, {1, 1, 1, 1, 1}}); int slice_c = int(C * shift_ratio); int slice_c2 = int(C * shift_ratio * 2); auto* slice1_layer = TRT_ENGINE_ADD_LAYER(engine_, Slice, - *pad_layer->getOutput(0), - nvinfer1::Dims3{0, 0, 0}, - nvinfer1::Dims3{T, slice_c, H}, - nvinfer1::Dims3{1, 1, 1}); + *reshape_layer2->getOutput(0), + nvinfer1::Dims{5, {0, 0, 0, 0, 0}}, + nvinfer1::Dims{5, {N, T, slice_c, H, W}}, + nvinfer1::Dims{5, {1, 1, 1, 1, 1}}); auto* slice2_layer = TRT_ENGINE_ADD_LAYER(engine_, Slice, - *pad_layer->getOutput(0), - nvinfer1::Dims3{0, 2, 0}, - nvinfer1::Dims3{T, slice_c2 - slice_c, H}, - nvinfer1::Dims3{1, 1, 1}); + *reshape_layer2->getOutput(0), + nvinfer1::Dims{5, {0, 2, slice_c, 0, 0}}, + nvinfer1::Dims{5, {N, T, slice_c, H, W}}, + nvinfer1::Dims{5, {1, 1, 1, 1, 1}}); auto* slice3_layer = TRT_ENGINE_ADD_LAYER(engine_, Slice, - *pad_layer->getOutput(0), - nvinfer1::Dims3{0, 1, 0}, - nvinfer1::Dims3{T, C - slice_c2, H}, - nvinfer1::Dims3{1, 1, 1}); + *reshape_layer2->getOutput(0), + nvinfer1::Dims{5, {0, 1, slice_c2, 0, 0}}, + nvinfer1::Dims{5, {N, T, C - slice_c2, H, W}}, + nvinfer1::Dims{5, {1, 1, 1, 1, 1}}); // Concatenate slices along the third dimension (C) - nvinfer1::ITensor* concat_inputs[3] = {slice1_layer->getOutput(0), - slice2_layer->getOutput(0), - slice3_layer->getOutput(0)}; - auto* concat_layer = TRT_ENGINE_ADD_LAYER(engine_, + nvinfer1::IConcatenationLayer* concat_layer; + if(!slice_c){ + nvinfer1::ITensor* concat_inputs[2] = {slice2_layer->getOutput(0), + slice3_layer->getOutput(0)}; + concat_layer = TRT_ENGINE_ADD_LAYER(engine_, Concatenation, - concat_inputs, 3); - concat_layer->setAxis(2); + concat_inputs, 2); + concat_layer->setAxis(2); + } + else{ + nvinfer1::ITensor* concat_inputs[3] = {slice1_layer->getOutput(0), + slice2_layer->getOutput(0), + slice3_layer->getOutput(0)}; + concat_layer = TRT_ENGINE_ADD_LAYER(engine_, + Concatenation, + concat_inputs, 3); + concat_layer->setAxis(2); + } // Reshape output to [N*T,C,H,W] nvinfer1::Dims output_shape{4, {N * T, C, H, W}}; - auto* reshape_layer2 = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *concat_layer->getOutput(0)); - reshape_layer2->setReshapeDimensions(output_shape); + auto* reshape_layer3 = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *concat_layer->getOutput(0)); + reshape_layer3->setReshapeDimensions(output_shape); // Set output auto output_name = op_desc.Output("Out")[0]; - RreplenishLayerAndOutput(reshape_layer2, "temporal_shift", {output_name}, test_mode); + RreplenishLayerAndOutput(reshape_layer3, "temporal_shift", {output_name}, test_mode); } }; diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py index 4a1132f203c31f..016c4bbed56ac6 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py @@ -14,7 +14,7 @@ import unittest from functools import partial -from typing import Any, Dict, List +from typing import List import numpy as np from program_config import ProgramConfig, TensorConfig @@ -28,10 +28,9 @@ def is_program_valid(self, program_config: ProgramConfig) -> bool: return True def sample_program_configs(self): - def generate_input1(attrs: List[Dict[str, Any]]): - T = attrs[0]['seg_num'] - NT = 3 * T - return np.random.random([NT, 4, 32, 32]).astype(np.float32) + def generate_input1(attrs): + T = attrs[0]["seg_num"] + return np.ones([3 * T, 10, 64, 64]).astype(np.float32) for shift_value in [0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50]: for T in range(2, 5): @@ -41,22 +40,23 @@ def generate_input1(attrs: List[Dict[str, Any]]): { "op_type": "temporal_shift", "op_inputs": {"X": ["input_data"]}, - "op_outputs": {"Out": ["temporal_shift_output_data"]}, + "op_outputs": {"Out": ["output_data"]}, "op_attrs": dics[0], } ] - ops = self.generate_op_config(ops_config) - program_config = ProgramConfig( - ops=ops, - weights={}, - inputs={ - "input_data": TensorConfig( - data_gen=partial(generate_input1, dics) - ) - }, - outputs=["temporal_shift_output_data"], - ) + ops = self.generate_op_config(ops_config) + for i in range(10): + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "input_data": TensorConfig( + data_gen=partial(generate_input1, dics) + ), + }, + outputs=["output_data"], + ) yield program_config @@ -64,50 +64,38 @@ def sample_predictor_configs( self, program_config ) -> (paddle_infer.Config, List[int], float): def generate_dynamic_shape(attrs): - T = attrs[0]['seg_num'] self.dynamic_shape.min_input_shape = { - "input_data": [1 * T, 3, 32, 32] + "input_data": [6, 10, 64, 64] } self.dynamic_shape.max_input_shape = { - "input_data": [3 * T, 3, 64, 64] + "input_data": [20, 10, 64, 64] } self.dynamic_shape.opt_input_shape = { - "input_data": [1 * T, 3, 64, 64] + "input_data": [6, 10, 64, 64] } def clear_dynamic_shape(): - self.dynamic_shape.min_input_shape = {} self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.min_input_shape = {} self.dynamic_shape.opt_input_shape = {} - def generate_trt_nodes_num(attrs, dynamic_shape): - return 1, 2 - attrs = [ program_config.ops[i].attrs for i in range(len(program_config.ops)) ] - # for static_shape - clear_dynamic_shape() - self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num( - attrs, False - ), 1e-5 - self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num( - attrs, False - ), (1e-3, 1e-3) + # # for static_shape + # clear_dynamic_shape() + # self.trt_param.precision = paddle_infer.PrecisionType.Float32 + # yield self.create_inference_config(), (1, 3), 1e-5 + # self.trt_param.precision = paddle_infer.PrecisionType.Half + # yield self.create_inference_config(), (1, 3), 1e-3 # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num( - attrs, True - ), 1e-5 + yield self.create_inference_config(), (1, 2), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num( - attrs, True - ), (1e-3, 1e-3) + yield self.create_inference_config(), (1, 2), 1e-3 def test(self): self.run_test() From 053d2dd71a661214e07b6e42570c1a4a47725981 Mon Sep 17 00:00:00 2001 From: andsonder Date: Sun, 5 Mar 2023 21:08:38 +0800 Subject: [PATCH 04/17] update pad codes --- .../tensorrt/convert/temporal_shift_op.cc | 192 ++++++++++-------- 1 file changed, 112 insertions(+), 80 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc b/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc index b2562594965a99..8381d80cc61be9 100644 --- a/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc @@ -41,7 +41,8 @@ class TemporalShiftOpConverter : public OpConverter { // Declare inputs auto* input = engine_->GetITensor(op_desc.Input("X")[0]); - const float shift_ratio = PADDLE_GET_CONST(float, op_desc.GetAttr("shift_ratio")); + const float shift_ratio = + PADDLE_GET_CONST(float, op_desc.GetAttr("shift_ratio")); const int T = PADDLE_GET_CONST(int, op_desc.GetAttr("seg_num")); auto input_dims = input->getDimensions(); @@ -52,103 +53,134 @@ class TemporalShiftOpConverter : public OpConverter { const int W = input_dims.d[3]; const int N = NT / T; - // Reshape input to [N,C,H,W,T] + // Reshape input to [N,T,C,H,W] auto reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); - nvinfer1::Dims reshape_dims{5, {N, C, H, W, T}}; + nvinfer1::Dims reshape_dims{5, {N, T, C, H, W}}; reshape_layer->setReshapeDimensions(reshape_dims); - // Pad input - auto* pad_layer = TRT_ENGINE_ADD_LAYER(engine_, - Padding, - *reshape_layer->getOutput(0), - nvinfer1::DimsHW{0, 1}, - nvinfer1::DimsHW{0, 1}); - - // Reshape input to [N,T,C,H,W] - auto reshape_layer2 = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *pad_layer->getOutput(0)); - nvinfer1::Dims reshape_dims2{5, {N, T + 2, C, H, W}}; - reshape_layer2->setReshapeDimensions(reshape_dims2); - - // print pad_layer->getOutput(0)->getDimensions() - auto pad_dims = pad_layer->getOutput(0)->getDimensions(); - int dims = pad_dims.nbDims; - for (int i = 0; i < dims; ++i) { - std::cout << pad_dims.d[i] << " "; + // Pad input to [N,T+2,C,H,W] + std::vector pre_pad_v{0, 1, 0, 0, 0}; + std::vector post_pad_v{0, 1, 0, 0, 0}; + nvinfer1::ITensor* pre_pad = vectorToTensor(pre_pad_v); + nvinfer1::ITensor* post_pad = vectorToTensor(post_pad_v); + + std::vector zeros_v(inputDim, 0); + auto const zeros = vectorToTensor(zeros_v); + + nvinfer1::ITensor* start{}; + nvinfer1::ITensor* size{}; + // elementwise add zeros and pre_pad + start = TRT_ENGINE_ADD_LAYER(engine_, + ElementWise, + *zeros, + *pre_pad, + nvinfer1::ElementWiseOperation::kSUB) + ->getOutput(0); + + auto const total_padding = + TRT_ENGINE_ADD_LAYER(engine_, + ElementWise, + *pre_pad, + *post_pad, + nvinfer1::ElementWiseOperation::kSUM) + ->getOutput(0); + + std::vector input_shape_v(inputDim, 0); + for (int i = 0; i < inputDim; i++) { + input_shape_v[i] = input->getDimensions().d[i]; } - std::cout << std::endl; - - // Slice input -// int slice_c = int(C * shift_ratio); -// int slice_c2 = int(C * shift_ratio * 2); -// -// auto* slice1_layer = TRT_ENGINE_ADD_LAYER(engine_, -// Slice, -// *pad_layer->getOutput(0), -// nvinfer1::Dims{5, {0, 0, 0, 0, 0}}, -// nvinfer1::Dims{5, {N, slice_c, H, W, T}}, -// nvinfer1::Dims{5, {1, 1, 1, 1, 1}}); -// auto* slice2_layer = TRT_ENGINE_ADD_LAYER(engine_, -// Slice, -// *pad_layer->getOutput(0), -// nvinfer1::Dims{5, {0, slice_c, 0, 0, 2}}, -// nvinfer1::Dims{5, {N, slice_c, H, W, T}}, -// nvinfer1::Dims{5, {1, 1, 1, 1, 1}}); -// auto* slice3_layer = TRT_ENGINE_ADD_LAYER(engine_, -// Slice, -// *pad_layer->getOutput(0), -// nvinfer1::Dims{5, {0, slice_c2, 0, 0, 1}}, -// nvinfer1::Dims{5, {N, C - slice_c2, H, W, T}}, -// nvinfer1::Dims{5, {1, 1, 1, 1, 1}}); + auto const input_shape = vectorToTensor(input_shape_v); + + size = TRT_ENGINE_ADD_LAYER(engine_, + ElementWise, + *input_shape, + *total_padding, + nvinfer1::ElementWiseOperation::kSUM) + ->getOutput(0); + nvinfer1::Dims stride; + stride.nbDims = inputDim; + std::fill_n(stride.d, inputDim, 1); + auto const& dummy = stride; + auto* slice_layer = + TRT_ENGINE_ADD_LAYER(engine_, + Slice, + *const_cast(input), + dummy, + dummy, + stride); + slice_layer->setInput(1, *start); + slice_layer->setInput(2, *size); + slice_layer->setMode(nvinfer1::SliceMode::kFILL); + + + // Slice Padded Tensor int slice_c = int(C * shift_ratio); int slice_c2 = int(C * shift_ratio * 2); - auto* slice1_layer = TRT_ENGINE_ADD_LAYER(engine_, - Slice, - *reshape_layer2->getOutput(0), - nvinfer1::Dims{5, {0, 0, 0, 0, 0}}, - nvinfer1::Dims{5, {N, T, slice_c, H, W}}, - nvinfer1::Dims{5, {1, 1, 1, 1, 1}}); - auto* slice2_layer = TRT_ENGINE_ADD_LAYER(engine_, - Slice, - *reshape_layer2->getOutput(0), - nvinfer1::Dims{5, {0, 2, slice_c, 0, 0}}, - nvinfer1::Dims{5, {N, T, slice_c, H, W}}, - nvinfer1::Dims{5, {1, 1, 1, 1, 1}}); - auto* slice3_layer = TRT_ENGINE_ADD_LAYER(engine_, - Slice, - *reshape_layer2->getOutput(0), - nvinfer1::Dims{5, {0, 1, slice_c2, 0, 0}}, - nvinfer1::Dims{5, {N, T, C - slice_c2, H, W}}, - nvinfer1::Dims{5, {1, 1, 1, 1, 1}}); + auto* slice1_layer = + TRT_ENGINE_ADD_LAYER(engine_, + Slice, + *slice_layer->getOutput(0), + nvinfer1::Dims{5, {0, 0, 0, 0, 0}}, + nvinfer1::Dims{5, {N, T, slice_c, H, W}}, + nvinfer1::Dims{5, {1, 1, 1, 1, 1}}); + auto* slice2_layer = + TRT_ENGINE_ADD_LAYER(engine_, + Slice, + *slice_layer->getOutput(0), + nvinfer1::Dims{5, {0, 2, slice_c, 0, 0}}, + nvinfer1::Dims{5, {N, T, slice_c, H, W}}, + nvinfer1::Dims{5, {1, 1, 1, 1, 1}}); + auto* slice3_layer = + TRT_ENGINE_ADD_LAYER(engine_, + Slice, + *slice_layer->getOutput(0), + nvinfer1::Dims{5, {0, 1, slice_c2, 0, 0}}, + nvinfer1::Dims{5, {N, T, C - slice_c2, H, W}}, + nvinfer1::Dims{5, {1, 1, 1, 1, 1}}); // Concatenate slices along the third dimension (C) nvinfer1::IConcatenationLayer* concat_layer; - if(!slice_c){ - nvinfer1::ITensor* concat_inputs[2] = {slice2_layer->getOutput(0), - slice3_layer->getOutput(0)}; - concat_layer = TRT_ENGINE_ADD_LAYER(engine_, - Concatenation, - concat_inputs, 2); - concat_layer->setAxis(2); - } - else{ - nvinfer1::ITensor* concat_inputs[3] = {slice1_layer->getOutput(0), - slice2_layer->getOutput(0), - slice3_layer->getOutput(0)}; - concat_layer = TRT_ENGINE_ADD_LAYER(engine_, - Concatenation, - concat_inputs, 3); - concat_layer->setAxis(2); + if (!slice_c) { + nvinfer1::ITensor* concat_inputs[2] = {slice2_layer->getOutput(0), + slice3_layer->getOutput(0)}; + concat_layer = + TRT_ENGINE_ADD_LAYER(engine_, Concatenation, concat_inputs, 2); + concat_layer->setAxis(2); + } else { + nvinfer1::ITensor* concat_inputs[3] = {slice1_layer->getOutput(0), + slice2_layer->getOutput(0), + slice3_layer->getOutput(0)}; + concat_layer = + TRT_ENGINE_ADD_LAYER(engine_, Concatenation, concat_inputs, 3); + concat_layer->setAxis(2); } // Reshape output to [N*T,C,H,W] nvinfer1::Dims output_shape{4, {N * T, C, H, W}}; - auto* reshape_layer3 = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *concat_layer->getOutput(0)); + auto* reshape_layer3 = + TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *concat_layer->getOutput(0)); reshape_layer3->setReshapeDimensions(output_shape); // Set output auto output_name = op_desc.Output("Out")[0]; - RreplenishLayerAndOutput(reshape_layer3, "temporal_shift", {output_name}, test_mode); + RreplenishLayerAndOutput( + reshape_layer3, "temporal_shift", {output_name}, test_mode); + } + + private: + template + nvinfer1::ITensor* vectorToTensor(std::vector v) { + int* v_data = const_cast(static_cast(v.data())); + + nvinfer1::Weights v_wt{nvinfer1::DataType::kINT32, + static_cast(v_data), + static_cast(v.size())}; + + nvinfer1::Dims v_dim; + v_dim.nbDims = 1; + v_dim.d[0] = static_cast(v.size()); + return TRT_ENGINE_ADD_LAYER(engine_, Constant, v_dim, v_wt)->getOutput(0); } }; From ac677a2fbf6e904e5a4f0f5839b0286044c6dabd Mon Sep 17 00:00:00 2001 From: andsonder Date: Sun, 5 Mar 2023 22:47:04 +0800 Subject: [PATCH 05/17] update test codes --- .../tensorrt/convert/temporal_shift_op.cc | 14 +++++++------- paddle/fluid/inference/tensorrt/op_teller.cc | 4 ++-- .../inference/test_trt_convert_temporal_shift.py | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc b/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc index 8381d80cc61be9..9b91de0a45c275 100644 --- a/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc @@ -64,12 +64,13 @@ class TemporalShiftOpConverter : public OpConverter { nvinfer1::ITensor* pre_pad = vectorToTensor(pre_pad_v); nvinfer1::ITensor* post_pad = vectorToTensor(post_pad_v); - std::vector zeros_v(inputDim, 0); + int dims = 5; + std::vector zeros_v(dims, 0); auto const zeros = vectorToTensor(zeros_v); nvinfer1::ITensor* start{}; nvinfer1::ITensor* size{}; - // elementwise add zeros and pre_pad + start = TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *zeros, @@ -85,8 +86,8 @@ class TemporalShiftOpConverter : public OpConverter { nvinfer1::ElementWiseOperation::kSUM) ->getOutput(0); - std::vector input_shape_v(inputDim, 0); - for (int i = 0; i < inputDim; i++) { + std::vector input_shape_v(dims, 0); + for (int i = 0; i < dims; i++) { input_shape_v[i] = input->getDimensions().d[i]; } auto const input_shape = vectorToTensor(input_shape_v); @@ -98,8 +99,8 @@ class TemporalShiftOpConverter : public OpConverter { nvinfer1::ElementWiseOperation::kSUM) ->getOutput(0); nvinfer1::Dims stride; - stride.nbDims = inputDim; - std::fill_n(stride.d, inputDim, 1); + stride.nbDims = dims; + std::fill_n(stride.d, dims, 1); auto const& dummy = stride; auto* slice_layer = TRT_ENGINE_ADD_LAYER(engine_, @@ -112,7 +113,6 @@ class TemporalShiftOpConverter : public OpConverter { slice_layer->setInput(2, *size); slice_layer->setMode(nvinfer1::SliceMode::kFILL); - // Slice Padded Tensor int slice_c = int(C * shift_ratio); int slice_c2 = int(C * shift_ratio * 2); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 99c50b4338b43a..9301d184f80be0 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -2739,7 +2739,7 @@ struct SimpleOpTypeSetTeller : public Teller { "fuse_eleadd_transpose", "skip_groupnorm_act", "preln_groupnorm_act", - "temporal_shift"}; + "temporal_shift", "grid_sampler"}; std::unordered_set teller_set{ @@ -2893,7 +2893,7 @@ struct SimpleOpTypeSetTeller : public Teller { "fuse_eleadd_transpose", "skip_groupnorm_act", "preln_groupnorm_act", - "temporal_shift"}; + "temporal_shift" "grid_sampler"}; }; diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py index 016c4bbed56ac6..123d993daf2472 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py @@ -32,7 +32,7 @@ def generate_input1(attrs): T = attrs[0]["seg_num"] return np.ones([3 * T, 10, 64, 64]).astype(np.float32) - for shift_value in [0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.50]: + for shift_value in [0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.49]: for T in range(2, 5): dics = [{"shift_ratio": shift_value, "seg_num": T}, {}] @@ -93,9 +93,9 @@ def clear_dynamic_shape(): # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), (1, 2), 1e-5 + yield self.create_inference_config(), (0, 3), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), (1, 2), 1e-3 + yield self.create_inference_config(), (0, 3), 1e-3 def test(self): self.run_test() From d7af5e3883c57b2efbb6161c2af82efc12ddbd0a Mon Sep 17 00:00:00 2001 From: andsonder Date: Sun, 5 Mar 2023 22:52:52 +0800 Subject: [PATCH 06/17] add trt version limite --- paddle/fluid/inference/api/analysis_predictor.cc | 2 +- paddle/fluid/inference/tensorrt/op_teller.cc | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 7dc3a48e5e0d6d..213b6a61b74c32 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -2469,7 +2469,6 @@ USE_TRT_CONVERTER(mish); USE_TRT_CONVERTER(deformable_conv); USE_TRT_CONVERTER(pool3d) USE_TRT_CONVERTER(square); -USE_TRT_CONVERTER(temporal_shift); // unary op USE_TRT_CONVERTER(exp); USE_TRT_CONVERTER(log); @@ -2545,6 +2544,7 @@ USE_TRT_CONVERTER(grid_sampler) #endif #if IS_TRT_VERSION_GE(8200) USE_TRT_CONVERTER(set_value) +USE_TRT_CONVERTER(temporal_shift); #endif #if PADDLE_WITH_CUSPARSELT && IS_TRT_VERSION_GE(8000) USE_TRT_CONVERTER(sparse_fc) diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 9301d184f80be0..ab1a8668e9c996 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -2579,6 +2579,13 @@ struct SimpleOpTypeSetTeller : public Teller { #endif } + if (op_type == "temporal_shift") { +#if !IS_TRT_VERSION_GE(8200) + VLOG(3) << "temporal_shift is not supported when TensorRT < 8.5.1"; + return false; +#endif + } + if (use_no_calib_int8) { return int8_teller_set.count(op_type); } else { From a2a0416f5b4b4f887a3abf43aad046b1c3911be2 Mon Sep 17 00:00:00 2001 From: andsonder Date: Mon, 6 Mar 2023 17:37:26 +0800 Subject: [PATCH 07/17] =?UTF-8?q?=E6=9B=B4=E6=96=B0trt=20=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E6=8E=A7=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tensorrt/convert/temporal_shift_op.cc | 93 ++++++++++--------- paddle/fluid/inference/tensorrt/op_teller.cc | 2 +- .../test_trt_convert_temporal_shift.py | 51 +++++----- 3 files changed, 76 insertions(+), 70 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc b/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc index 9b91de0a45c275..e8092302b30c88 100644 --- a/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc @@ -36,6 +36,7 @@ class TemporalShiftOpConverter : public OpConverter { void operator()(const framework::proto::OpDesc& op, const framework::Scope& scope, bool test_mode) override { +#if IS_TRT_VERSION_GE(8200) VLOG(3) << "convert a fluid transpose op to tensorrt tranpose layer"; framework::OpDesc op_desc(op, nullptr); // Declare inputs @@ -45,6 +46,20 @@ class TemporalShiftOpConverter : public OpConverter { PADDLE_GET_CONST(float, op_desc.GetAttr("shift_ratio")); const int T = PADDLE_GET_CONST(int, op_desc.GetAttr("seg_num")); + std::string data_format = "NCHW"; + if (op_desc.HasAttr("data_format")) { + data_format = + PADDLE_GET_CONST(std::string, op_desc.GetAttr("data_format")); + } + + if (data_format == "NHWC") { + // tanspose input to [N,C,H,W] + auto transpose_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); + nvinfer1::Permutation perm{0, 3, 1, 2}; + transpose_layer->setFirstTranspose(perm); + input = transpose_layer->getOutput(0); + } + auto input_dims = input->getDimensions(); const int NT = input_dims.d[0]; @@ -55,18 +70,18 @@ class TemporalShiftOpConverter : public OpConverter { // Reshape input to [N,T,C,H,W] auto reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); - nvinfer1::Dims reshape_dims{5, {N, T, C, H, W}}; + nvinfer1::Dims reshape_dims{5, { N, T, C, H, W }}; reshape_layer->setReshapeDimensions(reshape_dims); // Pad input to [N,T+2,C,H,W] std::vector pre_pad_v{0, 1, 0, 0, 0}; std::vector post_pad_v{0, 1, 0, 0, 0}; - nvinfer1::ITensor* pre_pad = vectorToTensor(pre_pad_v); - nvinfer1::ITensor* post_pad = vectorToTensor(post_pad_v); + nvinfer1::ITensor* pre_pad = Add1DConstantLayer(pre_pad_v); + nvinfer1::ITensor* post_pad = Add1DConstantLayer(post_pad_v); int dims = 5; std::vector zeros_v(dims, 0); - auto const zeros = vectorToTensor(zeros_v); + auto const zeros = Add1DConstantLayer(zeros_v); nvinfer1::ITensor* start{}; nvinfer1::ITensor* size{}; @@ -90,7 +105,7 @@ class TemporalShiftOpConverter : public OpConverter { for (int i = 0; i < dims; i++) { input_shape_v[i] = input->getDimensions().d[i]; } - auto const input_shape = vectorToTensor(input_shape_v); + auto const input_shape = Add1DConstantLayer(input_shape_v); size = TRT_ENGINE_ADD_LAYER(engine_, ElementWise, @@ -116,27 +131,19 @@ class TemporalShiftOpConverter : public OpConverter { // Slice Padded Tensor int slice_c = int(C * shift_ratio); int slice_c2 = int(C * shift_ratio * 2); - auto* slice1_layer = - TRT_ENGINE_ADD_LAYER(engine_, - Slice, - *slice_layer->getOutput(0), - nvinfer1::Dims{5, {0, 0, 0, 0, 0}}, - nvinfer1::Dims{5, {N, T, slice_c, H, W}}, - nvinfer1::Dims{5, {1, 1, 1, 1, 1}}); - auto* slice2_layer = - TRT_ENGINE_ADD_LAYER(engine_, - Slice, - *slice_layer->getOutput(0), - nvinfer1::Dims{5, {0, 2, slice_c, 0, 0}}, - nvinfer1::Dims{5, {N, T, slice_c, H, W}}, - nvinfer1::Dims{5, {1, 1, 1, 1, 1}}); - auto* slice3_layer = - TRT_ENGINE_ADD_LAYER(engine_, - Slice, - *slice_layer->getOutput(0), - nvinfer1::Dims{5, {0, 1, slice_c2, 0, 0}}, - nvinfer1::Dims{5, {N, T, C - slice_c2, H, W}}, - nvinfer1::Dims{5, {1, 1, 1, 1, 1}}); + auto slice_start1 = nvinfer1::Dims{5, { 0, 0, 0, 0, 0 }}; + auto slice_start2 = nvinfer1::Dims{5, { 0, 2, slice_c, 0, 0 }}; + auto slice_start3 = nvinfer1::Dims{5, { 0, 1, slice_c2, 0, 0 }}; + auto slice_size = nvinfer1::Dims{5, { N, T, slice_c, H, W }}; + auto slice_size2 = nvinfer1::Dims{5, { N, T, C - slice_c2, H, W }}; + auto slice_stride = nvinfer1::Dims{5, { 1, 1, 1, 1, 1 }}; + + auto* slice1_layer = TRT_ENGINE_ADD_LAYER( + engine_, Slice, *slice_layer->getOutput(0), slice_start1, slice_size, slice_stride); + auto* slice2_layer = TRT_ENGINE_ADD_LAYER( + engine_, Slice, *slice_layer->getOutput(0), slice_start2, slice_size, slice_stride); + auto* slice3_layer = TRT_ENGINE_ADD_LAYER( + engine_, Slice, *slice_layer->getOutput(0), slice_start3, slice_size2, slice_stride); // Concatenate slices along the third dimension (C) nvinfer1::IConcatenationLayer* concat_layer; @@ -156,31 +163,29 @@ class TemporalShiftOpConverter : public OpConverter { } // Reshape output to [N*T,C,H,W] - nvinfer1::Dims output_shape{4, {N * T, C, H, W}}; + nvinfer1::Dims output_shape{4, { N * T, C, H, W }}; auto* reshape_layer3 = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *concat_layer->getOutput(0)); reshape_layer3->setReshapeDimensions(output_shape); // Set output auto output_name = op_desc.Output("Out")[0]; - RreplenishLayerAndOutput( - reshape_layer3, "temporal_shift", {output_name}, test_mode); - } - - private: - template - nvinfer1::ITensor* vectorToTensor(std::vector v) { - int* v_data = const_cast(static_cast(v.data())); - nvinfer1::Weights v_wt{nvinfer1::DataType::kINT32, - static_cast(v_data), - static_cast(v.size())}; - - nvinfer1::Dims v_dim; - v_dim.nbDims = 1; - v_dim.d[0] = static_cast(v.size()); - - return TRT_ENGINE_ADD_LAYER(engine_, Constant, v_dim, v_wt)->getOutput(0); + if (data_format == "NHWC") { + // Transpose output to [N*T,C,H,W] -> [N*T,H,W,C] + auto transpose_layer2 = + TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *reshape_layer3->getOutput(0)); + nvinfer1::Permutation permute_order{0, 2, 3, 1}; + transpose_layer2->setFirstTranspose(permute_order); + RreplenishLayerAndOutput( + transpose_layer2, "temporal_shift", {output_name}, test_mode); + } else { + RreplenishLayerAndOutput( + reshape_layer3, "temporal_shift", {output_name}, test_mode); + } +#else + VLOG(3) << "Temporal shift is not supported when TensorRT < 8.2"; +#endif } }; diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index ab1a8668e9c996..8efe83c108a272 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -2581,7 +2581,7 @@ struct SimpleOpTypeSetTeller : public Teller { if (op_type == "temporal_shift") { #if !IS_TRT_VERSION_GE(8200) - VLOG(3) << "temporal_shift is not supported when TensorRT < 8.5.1"; + VLOG(3) << "temporal_shift is not supported when TensorRT < 8.2"; return false; #endif } diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py index 123d993daf2472..0facb6dcd95369 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py @@ -34,31 +34,32 @@ def generate_input1(attrs): for shift_value in [0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.49]: for T in range(2, 5): - dics = [{"shift_ratio": shift_value, "seg_num": T}, {}] - - ops_config = [ - { - "op_type": "temporal_shift", - "op_inputs": {"X": ["input_data"]}, - "op_outputs": {"Out": ["output_data"]}, - "op_attrs": dics[0], - } - ] - - ops = self.generate_op_config(ops_config) - for i in range(10): - program_config = ProgramConfig( - ops=ops, - weights={}, - inputs={ - "input_data": TensorConfig( - data_gen=partial(generate_input1, dics) - ), - }, - outputs=["output_data"], - ) - - yield program_config + for data_format in ["NCHW", "NHWC"]: + dics = [{"shift_ratio": shift_value, "seg_num": T, "data_format": data_format}, {}] + + ops_config = [ + { + "op_type": "temporal_shift", + "op_inputs": {"X": ["input_data"]}, + "op_outputs": {"Out": ["output_data"]}, + "op_attrs": dics[0], + } + ] + + ops = self.generate_op_config(ops_config) + for i in range(10): + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "input_data": TensorConfig( + data_gen=partial(generate_input1, dics) + ), + }, + outputs=["output_data"], + ) + + yield program_config def sample_predictor_configs( self, program_config From fcb6504161037a69fc45a13145075a89967e7e15 Mon Sep 17 00:00:00 2001 From: andsonder Date: Mon, 6 Mar 2023 21:37:58 +0800 Subject: [PATCH 08/17] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E5=8D=95=E6=B5=8B?= =?UTF-8?q?=E6=96=87=E4=BB=B6[=E9=9D=99=E6=80=81shape=E5=8D=95=E6=B5=8B?= =?UTF-8?q?=E6=AD=A3=E5=B8=B8=E9=80=9A=E8=BF=87]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- paddle/fluid/framework/ir/trt_support_nhwc_pass.cc | 7 ++++--- .../ir/inference/test_trt_convert_temporal_shift.py | 13 ++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/paddle/fluid/framework/ir/trt_support_nhwc_pass.cc b/paddle/fluid/framework/ir/trt_support_nhwc_pass.cc index 3e56200dcaa52c..142bc5d601d89e 100644 --- a/paddle/fluid/framework/ir/trt_support_nhwc_pass.cc +++ b/paddle/fluid/framework/ir/trt_support_nhwc_pass.cc @@ -154,11 +154,12 @@ void TrtSupportNHWCPass::ApplyImpl(Graph *graph) const { "bilinear_interp", "bilinear_interp_v2", "nearest_interp", - "nearest_interp_v2"}; + "nearest_interp_v2", + "temporal_shift"}; // Ops must run under the original layout even though it has // data_format/data_layout attribute, otherwise it will be very troublesome! - std::unordered_set must_original_layout_ops{"affine_channel", - "softmax"}; + std::unordered_set must_original_layout_ops{ + "affine_channel", "softmax", "temporal_shift"}; // OPs unrelated to layout are consistent according to the layout of input // var! std::unordered_set any_layout_ops{"relu"}; diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py index 0facb6dcd95369..3c9ab410d9acfb 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py @@ -36,7 +36,6 @@ def generate_input1(attrs): for T in range(2, 5): for data_format in ["NCHW", "NHWC"]: dics = [{"shift_ratio": shift_value, "seg_num": T, "data_format": data_format}, {}] - ops_config = [ { "op_type": "temporal_shift", @@ -84,12 +83,12 @@ def clear_dynamic_shape(): program_config.ops[i].attrs for i in range(len(program_config.ops)) ] - # # for static_shape - # clear_dynamic_shape() - # self.trt_param.precision = paddle_infer.PrecisionType.Float32 - # yield self.create_inference_config(), (1, 3), 1e-5 - # self.trt_param.precision = paddle_infer.PrecisionType.Half - # yield self.create_inference_config(), (1, 3), 1e-3 + # for static_shape + clear_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), (0, 3), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), (0, 3), 1e-3 # for dynamic_shape generate_dynamic_shape(attrs) From 6320c024acf441a73dfda7b01cdb4c53b70da66b Mon Sep 17 00:00:00 2001 From: andsonder Date: Tue, 7 Mar 2023 20:52:10 +0800 Subject: [PATCH 09/17] fix code style --- .../tensorrt/convert/temporal_shift_op.cc | 28 +++++++++++++------ .../test_trt_convert_temporal_shift.py | 19 +++++++------ 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc b/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc index e8092302b30c88..651a2b04df89d9 100644 --- a/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc @@ -129,8 +129,8 @@ class TemporalShiftOpConverter : public OpConverter { slice_layer->setMode(nvinfer1::SliceMode::kFILL); // Slice Padded Tensor - int slice_c = int(C * shift_ratio); - int slice_c2 = int(C * shift_ratio * 2); + int slice_c = static_cast(C * shift_ratio); + int slice_c2 = static_cast(C * shift_ratio * 2); auto slice_start1 = nvinfer1::Dims{5, { 0, 0, 0, 0, 0 }}; auto slice_start2 = nvinfer1::Dims{5, { 0, 2, slice_c, 0, 0 }}; auto slice_start3 = nvinfer1::Dims{5, { 0, 1, slice_c2, 0, 0 }}; @@ -138,12 +138,24 @@ class TemporalShiftOpConverter : public OpConverter { auto slice_size2 = nvinfer1::Dims{5, { N, T, C - slice_c2, H, W }}; auto slice_stride = nvinfer1::Dims{5, { 1, 1, 1, 1, 1 }}; - auto* slice1_layer = TRT_ENGINE_ADD_LAYER( - engine_, Slice, *slice_layer->getOutput(0), slice_start1, slice_size, slice_stride); - auto* slice2_layer = TRT_ENGINE_ADD_LAYER( - engine_, Slice, *slice_layer->getOutput(0), slice_start2, slice_size, slice_stride); - auto* slice3_layer = TRT_ENGINE_ADD_LAYER( - engine_, Slice, *slice_layer->getOutput(0), slice_start3, slice_size2, slice_stride); + auto* slice1_layer = TRT_ENGINE_ADD_LAYER(engine_, + Slice, + *slice_layer->getOutput(0), + slice_start1, + slice_size, + slice_stride); + auto* slice2_layer = TRT_ENGINE_ADD_LAYER(engine_, + Slice, + *slice_layer->getOutput(0), + slice_start2, + slice_size, + slice_stride); + auto* slice3_layer = TRT_ENGINE_ADD_LAYER(engine_, + Slice, + *slice_layer->getOutput(0), + slice_start3, + slice_size2, + slice_stride); // Concatenate slices along the third dimension (C) nvinfer1::IConcatenationLayer* concat_layer; diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py index 3c9ab410d9acfb..137d75a306ecd4 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py @@ -35,7 +35,14 @@ def generate_input1(attrs): for shift_value in [0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.49]: for T in range(2, 5): for data_format in ["NCHW", "NHWC"]: - dics = [{"shift_ratio": shift_value, "seg_num": T, "data_format": data_format}, {}] + dics = [ + { + "shift_ratio": shift_value, + "seg_num": T, + "data_format": data_format, + }, + {}, + ] ops_config = [ { "op_type": "temporal_shift", @@ -61,18 +68,14 @@ def generate_input1(attrs): yield program_config def sample_predictor_configs( - self, program_config + self, program_config ) -> (paddle_infer.Config, List[int], float): def generate_dynamic_shape(attrs): - self.dynamic_shape.min_input_shape = { - "input_data": [6, 10, 64, 64] - } + self.dynamic_shape.min_input_shape = {"input_data": [6, 10, 64, 64]} self.dynamic_shape.max_input_shape = { "input_data": [20, 10, 64, 64] } - self.dynamic_shape.opt_input_shape = { - "input_data": [6, 10, 64, 64] - } + self.dynamic_shape.opt_input_shape = {"input_data": [6, 10, 64, 64]} def clear_dynamic_shape(): self.dynamic_shape.max_input_shape = {} From b6656c7e184f637be6bb3f541b239fea3b104860 Mon Sep 17 00:00:00 2001 From: andsonder Date: Thu, 9 Mar 2023 16:10:21 +0800 Subject: [PATCH 10/17] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E5=8D=95=E6=B5=8B?= =?UTF-8?q?=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../inference/test_trt_convert_temporal_shift.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py index 137d75a306ecd4..e3e2a2c3b3876e 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py @@ -30,7 +30,7 @@ def is_program_valid(self, program_config: ProgramConfig) -> bool: def sample_program_configs(self): def generate_input1(attrs): T = attrs[0]["seg_num"] - return np.ones([3 * T, 10, 64, 64]).astype(np.float32) + return np.random.rand(3 * T, 10, 64, 64).astype(np.float32) for shift_value in [0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.49]: for T in range(2, 5): @@ -68,7 +68,7 @@ def generate_input1(attrs): yield program_config def sample_predictor_configs( - self, program_config + self, program_config ) -> (paddle_infer.Config, List[int], float): def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = {"input_data": [6, 10, 64, 64]} @@ -82,6 +82,9 @@ def clear_dynamic_shape(): self.dynamic_shape.min_input_shape = {} self.dynamic_shape.opt_input_shape = {} + def generate_trt_nodes_num(attrs, is_dynamic_shape): + return 0, 3 + attrs = [ program_config.ops[i].attrs for i in range(len(program_config.ops)) ] @@ -89,16 +92,16 @@ def clear_dynamic_shape(): # for static_shape clear_dynamic_shape() self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), (0, 3), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num(attrs, False), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), (0, 3), 1e-3 + yield self.create_inference_config(), generate_trt_nodes_num(attrs, False), 1e-3 # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), (0, 3), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num(attrs, True), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), (0, 3), 1e-3 + yield self.create_inference_config(), generate_trt_nodes_num(attrs, True), 1e-3 def test(self): self.run_test() From 15087cbbd7c510f2027b0708f020b473fb1151ab Mon Sep 17 00:00:00 2001 From: andsonder Date: Thu, 9 Mar 2023 16:13:00 +0800 Subject: [PATCH 11/17] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E5=8D=95=E6=B5=8B?= =?UTF-8?q?=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../test_trt_convert_temporal_shift.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py index e3e2a2c3b3876e..49a717535e57eb 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py @@ -68,7 +68,7 @@ def generate_input1(attrs): yield program_config def sample_predictor_configs( - self, program_config + self, program_config ) -> (paddle_infer.Config, List[int], float): def generate_dynamic_shape(attrs): self.dynamic_shape.min_input_shape = {"input_data": [6, 10, 64, 64]} @@ -92,16 +92,24 @@ def generate_trt_nodes_num(attrs, is_dynamic_shape): # for static_shape clear_dynamic_shape() self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, False), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, False), 1e-3 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), 1e-3 # for dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 - yield self.create_inference_config(), generate_trt_nodes_num(attrs, True), 1e-5 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), 1e-5 self.trt_param.precision = paddle_infer.PrecisionType.Half - yield self.create_inference_config(), generate_trt_nodes_num(attrs, True), 1e-3 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), 1e-3 def test(self): self.run_test() From f163694e12e4efd97ef55b7987068862057408e0 Mon Sep 17 00:00:00 2001 From: andsonder Date: Sun, 12 Mar 2023 11:11:55 +0800 Subject: [PATCH 12/17] update temporal_shift_op settings --- .../framework/ir/trt_support_nhwc_pass.cc | 3 +- .../fluid/inference/api/analysis_predictor.cc | 2 +- paddle/fluid/inference/tensorrt/op_teller.cc | 31 ++++++++++++++++++- 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/framework/ir/trt_support_nhwc_pass.cc b/paddle/fluid/framework/ir/trt_support_nhwc_pass.cc index 142bc5d601d89e..86c7b7c9dbbaee 100644 --- a/paddle/fluid/framework/ir/trt_support_nhwc_pass.cc +++ b/paddle/fluid/framework/ir/trt_support_nhwc_pass.cc @@ -154,8 +154,7 @@ void TrtSupportNHWCPass::ApplyImpl(Graph *graph) const { "bilinear_interp", "bilinear_interp_v2", "nearest_interp", - "nearest_interp_v2", - "temporal_shift"}; + "nearest_interp_v2"}; // Ops must run under the original layout even though it has // data_format/data_layout attribute, otherwise it will be very troublesome! std::unordered_set must_original_layout_ops{ diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 677f331b462ede..ccda587530bfdc 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -2546,7 +2546,7 @@ USE_TRT_CONVERTER(grid_sampler) #endif #if IS_TRT_VERSION_GE(8200) USE_TRT_CONVERTER(set_value) -USE_TRT_CONVERTER(temporal_shift); +USE_TRT_CONVERTER(temporal_shift) #endif #if PADDLE_WITH_CUSPARSELT && IS_TRT_VERSION_GE(8000) USE_TRT_CONVERTER(sparse_fc) diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 68cdda88b81e75..887b4de9104918 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -2589,6 +2589,35 @@ struct SimpleOpTypeSetTeller : public Teller { VLOG(3) << "temporal_shift is not supported when TensorRT < 8.2"; return false; #endif + + if (!with_dynamic_shape) { + VLOG(3) << "the temporal shift does not support " + "static shape yet"; + return false; + } + + if (!desc.HasAttr("shift_ratio") || !desc.HasAttr("seg_num")) { + VLOG(3) << "temporal shift need attributes : shift_ratio and seg_num"; + return false; + } + + auto* block = desc.Block(); + if (block == nullptr) { + VLOG(3) << "The block desc is nullptr, we can't continue to analyze. " + "Developers need to check whether block_desc is passed in " + "the pass."; + return false; + } + + auto input_name = desc.Input("X")[0]; + auto* input_desc = block->FindVar(input_name); + const auto input_shape = input_desc->GetShape(); + + if (input_shape.size() != 4) { + VLOG(3) << "The input and grid tensors must be shape tensors of rank 4 " + "using TRT TemporalShift layer."; + return false; + } } if (use_no_calib_int8) { @@ -2907,7 +2936,7 @@ struct SimpleOpTypeSetTeller : public Teller { "fuse_eleadd_transpose", "skip_groupnorm_act", "preln_groupnorm_act", - "temporal_shift" + "temporal_shift", "grid_sampler"}; }; From e9dee6a3fe8b6fde71b2aada0a53e8f1112089be Mon Sep 17 00:00:00 2001 From: andsonder Date: Sun, 12 Mar 2023 11:12:31 +0800 Subject: [PATCH 13/17] fix bugs about temporal_shift --- .../tensorrt/convert/temporal_shift_op.cc | 112 +++++++++++------- 1 file changed, 70 insertions(+), 42 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc b/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc index 651a2b04df89d9..eb5ed05c57a33b 100644 --- a/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc @@ -37,7 +37,8 @@ class TemporalShiftOpConverter : public OpConverter { const framework::Scope& scope, bool test_mode) override { #if IS_TRT_VERSION_GE(8200) - VLOG(3) << "convert a fluid transpose op to tensorrt tranpose layer"; + + VLOG(3) << "convert a fluid temporal shift op to tensorrt temporal layer"; framework::OpDesc op_desc(op, nullptr); // Declare inputs auto* input = engine_->GetITensor(op_desc.Input("X")[0]); @@ -62,16 +63,17 @@ class TemporalShiftOpConverter : public OpConverter { auto input_dims = input->getDimensions(); - const int NT = input_dims.d[0]; const int C = input_dims.d[1]; const int H = input_dims.d[2]; const int W = input_dims.d[3]; - const int N = NT / T; + std::cout << "C: " << C << " H: " << H << " W: " << W + << "shift_ratio: " << shift_ratio << " T: " << T << std::endl; // Reshape input to [N,T,C,H,W] auto reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); - nvinfer1::Dims reshape_dims{5, { N, T, C, H, W }}; + nvinfer1::Dims reshape_dims{5, { -1, T, C, H, W }}; reshape_layer->setReshapeDimensions(reshape_dims); + input = reshape_layer->getOutput(0); // Pad input to [N,T+2,C,H,W] std::vector pre_pad_v{0, 1, 0, 0, 0}; @@ -101,11 +103,7 @@ class TemporalShiftOpConverter : public OpConverter { nvinfer1::ElementWiseOperation::kSUM) ->getOutput(0); - std::vector input_shape_v(dims, 0); - for (int i = 0; i < dims; i++) { - input_shape_v[i] = input->getDimensions().d[i]; - } - auto const input_shape = Add1DConstantLayer(input_shape_v); + auto const input_shape = Shape(input); size = TRT_ENGINE_ADD_LAYER(engine_, ElementWise, @@ -126,36 +124,67 @@ class TemporalShiftOpConverter : public OpConverter { stride); slice_layer->setInput(1, *start); slice_layer->setInput(2, *size); +#if IS_TRT_VERSION_GE(8500) + slice_layer->setMode(nvinfer1::SampleMode::kFILL); +#else slice_layer->setMode(nvinfer1::SliceMode::kFILL); - +#endif + // Slice Padded Tensor - int slice_c = static_cast(C * shift_ratio); - int slice_c2 = static_cast(C * shift_ratio * 2); - auto slice_start1 = nvinfer1::Dims{5, { 0, 0, 0, 0, 0 }}; - auto slice_start2 = nvinfer1::Dims{5, { 0, 2, slice_c, 0, 0 }}; - auto slice_start3 = nvinfer1::Dims{5, { 0, 1, slice_c2, 0, 0 }}; - auto slice_size = nvinfer1::Dims{5, { N, T, slice_c, H, W }}; - auto slice_size2 = nvinfer1::Dims{5, { N, T, C - slice_c2, H, W }}; - auto slice_stride = nvinfer1::Dims{5, { 1, 1, 1, 1, 1 }}; - - auto* slice1_layer = TRT_ENGINE_ADD_LAYER(engine_, - Slice, - *slice_layer->getOutput(0), - slice_start1, - slice_size, - slice_stride); - auto* slice2_layer = TRT_ENGINE_ADD_LAYER(engine_, - Slice, - *slice_layer->getOutput(0), - slice_start2, - slice_size, - slice_stride); - auto* slice3_layer = TRT_ENGINE_ADD_LAYER(engine_, - Slice, - *slice_layer->getOutput(0), - slice_start3, - slice_size2, - slice_stride); + const int slice_c = static_cast(C * shift_ratio); + const int slice_c2 = static_cast(C * shift_ratio * 2); + + nvinfer1::ITensor* slice_start1 = Add1DConstantLayer(zeros_v); + nvinfer1::ITensor* slice_start2 = + Add1DConstantLayer(std::vector{0, 2, slice_c, 0, 0}); + nvinfer1::ITensor* slice_start3 = + Add1DConstantLayer(std::vector{0, 1, slice_c2, 0, 0}); + + nvinfer1::ITensor* slice_size_base = Shape(input); + nvinfer1::ITensor* sub_size1 = + Add1DConstantLayer(std::vector{0, 0, C - slice_c, 0, 0}); + nvinfer1::ITensor* sub_size2 = Add1DConstantLayer( + std::vector{0, 0, C + slice_c - slice_c2, 0, 0}); + nvinfer1::ITensor* sub_size3 = + Add1DConstantLayer(std::vector{0, 0, slice_c2, 0, 0}); + // [N, T, C, H, W] - [0, 0, C - slice_c, 0, 0] = [N, T, slice_c, H, W] + nvinfer1::ITensor* slice_size1 = + TRT_ENGINE_ADD_LAYER(engine_, + ElementWise, + *slice_size_base, + *sub_size1, + nvinfer1::ElementWiseOperation::kSUB) + ->getOutput(0); + + nvinfer1::ITensor* slice_size2 = + TRT_ENGINE_ADD_LAYER(engine_, + ElementWise, + *slice_size_base, + *sub_size2, + nvinfer1::ElementWiseOperation::kSUB) + ->getOutput(0); + nvinfer1::ITensor* slice_size3 = + TRT_ENGINE_ADD_LAYER(engine_, + ElementWise, + *slice_size_base, + *sub_size3, + nvinfer1::ElementWiseOperation::kSUB) + ->getOutput(0); + + auto* slice1_layer = TRT_ENGINE_ADD_LAYER( + engine_, Slice, *slice_layer->getOutput(0), dummy, dummy, stride); + slice1_layer->setInput(1, *slice_start1); + slice1_layer->setInput(2, *slice_size1); + + auto* slice2_layer = TRT_ENGINE_ADD_LAYER( + engine_, Slice, *slice_layer->getOutput(0), dummy, dummy, stride); + slice2_layer->setInput(1, *slice_start2); + slice2_layer->setInput(2, *slice_size2); + + auto* slice3_layer = TRT_ENGINE_ADD_LAYER( + engine_, Slice, *slice_layer->getOutput(0), dummy, dummy, stride); + slice3_layer->setInput(1, *slice_start3); + slice3_layer->setInput(2, *slice_size3); // Concatenate slices along the third dimension (C) nvinfer1::IConcatenationLayer* concat_layer; @@ -173,16 +202,15 @@ class TemporalShiftOpConverter : public OpConverter { TRT_ENGINE_ADD_LAYER(engine_, Concatenation, concat_inputs, 3); concat_layer->setAxis(2); } - + // Reshape output to [N*T,C,H,W] - nvinfer1::Dims output_shape{4, { N * T, C, H, W }}; auto* reshape_layer3 = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *concat_layer->getOutput(0)); - reshape_layer3->setReshapeDimensions(output_shape); - + reshape_layer3->setReshapeDimensions(input_dims); + // Set output auto output_name = op_desc.Output("Out")[0]; - + if (data_format == "NHWC") { // Transpose output to [N*T,C,H,W] -> [N*T,H,W,C] auto transpose_layer2 = From f247f64a18d82fa68c681a1ecfe0b8d9c553f328 Mon Sep 17 00:00:00 2001 From: andsonder Date: Sun, 12 Mar 2023 11:14:12 +0800 Subject: [PATCH 14/17] update trt node nums for dynamic mode --- .../test_trt_convert_temporal_shift.py | 40 +++++++++++++++++-- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py index 49a717535e57eb..e59135b424296b 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py @@ -25,12 +25,31 @@ class TrtConvertTemporalShiftTest(TrtLayerAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: + compile_version = paddle_infer.get_trt_compile_version() + runtime_version = paddle_infer.get_trt_runtime_version() + if ( + compile_version[0] * 1000 + + compile_version[1] * 100 + + compile_version[2] * 10 + < 8200 + ): + return False + if ( + runtime_version[0] * 1000 + + runtime_version[1] * 100 + + runtime_version[2] * 10 + < 8200 + ): + return False return True def sample_program_configs(self): def generate_input1(attrs): T = attrs[0]["seg_num"] - return np.random.rand(3 * T, 10, 64, 64).astype(np.float32) + shape = [2 * T, 10, 64, 64] + return np.random.uniform(low=0.1, high=1.0, size=shape).astype( + np.float32 + ) for shift_value in [0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.49]: for T in range(2, 5): @@ -71,11 +90,16 @@ def sample_predictor_configs( self, program_config ) -> (paddle_infer.Config, List[int], float): def generate_dynamic_shape(attrs): - self.dynamic_shape.min_input_shape = {"input_data": [6, 10, 64, 64]} + t = attrs[0]['seg_num'] + self.dynamic_shape.min_input_shape = { + "input_data": [2 * t, 10, 64, 64] + } self.dynamic_shape.max_input_shape = { - "input_data": [20, 10, 64, 64] + "input_data": [5 * t, 10, 64, 64] + } + self.dynamic_shape.opt_input_shape = { + "input_data": [3 * t, 10, 64, 64] } - self.dynamic_shape.opt_input_shape = {"input_data": [6, 10, 64, 64]} def clear_dynamic_shape(): self.dynamic_shape.max_input_shape = {} @@ -83,6 +107,14 @@ def clear_dynamic_shape(): self.dynamic_shape.opt_input_shape = {} def generate_trt_nodes_num(attrs, is_dynamic_shape): + valid_version = (8, 2, 0) + compile_version = paddle_infer.get_trt_compile_version() + runtime_version = paddle_infer.get_trt_runtime_version() + self.assertTrue(compile_version == runtime_version) + if compile_version < valid_version: + return 0, 3 + if is_dynamic_shape: + return 1, 2 return 0, 3 attrs = [ From 90973fc30cfee678d2946307caaaf78c80606379 Mon Sep 17 00:00:00 2001 From: andsonder Date: Sun, 12 Mar 2023 11:15:11 +0800 Subject: [PATCH 15/17] remove std::cout --- .../tensorrt/convert/temporal_shift_op.cc | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc b/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc index eb5ed05c57a33b..157d4a8580c8ae 100644 --- a/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc @@ -66,8 +66,6 @@ class TemporalShiftOpConverter : public OpConverter { const int C = input_dims.d[1]; const int H = input_dims.d[2]; const int W = input_dims.d[3]; - std::cout << "C: " << C << " H: " << H << " W: " << W - << "shift_ratio: " << shift_ratio << " T: " << T << std::endl; // Reshape input to [N,T,C,H,W] auto reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); @@ -129,7 +127,7 @@ class TemporalShiftOpConverter : public OpConverter { #else slice_layer->setMode(nvinfer1::SliceMode::kFILL); #endif - + // Slice Padded Tensor const int slice_c = static_cast(C * shift_ratio); const int slice_c2 = static_cast(C * shift_ratio * 2); @@ -139,7 +137,7 @@ class TemporalShiftOpConverter : public OpConverter { Add1DConstantLayer(std::vector{0, 2, slice_c, 0, 0}); nvinfer1::ITensor* slice_start3 = Add1DConstantLayer(std::vector{0, 1, slice_c2, 0, 0}); - + nvinfer1::ITensor* slice_size_base = Shape(input); nvinfer1::ITensor* sub_size1 = Add1DConstantLayer(std::vector{0, 0, C - slice_c, 0, 0}); @@ -170,17 +168,17 @@ class TemporalShiftOpConverter : public OpConverter { *sub_size3, nvinfer1::ElementWiseOperation::kSUB) ->getOutput(0); - + auto* slice1_layer = TRT_ENGINE_ADD_LAYER( engine_, Slice, *slice_layer->getOutput(0), dummy, dummy, stride); slice1_layer->setInput(1, *slice_start1); slice1_layer->setInput(2, *slice_size1); - + auto* slice2_layer = TRT_ENGINE_ADD_LAYER( engine_, Slice, *slice_layer->getOutput(0), dummy, dummy, stride); slice2_layer->setInput(1, *slice_start2); slice2_layer->setInput(2, *slice_size2); - + auto* slice3_layer = TRT_ENGINE_ADD_LAYER( engine_, Slice, *slice_layer->getOutput(0), dummy, dummy, stride); slice3_layer->setInput(1, *slice_start3); @@ -202,15 +200,15 @@ class TemporalShiftOpConverter : public OpConverter { TRT_ENGINE_ADD_LAYER(engine_, Concatenation, concat_inputs, 3); concat_layer->setAxis(2); } - + // Reshape output to [N*T,C,H,W] auto* reshape_layer3 = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *concat_layer->getOutput(0)); reshape_layer3->setReshapeDimensions(input_dims); - + // Set output auto output_name = op_desc.Output("Out")[0]; - + if (data_format == "NHWC") { // Transpose output to [N*T,C,H,W] -> [N*T,H,W,C] auto transpose_layer2 = From 7b9ae3fa716c64fbd39a5d6d207ed091df486463 Mon Sep 17 00:00:00 2001 From: andsonder Date: Mon, 13 Mar 2023 08:37:05 +0800 Subject: [PATCH 16/17] delete compile version judge in test file --- .../inference/test_trt_convert_temporal_shift.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py index e59135b424296b..b0b2ce5106213c 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_temporal_shift.py @@ -25,22 +25,6 @@ class TrtConvertTemporalShiftTest(TrtLayerAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: - compile_version = paddle_infer.get_trt_compile_version() - runtime_version = paddle_infer.get_trt_runtime_version() - if ( - compile_version[0] * 1000 - + compile_version[1] * 100 - + compile_version[2] * 10 - < 8200 - ): - return False - if ( - runtime_version[0] * 1000 - + runtime_version[1] * 100 - + runtime_version[2] * 10 - < 8200 - ): - return False return True def sample_program_configs(self): From 27d43f830d7484340699a067849f08e30cb93bf3 Mon Sep 17 00:00:00 2001 From: andsonder Date: Mon, 13 Mar 2023 11:14:54 +0800 Subject: [PATCH 17/17] remove useless codes --- .../tensorrt/convert/temporal_shift_op.cc | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc b/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc index 157d4a8580c8ae..03983ff3930336 100644 --- a/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/temporal_shift_op.cc @@ -1,4 +1,4 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,16 +14,6 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -namespace paddle { -namespace framework { -class Scope; - -namespace proto { -class OpDesc; -} // namespace proto -} // namespace framework -} // namespace paddle - namespace paddle { namespace inference { namespace tensorrt { @@ -38,7 +28,7 @@ class TemporalShiftOpConverter : public OpConverter { bool test_mode) override { #if IS_TRT_VERSION_GE(8200) - VLOG(3) << "convert a fluid temporal shift op to tensorrt temporal layer"; + VLOG(3) << "convert a temporal shift op to tensorrt temporal layer"; framework::OpDesc op_desc(op, nullptr); // Declare inputs auto* input = engine_->GetITensor(op_desc.Input("X")[0]);