-
Notifications
You must be signed in to change notification settings - Fork 5.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Hackathon NO.71] 为 Paddle-TRT 添加 pad3d 算子 #50986
Changes from 17 commits
5e8a79e
5d6bf44
443f608
0c92e0e
66a3c42
933e021
ed83ca0
01b6b4c
f1563b6
0deb315
c246abb
89f0957
90713f3
8a72aff
924ce3f
215472c
dfcd101
eac5f71
5e50733
ac1918d
40afeb7
dcf4210
b448e45
13a1c82
a3106bd
46a6218
458f538
c90c556
af0d6f3
61363ee
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ list( | |
concat_op.cc | ||
dropout_op.cc | ||
group_norm_op.cc | ||
pad3d_op.cc | ||
pad_op.cc | ||
split_op.cc | ||
square_op.cc | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,195 @@ | ||
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. */ | ||
|
||
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" | ||
|
||
namespace paddle { | ||
namespace framework { | ||
class Scope; | ||
|
||
namespace proto { | ||
class OpDesc; | ||
} // namespace proto | ||
} // namespace framework | ||
} // namespace paddle | ||
|
||
namespace paddle { | ||
namespace inference { | ||
namespace tensorrt { | ||
|
||
/* | ||
* Pad3dOp. | ||
*/ | ||
class Pad3dOpConverter : public OpConverter { | ||
public: | ||
void operator()(const framework::proto::OpDesc& op, | ||
const framework::Scope& scope, | ||
bool test_mode) override { | ||
#if IS_TRT_VERSION_GE(8200) | ||
VLOG(3) << "convert a fluid transpose op to tensorrt tranpose layer"; | ||
|
||
framework::OpDesc op_desc(op, nullptr); | ||
|
||
// Declare inputs | ||
auto* input = engine_->GetITensor(op_desc.Input("X")[0]); | ||
|
||
std::vector<int> paddings; | ||
if (op_desc.HasInput("Paddings")) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 需要同时满足 op_desc.Input("Paddings").size() > 0,否则会core dump There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
auto* paddings_v = scope.FindVar(op_desc.Input("Paddings")[0]); | ||
auto* padding_t = paddings_v->GetMutable<phi::DenseTensor>(); | ||
phi::DenseTensor paddings_tensor; | ||
paddings_tensor.Resize(padding_t->dims()); | ||
platform::CPUPlace cpu_place; | ||
paddle::framework::TensorCopySync( | ||
(*padding_t), cpu_place, &paddings_tensor); | ||
auto* paddings_data = | ||
paddings_tensor.mutable_data<int>(platform::CPUPlace()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 有一个GetITensor的函数可以利用 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这里写法是有问题的,动态shape下Paddings为tensor输入,这里是获取不到数据的,全部都需要使用Tensor形式操作 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这里应该是可以获取到的,我参考了 batch norm 那个op的写法,数据是从 |
||
paddings = std::vector<int>(paddings_data, | ||
paddings_data + paddings_tensor.numel()); | ||
|
||
} else { | ||
paddings = | ||
PADDLE_GET_CONST(std::vector<int>, op_desc.GetAttr("paddings")); | ||
} | ||
|
||
float value{0.F}; | ||
if (op_desc.HasAttr("value")) { | ||
value = PADDLE_GET_CONST(float, op_desc.GetAttr("value")); | ||
} | ||
|
||
std::string padding_mode = "constant"; | ||
if (op_desc.HasAttr("mode")) { | ||
padding_mode = PADDLE_GET_CONST(std::string, op_desc.GetAttr("mode")); | ||
} | ||
|
||
const int inputDim = input->getDimensions().nbDims; | ||
const int pad_size = paddings.size(); | ||
PADDLE_ENFORCE_EQ(inputDim * 2 - 4, | ||
pad_size, | ||
phi::errors::InvalidArgument( | ||
"Expected paddings size is %d, but received %d.", | ||
inputDim * 2 - 4, | ||
pad_size)); | ||
|
||
// convert paddle pad to tensorrt pad | ||
std::vector<int> pre_pad_v(inputDim, 0); | ||
std::vector<int> post_pad_v(inputDim, 0); | ||
|
||
for (int i = 0; i < inputDim; i += 2) { | ||
pre_pad_v[i + 2] = paddings[pad_size - 2 - i]; | ||
post_pad_v[i + 2] = paddings[pad_size - 1 - i]; | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 同上面提到问题,paddings不一定能获取到 |
||
|
||
nvinfer1::ITensor* pre_pad = Add1DConstantLayer(pre_pad_v); | ||
nvinfer1::ITensor* post_pad = Add1DConstantLayer(post_pad_v); | ||
|
||
std::vector<int> zeros_v(inputDim, 0); | ||
auto const zeros = Add1DConstantLayer(zeros_v); | ||
|
||
nvinfer1::ITensor* start{}; | ||
nvinfer1::ITensor* size{}; | ||
// elementwise add zeros and pre_pad | ||
start = TRT_ENGINE_ADD_LAYER(engine_, | ||
ElementWise, | ||
*zeros, | ||
*pre_pad, | ||
nvinfer1::ElementWiseOperation::kSUB) | ||
->getOutput(0); | ||
|
||
auto const total_padding = | ||
TRT_ENGINE_ADD_LAYER(engine_, | ||
ElementWise, | ||
*pre_pad, | ||
*post_pad, | ||
nvinfer1::ElementWiseOperation::kSUM) | ||
->getOutput(0); | ||
|
||
std::vector<int> input_shape_v(inputDim, 0); | ||
for (int i = 0; i < inputDim; i++) { | ||
input_shape_v[i] = input->getDimensions().d[i]; | ||
} | ||
auto const input_shape = Add1DConstantLayer(input_shape_v); | ||
|
||
size = TRT_ENGINE_ADD_LAYER(engine_, | ||
ElementWise, | ||
*input_shape, | ||
*total_padding, | ||
nvinfer1::ElementWiseOperation::kSUM) | ||
->getOutput(0); | ||
|
||
// add slice layer | ||
nvinfer1::Dims stride; | ||
stride.nbDims = inputDim; | ||
std::fill_n(stride.d, inputDim, 1); | ||
auto const& dummy = stride; | ||
auto* slice_layer = | ||
TRT_ENGINE_ADD_LAYER(engine_, | ||
Slice, | ||
*const_cast<nvinfer1::ITensor*>(input), | ||
dummy, | ||
dummy, | ||
stride); | ||
slice_layer->setInput(1, *start); | ||
slice_layer->setInput(2, *size); | ||
if (padding_mode == "constant") { | ||
slice_layer->setMode(nvinfer1::SliceMode::kFILL); | ||
if (value != 0.F) { | ||
nvinfer1::ITensor* fill_value = nullptr; | ||
switch (input->getType()) { | ||
case nvinfer1::DataType::kFLOAT: | ||
case nvinfer1::DataType::kHALF: | ||
case nvinfer1::DataType::kINT8: { | ||
float* value_ptr = const_cast<float*>(&value); | ||
nvinfer1::Weights value_wt{nvinfer1::DataType::kFLOAT, | ||
static_cast<void*>(value_ptr), | ||
static_cast<int32_t>(1)}; | ||
nvinfer1::Dims dims; | ||
dims.nbDims = 0; | ||
fill_value = TRT_ENGINE_ADD_LAYER(engine_, Constant, dims, value_wt) | ||
->getOutput(0); | ||
} | ||
default: { | ||
int* value_ptr = const_cast<int*>(reinterpret_cast<int*>(&value)); | ||
nvinfer1::Weights value_wt{nvinfer1::DataType::kINT32, | ||
static_cast<void*>(value_ptr), | ||
static_cast<int32_t>(1)}; | ||
nvinfer1::Dims dims; | ||
dims.nbDims = 0; | ||
fill_value = TRT_ENGINE_ADD_LAYER(engine_, Constant, dims, value_wt) | ||
->getOutput(0); | ||
} | ||
} | ||
slice_layer->setInput(4, *fill_value); | ||
} | ||
} else if (padding_mode == "reflect") { | ||
slice_layer->setMode(nvinfer1::SliceMode::kREFLECT); | ||
} else if (padding_mode == "replicate") { | ||
slice_layer->setMode(nvinfer1::SliceMode::kCLAMP); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这是TRT 8.2后添加的,这里Converter实现可以用宏定义包起来,参考one_hot实现。 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 已添加trt限制 |
||
} else { | ||
PADDLE_THROW("Unsupported mode: %s", padding_mode); | ||
} | ||
|
||
auto output_name = op_desc.Output("Out")[0]; | ||
RreplenishLayerAndOutput(slice_layer, "pad3d", {output_name}, test_mode); | ||
#else | ||
VLOG(3) << "pad3d is not supported when TensorRT < 8.2"; | ||
#endif | ||
} | ||
}; | ||
|
||
} // namespace tensorrt | ||
} // namespace inference | ||
} // namespace paddle | ||
|
||
REGISTER_TRT_OP_CONVERTER(pad3d, Pad3dOpConverter); |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1724,6 +1724,31 @@ struct SimpleOpTypeSetTeller : public Teller { | |
} | ||
} | ||
|
||
if (op_type == "pad3d") { | ||
#if !IS_TRT_VERSION_GE(8200) | ||
VLOG(3) << "pad3d is not supported when TensorRT < 8.2"; | ||
return false; | ||
#endif | ||
if (!desc.HasAttr("paddings") && !desc.HasInput("Paddings")) { | ||
return false; | ||
} | ||
if (desc.HasAttr("mode")) { | ||
std::string mode = PADDLE_GET_CONST(std::string, desc.GetAttr("mode")); | ||
if (mode != "constant" || (mode != "reflect" && mode != "replicate")) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 这个写法有问题, 应该是“如果全部都不等于” |
||
VLOG(3) << "The pad3d layer of TRT only support " | ||
"constant/reflect/replicate mode."; | ||
return false; | ||
} | ||
} | ||
if (desc.HasAttr("data_format")) { | ||
std::string data_format = | ||
PADDLE_GET_CONST(std::string, desc.GetAttr("data_format")); | ||
if (data_format != "NCDHW") { | ||
VLOG(3) << "The pad3d layer of TRT only support NCDHW data format."; | ||
return false; | ||
} | ||
} | ||
} | ||
if (op_type == "swish") { | ||
auto* block = desc.Block(); | ||
if (block == nullptr) { | ||
|
@@ -1740,7 +1765,6 @@ struct SimpleOpTypeSetTeller : public Teller { | |
return false; | ||
} | ||
} | ||
|
||
if (op_type == "prelu") { | ||
if (desc.Input("X").size() != 1) { | ||
VLOG(3) << "Invalid input X's size of prelu TRT converter. " | ||
|
@@ -2634,6 +2658,7 @@ struct SimpleOpTypeSetTeller : public Teller { | |
"batch_norm", | ||
"concat", | ||
"tanh", | ||
"pad3d", | ||
"pad", | ||
"elementwise_add", | ||
"elementwise_sub", | ||
|
@@ -2787,6 +2812,7 @@ struct SimpleOpTypeSetTeller : public Teller { | |
"batch_norm", | ||
"concat", | ||
"tanh", | ||
"pad3d", | ||
"pad", | ||
"elementwise_add", | ||
"elementwise_sub", | ||
|
@@ -2910,14 +2936,6 @@ struct GenericPluginTeller : public Teller { | |
if (!desc.HasAttr("iou_aware") && !desc.HasAttr("iou_aware_factor")) | ||
return false; | ||
} | ||
if (op_type == "pad3d") { | ||
auto pad3d_inputs = desc.Inputs(); | ||
if (pad3d_inputs.find("Paddings") != pad3d_inputs.end()) { | ||
if (desc.Input("Paddings").size() >= 1) { | ||
return false; | ||
} | ||
} | ||
} | ||
if (use_no_calib_int8) { | ||
return false; | ||
} else { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
顺便改成2023吧
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done