[Paddle-TRT][cherry pick] Slice to 2.3 (#44757)
* slice_to_2.3
zhoutianzi666 authored Aug 4, 2022
1 parent 7cdce09 commit 245005d
Showing 6 changed files with 494 additions and 252 deletions.
203 changes: 143 additions & 60 deletions paddle/fluid/inference/tensorrt/convert/fc_op.cc
@@ -34,51 +34,97 @@ namespace tensorrt {
class FcOpConverter : public OpConverter {
public:
nvinfer1::ILayer* reshape_before_fc(nvinfer1::ITensor* before_fc,
nvinfer1::Dims x_dim, int x_num_col_dims,
nvinfer1::Dims x_dim,
int x_num_col_dims,
std::string output_name) {
// add shuffle before fc
nvinfer1::Dims reshape_before_fc_dim;
reshape_before_fc_dim.nbDims = x_num_col_dims + 3;
// padding shape "* x q x 1 x 1"
for (int i = 0; i < reshape_before_fc_dim.nbDims; i++) {
reshape_before_fc_dim.d[i] = 1;
}
for (int i = 0; i < x_dim.nbDims; i++) {
if (i < x_num_col_dims) {
reshape_before_fc_dim.d[i] = 0;
} else {
if (x_dim.d[i] < 0) {
reshape_before_fc_dim.d[x_num_col_dims] = -1;
break;

nvinfer1::ITensor* filal_reshape_before_fc_shape_tensor = nullptr;

if (!engine_->with_dynamic_shape()) {
for (int i = 0; i < reshape_before_fc_dim.nbDims; i++) {
reshape_before_fc_dim.d[i] = 1;
}
for (int i = 0; i < x_dim.nbDims; i++) {
if (i < x_num_col_dims) {
reshape_before_fc_dim.d[i] = 0;
} else {
reshape_before_fc_dim.d[x_num_col_dims] *= x_dim.d[i];
}
}
} else {
std::vector<nvinfer1::ITensor*> reshape_before_fc_shape_tensor;
nvinfer1::ITensor* input_shape_tensor = Shape(before_fc);

for (int i = 0; i < reshape_before_fc_dim.nbDims; i++) {
reshape_before_fc_shape_tensor.push_back(Add1DConstantLayer(1));
}
for (int i = 0; i < x_dim.nbDims; i++) {
if (i < x_num_col_dims) {
reshape_before_fc_shape_tensor[i] =
GetEleTensorOfShape(input_shape_tensor, i);
} else {
reshape_before_fc_shape_tensor[x_num_col_dims] =
Prod(GetEleTensorOfShape(input_shape_tensor, i),
reshape_before_fc_shape_tensor[x_num_col_dims]);
// If not set, test_trt_matmul_quant_dequant in trt 6015 will fail
reshape_before_fc_shape_tensor[x_num_col_dims]->setType(
nvinfer1::DataType::kINT32);
}
reshape_before_fc_dim.d[x_num_col_dims] *= x_dim.d[i];
}
filal_reshape_before_fc_shape_tensor =
Concat(reshape_before_fc_shape_tensor);
}

auto* reshape_before_fc_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *before_fc);
reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
if (!engine_->with_dynamic_shape()) {
reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
} else {
reshape_before_fc_layer->setInput(1,
*filal_reshape_before_fc_shape_tensor);
}
reshape_before_fc_layer->setName(
("fc_op_reshape_before_fc: Shuffle (Output: " + output_name + ")")
.c_str());
return reshape_before_fc_layer;
}

nvinfer1::ILayer* reshape_after_fc(nvinfer1::ITensor* after_fc,
nvinfer1::Dims x_dim, int x_num_col_dims) {
nvinfer1::Dims x_dim,
int x_num_col_dims) {
// add shuffle after fc
nvinfer1::Dims reshape_after_fc_dim;
reshape_after_fc_dim.nbDims = x_num_col_dims + 1;
for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
reshape_after_fc_dim.d[i] = 0;

nvinfer1::ITensor* filal_reshape_after_fc_shape_tensor = nullptr;
if (!engine_->with_dynamic_shape()) {
for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
reshape_after_fc_dim.d[i] = 0;
}
} else {
std::vector<int> gather_indices(x_num_col_dims + 1);
std::iota(gather_indices.begin(), gather_indices.end(), 0);
filal_reshape_after_fc_shape_tensor =
Gather(Shape(after_fc), gather_indices);
}

auto* reshape_after_fc_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *after_fc);
reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);
if (!engine_->with_dynamic_shape()) {
reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);
} else {
reshape_after_fc_layer->setInput(1, *filal_reshape_after_fc_shape_tensor);
}
return reshape_after_fc_layer;
}

void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override {
const framework::Scope& scope,
bool test_mode) override {
VLOG(3) << "convert a fluid fc op to tensorrt fc layer without bias";
framework::OpDesc op_desc(op, nullptr);
auto output_name = op_desc.Output("Out").front();
@@ -96,8 +142,9 @@ class FcOpConverter : public OpConverter {
// Declare weights
auto* Y_v = scope.FindVar(op_desc.Input(w_name).front());
PADDLE_ENFORCE_NOT_NULL(
Y_v, platform::errors::NotFound(
"Can not find %s presistale var of fc in scope.", w_name));
Y_v,
platform::errors::NotFound(
"Can not find %s presistale var of fc in scope.", w_name));
auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
int x_num_col_dims =
op_desc.HasAttr("x_num_col_dims")
@@ -128,7 +175,8 @@ class FcOpConverter : public OpConverter {
}
weight_data = engine_->GetWeightCPUData(op_desc.Input(w_name).front(), Y_t);

PADDLE_ENFORCE_EQ(Y_t->dims().size(), 2UL,
PADDLE_ENFORCE_EQ(Y_t->dims().size(),
2UL,
platform::errors::InvalidArgument(
"The fc's weight should be a matrix with 2 dims, but "
"it's %d-dimensional.",
@@ -143,25 +191,31 @@ class FcOpConverter : public OpConverter {
}
};

auto regist_fc = [&](nvinfer1::ITensor* inputs, int n_output,
auto regist_fc = [&](nvinfer1::ITensor* inputs,
int n_output,
TensorRTEngine::Weight& weight,
TensorRTEngine::Weight& bias) {
if (enable_int8 || support_int8) {
// add conv layer
float out_scale = 0;
if (enable_int8) {
PADDLE_ENFORCE_EQ(
op_desc.HasAttr("out_threshold"), true,
op_desc.HasAttr("out_threshold"),
true,
platform::errors::InvalidArgument(
"must have out threshold in fc layers in int8 mode"));
out_scale = BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
} else {
out_scale = BOOST_GET_CONST(float, op_desc.GetAttr("Out"));
}
nvinfer1::DimsHW nv_ksize(1, 1);
auto* fc_layer_int8 =
TRT_ENGINE_ADD_LAYER(engine_, Convolution, *inputs, n_output,
nv_ksize, weight.get(), bias.get());
auto* fc_layer_int8 = TRT_ENGINE_ADD_LAYER(engine_,
Convolution,
*inputs,
n_output,
nv_ksize,
weight.get(),
bias.get());
fc_layer_int8->setName(
("fc_op_int8_conv1x1: Convolution (Output: " + output_name + ")")
.c_str());
@@ -174,21 +228,29 @@ class FcOpConverter : public OpConverter {
.c_str());
engine_->SetTensorDynamicRange(fc_after_reshape_int8->getOutput(0),
out_scale);
nvinfer1::IActivationLayer* relu_layer_int8 = TRT_ENGINE_ADD_LAYER(
engine_, Activation, *(fc_after_reshape_int8->getOutput(0)),
nvinfer1::ActivationType::kRELU);
RreplenishLayerAndOutput(relu_layer_int8, "relu_after_fc_shuffle",
{output_name}, test_mode);
nvinfer1::IActivationLayer* relu_layer_int8 =
TRT_ENGINE_ADD_LAYER(engine_,
Activation,
*(fc_after_reshape_int8->getOutput(0)),
nvinfer1::ActivationType::kRELU);
RreplenishLayerAndOutput(relu_layer_int8,
"relu_after_fc_shuffle",
{output_name},
test_mode);
} else {
RreplenishLayerAndOutput(fc_after_reshape_int8,
"fc_op_int8_reshape_after_fc: Shuffle",
{output_name}, test_mode);
{output_name},
test_mode);
}
} else {
// add fc layer
auto* fc_layer_float =
TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *inputs, n_output,
weight.get(), bias.get());
auto* fc_layer_float = TRT_ENGINE_ADD_LAYER(engine_,
FullyConnected,
*inputs,
n_output,
weight.get(),
bias.get());
fc_layer_float->setName(
("fc_op_float: FullyConnected (Output: " + output_name + ")")
.c_str());
@@ -198,14 +260,20 @@ class FcOpConverter : public OpConverter {
fc_after_reshape_float->setName(
("float_reshape_after_fc: Shuffle (Output: " + output_name + ")")
.c_str());
nvinfer1::IActivationLayer* relu_layer_float = TRT_ENGINE_ADD_LAYER(
engine_, Activation, *(fc_after_reshape_float->getOutput(0)),
nvinfer1::ActivationType::kRELU);
RreplenishLayerAndOutput(relu_layer_float, "relu_after_fc_shuffle",
{output_name}, test_mode);
nvinfer1::IActivationLayer* relu_layer_float =
TRT_ENGINE_ADD_LAYER(engine_,
Activation,
*(fc_after_reshape_float->getOutput(0)),
nvinfer1::ActivationType::kRELU);
RreplenishLayerAndOutput(relu_layer_float,
"relu_after_fc_shuffle",
{output_name},
test_mode);
} else {
RreplenishLayerAndOutput(fc_after_reshape_float, "shuffle_after_fc",
{output_name}, test_mode);
RreplenishLayerAndOutput(fc_after_reshape_float,
"shuffle_after_fc",
{output_name},
test_mode);
}
}
};
@@ -255,15 +323,20 @@ class FcOpConverter : public OpConverter {
if (enable_int8 || support_int8) {
// add conv1x1 layer
nvinfer1::DimsHW nv_ksize(1, 1);
auto* fc_layer_int8 =
TRT_ENGINE_ADD_LAYER(engine_, Convolution, *X, n_output, nv_ksize,
weight.get(), bias.get());
auto* fc_layer_int8 = TRT_ENGINE_ADD_LAYER(engine_,
Convolution,
*X,
n_output,
nv_ksize,
weight.get(),
bias.get());
if (activation_type == "relu") {
fc_layer_int8->setName(
("ernie_fc_op_int8: Convolution (Output: " + output_name + ")")
.c_str());
PADDLE_ENFORCE_EQ(
op_desc.HasAttr("out_threshold"), true,
op_desc.HasAttr("out_threshold"),
true,
platform::errors::InvalidArgument(
"must have out threshold in fc layers in int8 mode"));
float out_scale = 0;
Expand All @@ -275,15 +348,20 @@ class FcOpConverter : public OpConverter {
}
engine_->SetTensorDynamicRange(fc_layer_int8->getOutput(0),
out_scale);
nvinfer1::IActivationLayer* relu_layer_int8 = TRT_ENGINE_ADD_LAYER(
engine_, Activation, *(fc_layer_int8->getOutput(0)),
nvinfer1::ActivationType::kRELU);
RreplenishLayerAndOutput(relu_layer_int8, "relu_after_ernie_fc_int8",
{output_name}, test_mode);
nvinfer1::IActivationLayer* relu_layer_int8 =
TRT_ENGINE_ADD_LAYER(engine_,
Activation,
*(fc_layer_int8->getOutput(0)),
nvinfer1::ActivationType::kRELU);
RreplenishLayerAndOutput(relu_layer_int8,
"relu_after_ernie_fc_int8",
{output_name},
test_mode);
} else {
RreplenishLayerAndOutput(fc_layer_int8,
"ernie_fc_op_int8: Convolution",
{output_name}, test_mode);
{output_name},
test_mode);
}
} else {
// add fc layer
@@ -292,25 +370,30 @@ class FcOpConverter : public OpConverter {
if (activation_type == "relu") {
fc_layer_float->setName(
("ernie_fc_op_float: (Output: " + output_name + ")").c_str());
nvinfer1::IActivationLayer* relu_layer_float = TRT_ENGINE_ADD_LAYER(
engine_, Activation, *(fc_layer_float->getOutput(0)),
nvinfer1::ActivationType::kRELU);
nvinfer1::IActivationLayer* relu_layer_float =
TRT_ENGINE_ADD_LAYER(engine_,
Activation,
*(fc_layer_float->getOutput(0)),
nvinfer1::ActivationType::kRELU);
RreplenishLayerAndOutput(relu_layer_float,
"relu_after_ernie_fc_float", {output_name},
"relu_after_ernie_fc_float",
{output_name},
test_mode);
} else {
RreplenishLayerAndOutput(fc_layer_float, "ernie_fc_op_float",
{output_name}, test_mode);
RreplenishLayerAndOutput(
fc_layer_float, "ernie_fc_op_float", {output_name}, test_mode);
}
}
} else { // need reshape input before and after fc
PADDLE_ENFORCE_GT(
x_dim.nbDims, x_num_col_dims,
x_dim.nbDims,
x_num_col_dims,
platform::errors::InvalidArgument(
"Params and input dims mismatch. Paddle-TRT FC "
"converter expects x_dim.nbDims > x_num_col_dims, but "
"x_dim.nbDims : %d, x_num_col_dims : %d.",
x_dim.nbDims, x_num_col_dims));
x_dim.nbDims,
x_num_col_dims));
auto* reshape_before_fc_layer =
reshape_before_fc(X, x_dim, x_num_col_dims, output_name);
auto* reshape_itensor = reshape_before_fc_layer->getOutput(0);
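Note on the dynamic-shape reshape pattern introduced above: in static-shape mode the converter can still fill an nvinfer1::Dims with literal values (0 meaning "copy this input dimension", plus the folded product of the trailing dimensions), but under dynamic shapes those values are unknown at build time, so reshape_before_fc now builds the target shape in-graph with the converter helpers (Shape, GetEleTensorOfShape, Prod, Concat) and feeds it to the Shuffle layer through setInput(1, ...). The sketch below illustrates the same idea directly with the public TensorRT C++ API (written against the INT32 shape-tensor API of TensorRT 7/8); it is a minimal illustration rather than Paddle's converter code, and the helper addI32Constant and the function name ReshapeBeforeFcDynamic are invented for this sketch.

// Minimal sketch, assuming a TensorRT network built in explicit-batch mode
// with INT32 shape tensors (TensorRT 7/8 era API).
#include <NvInfer.h>
#include <cassert>
#include <cstdint>
#include <vector>

namespace {

// Build a 1-D INT32 constant tensor holding `values`.
// NOTE: constant weights must outlive engine building; a static buffer is
// used here only to keep the sketch short.
nvinfer1::ITensor* addI32Constant(nvinfer1::INetworkDefinition* net,
                                  const std::vector<int32_t>& values) {
  static std::vector<std::vector<int32_t>> storage;  // keeps data alive
  storage.push_back(values);
  nvinfer1::Weights w{nvinfer1::DataType::kINT32, storage.back().data(),
                      static_cast<int64_t>(storage.back().size())};
  nvinfer1::Dims d{};
  d.nbDims = 1;
  d.d[0] = static_cast<int32_t>(values.size());
  return net->addConstant(d, w)->getOutput(0);
}

}  // namespace

// Reshape x (rank n, possibly with dynamic dims) to
// [d0, ..., d_{k-1}, d_k * ... * d_{n-1}, 1, 1] with k = x_num_col_dims,
// i.e. the padded "* x q x 1 x 1" layout the FC converter expects.
nvinfer1::ITensor* ReshapeBeforeFcDynamic(nvinfer1::INetworkDefinition* net,
                                          nvinfer1::ITensor* x,
                                          int x_num_col_dims) {
  const int rank = x->getDimensions().nbDims;
  assert(x_num_col_dims >= 1 && x_num_col_dims < rank);

  // Runtime shape of x as a 1-D INT32 tensor.
  nvinfer1::ITensor* shape = net->addShape(*x)->getOutput(0);

  // Keep the first k dimensions as they are: gather elements [0, k).
  std::vector<int32_t> head(x_num_col_dims);
  for (int i = 0; i < x_num_col_dims; ++i) head[i] = i;
  nvinfer1::ITensor* kept =
      net->addGather(*shape, *addI32Constant(net, head), 0)->getOutput(0);

  // Fold the remaining dimensions into one: product of elements [k, rank).
  nvinfer1::ITensor* folded = addI32Constant(net, {1});
  for (int i = x_num_col_dims; i < rank; ++i) {
    nvinfer1::ITensor* di =
        net->addGather(*shape, *addI32Constant(net, {i}), 0)->getOutput(0);
    folded = net->addElementWise(*folded, *di,
                                 nvinfer1::ElementWiseOperation::kPROD)
                 ->getOutput(0);
  }

  // Two trailing singleton dimensions, as in the converter's padded shape.
  nvinfer1::ITensor* ones = addI32Constant(net, {1, 1});

  nvinfer1::ITensor* parts[] = {kept, folded, ones};
  nvinfer1::IConcatenationLayer* concat = net->addConcatenation(parts, 3);
  concat->setAxis(0);  // concatenate along the single shape-tensor axis
  nvinfer1::ITensor* target_shape = concat->getOutput(0);

  // Dynamic reshape: the target shape becomes the shuffle layer's 2nd input.
  nvinfer1::IShuffleLayer* shuffle = net->addShuffle(*x);
  shuffle->setInput(1, *target_shape);
  return shuffle->getOutput(0);
}

reshape_after_fc follows the same pattern, except that its target shape is simply the first x_num_col_dims + 1 elements of the FC output's shape, gathered with an iota index vector, as the Gather(Shape(after_fc), gather_indices) call in the diff does.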
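The int8 branch in this file implements the FC as a 1x1 Convolution (nv_ksize(1, 1)) over the input that was padded to "* x q x 1 x 1", and registers the output's dynamic range from the op's out_threshold attribute. The sketch below shows that mapping with the raw TensorRT API. It is a hedged illustration, not the converter itself: the function name AddInt8FcAsConv1x1 is hypothetical, and the in_scale argument stands in for input ranges that Paddle's engine actually collects elsewhere.

// Minimal sketch, assuming x has already been reshaped to [N, C, 1, 1] and
// that weight/bias are FP32 host buffers that stay alive during the build.
#include <NvInfer.h>

nvinfer1::ITensor* AddInt8FcAsConv1x1(nvinfer1::INetworkDefinition* net,
                                      nvinfer1::IBuilderConfig* config,
                                      nvinfer1::ITensor* x,      // [N, C, 1, 1]
                                      int n_output,
                                      nvinfer1::Weights weight,  // n_output * C floats
                                      nvinfer1::Weights bias,    // n_output floats
                                      float in_scale,
                                      float out_scale) {
  config->setFlag(nvinfer1::BuilderFlag::kINT8);

  // A 1x1 convolution over a 1x1 spatial extent is a per-sample matmul:
  // y[n, o] = sum_c w[o, c] * x[n, c] + b[o], i.e. exactly a fully
  // connected layer on the channel dimension.
  nvinfer1::Dims kernel{};
  kernel.nbDims = 2;
  kernel.d[0] = 1;
  kernel.d[1] = 1;
  auto* conv = net->addConvolutionNd(*x, n_output, kernel, weight, bias);

  // Without a calibrator, int8 mode needs explicit per-tensor dynamic
  // ranges; out_scale plays the role of the converter's out_threshold.
  x->setDynamicRange(-in_scale, in_scale);
  conv->getOutput(0)->setDynamicRange(-out_scale, out_scale);
  return conv->getOutput(0);
}

Because the spatial extent is 1x1, the arithmetic is identical to the FullyConnected layer used in the float branch; only the layer type, and therefore the kernel selection in int8 mode, differs.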