
Preln fix #49802

Merged · 9 commits · Feb 1, 2023

51 changes: 40 additions & 11 deletions paddle/fluid/framework/ir/preln_residual_bias_fuse_pass.cc
@@ -129,6 +129,24 @@ void PrelnResidualBias::operator()(PDNode *x, PDNode *y) {

} // namespace patterns

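// Redirects the op's `out_name` output to a fresh variable name scoped under
// `scope_name`; used below to give the fused op a DropoutMaskOut slot even
// though dropout is disabled in this pass.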
void setIntermediateOut(OpDesc *desc,
const std::string &out_name,
const std::string &scope_name) {
std::string new_name = scope_name + "/at." + out_name + ".new";
desc->SetOutput(out_name, {new_name});
}

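// Creates the matching non-persistable variable node in the graph and links
// it as an output of `op_node`, so the placeholder set by setIntermediateOut
// actually exists in the IR graph.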
void addIntermediateOut(Node *op_node,
const std::string &out_name,
const std::string &scope_name,
Graph *graph) {
std::string new_name = scope_name + "/at." + out_name + ".new";
VarDesc out_var(new_name);
out_var.SetPersistable(false);
auto *node_var = graph->CreateVarNode(&out_var);
IR_NODE_LINK_TO(op_node, node_var);
}

int PrelnResidualBiasFusePass::ApplyPattern(ir::Graph *graph,
bool with_bias) const {
PADDLE_ENFORCE_NOT_NULL(
@@ -207,7 +225,7 @@ int PrelnResidualBiasFusePass::ApplyPattern(ir::Graph *graph,
// on each other, so we add the check below to ensure only one
// PrelnResidualBias pattern is dealt with.
for (auto op : elementwise1_out->inputs) {
if (op->Name() == "preln_residual_bias") return;
if (op->Name() == "fused_bias_dropout_residual_layer_norm") return;
}

if (!IsCompat(subgraph, graph)) {
@@ -218,31 +236,37 @@ int PrelnResidualBiasFusePass::ApplyPattern(ir::Graph *graph,
std::unordered_set<const Node *> del_node_set;
// Create an PrelnResidualBias op node
OpDesc new_desc;
new_desc.SetType("preln_residual_bias");
new_desc.SetType("fused_bias_dropout_residual_layer_norm");
// inputs
new_desc.SetInput("X", {subgraph.at(x)->Name()});
new_desc.SetInput("Y", {subgraph.at(y)->Name()});
new_desc.SetInput("Scale", {layer_norm_scale->Name()});
new_desc.SetInput("Bias", {layer_norm_bias->Name()});
new_desc.SetInput("Residual", {subgraph.at(y)->Name()});
new_desc.SetInput("LnScale", {layer_norm_scale->Name()});
new_desc.SetInput("LnBias", {layer_norm_bias->Name()});
if (with_bias) {
new_desc.SetInput("EleBias", {elementwise_bias->Name()});
new_desc.SetInput("Bias", {elementwise_bias->Name()});
}
// outputs
new_desc.SetOutput("Out_0", {layer_norm_out->Name()});
new_desc.SetOutput("Out_1", {elementwise1_out->Name()});
new_desc.SetOutput("Y", {layer_norm_out->Name()});
new_desc.SetOutput("BiasDropoutResidualOut", {elementwise1_out->Name()});
new_desc.SetOutput("LnMean", {layer_norm_mean->Name()});
new_desc.SetOutput("LnVariance", {layer_norm_variance->Name()});
setIntermediateOut(&new_desc, "DropoutMaskOut", "preln_residual_bias_fuse");
// attrs
new_desc.SetAttr("epsilon", layer_norm->Op()->GetAttr("epsilon"));
new_desc.SetAttr("ln_epsilon", layer_norm->Op()->GetAttr("epsilon"));
new_desc.SetAttr("dropout_rate", 0.0f);
new_desc.SetAttr("is_test", true);
new_desc.SetAttr("begin_norm_axis",
layer_norm->Op()->GetAttr("begin_norm_axis"));
auto fused_node = graph->CreateOpNode(&new_desc); // OpDesc will be copied.
addIntermediateOut(
fused_node, "DropoutMaskOut", "preln_residual_bias_fuse", graph);

if (with_bias) {
del_node_set.insert(elementwise0);
del_node_set.insert(elementwise0_out);
}
del_node_set.insert(elementwise1);
del_node_set.insert(layer_norm);
del_node_set.insert(layer_norm_mean);
del_node_set.insert(layer_norm_variance);
GraphSafeRemoveNodes(graph, del_node_set);
IR_NODE_LINK_TO(subgraph.at(x), fused_node);
IR_NODE_LINK_TO(subgraph.at(y), fused_node);
@@ -253,6 +277,9 @@ int PrelnResidualBiasFusePass::ApplyPattern(ir::Graph *graph,
IR_NODE_LINK_TO(layer_norm_bias, fused_node);
IR_NODE_LINK_TO(fused_node, layer_norm_out);
IR_NODE_LINK_TO(fused_node, elementwise1_out);
IR_NODE_LINK_TO(fused_node, layer_norm_mean);
IR_NODE_LINK_TO(fused_node, layer_norm_variance);

found_subgraph_count++;
};

@@ -261,6 +288,8 @@ int PrelnResidualBiasFusePass::ApplyPattern(ir::Graph *graph,
}

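// In short: the (optional bias add) + residual add + layer_norm pattern is
// replaced by a single fused_bias_dropout_residual_layer_norm op with
// dropout_rate = 0 and is_test = true, reusing the original layer_norm
// outputs and adding a placeholder DropoutMaskOut variable.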
void PrelnResidualBiasFusePass::ApplyImpl(ir::Graph *graph) const {
VLOG(1) << "Fuse PrelnResidualBias into "
"fused_bias_dropout_residual_layer_norm op with dropout rate = 0";
PADDLE_ENFORCE_NOT_NULL(
graph, platform::errors::PreconditionNotMet("graph should not be null."));
FusePassBase::Init("preln_residual_bias_fuse", graph);
2 changes: 1 addition & 1 deletion paddle/fluid/framework/ir/trt_skip_layernorm_fuse_pass.cc
@@ -170,7 +170,7 @@ void TrtSkipLayerNormFusePass::ApplyImpl(ir::Graph *graph) const {
// attrs
new_desc.SetAttr("epsilon", layer_norm->Op()->GetAttr("epsilon"));

if (new_desc.HasAttr("begin_norm_axis")) {
if (layer_norm->Op()->HasAttr("begin_norm_axis")) {
int32_t begin_norm_axis = PADDLE_GET_CONST(
int32_t, layer_norm->Op()->GetAttr("begin_norm_axis"));
int32_t input_rank =
2 changes: 1 addition & 1 deletion paddle/fluid/inference/api/analysis_predictor.cc
@@ -2386,7 +2386,7 @@ USE_TRT_CONVERTER(rsqrt);
USE_TRT_CONVERTER(fused_preln_embedding_eltwise_layernorm)
USE_TRT_CONVERTER(fused_embedding_eltwise_layernorm);
USE_TRT_CONVERTER(preln_skip_layernorm)
USE_TRT_CONVERTER(preln_residual_bias)
USE_TRT_CONVERTER(fused_bias_dropout_residual_layer_norm)
USE_TRT_CONVERTER(c_allreduce_sum)
USE_TRT_CONVERTER(roll)
USE_TRT_CONVERTER(strided_slice)
27 changes: 12 additions & 15 deletions paddle/fluid/inference/tensorrt/convert/preln_residual_bias.cc
@@ -26,16 +26,12 @@ class PrelnResidualBiasOpConverter : public OpConverter {
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope,
bool test_mode) override {
VLOG(4) << "convert fused preln_residual_bias op to tensorrt layer";
if (!engine_->with_dynamic_shape()) {
PADDLE_THROW(
platform::errors::Fatal("Unsupported static graph mode. Please set "
"dynamic shape of inputs."));
}
VLOG(4) << "convert fused_bias_dropout_residual_layer_norm op with "
"drop_rate = 0 to preln_residual_bias tensorrt layer";
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
auto* input1 = engine_->GetITensor(op_desc.Input("X")[0]);
auto* input2 = engine_->GetITensor(op_desc.Input("Y")[0]);
auto* input2 = engine_->GetITensor(op_desc.Input("Residual")[0]);
std::vector<nvinfer1::ITensor*> inputs;
inputs.push_back(input1);
inputs.push_back(input2);
@@ -50,18 +46,18 @@ class PrelnResidualBiasOpConverter : public OpConverter {
return temp_data;
};
framework::DDim bias_dims, scale_dims, ele_bias_dims;
auto* bias = get_persistable_data("Bias", &bias_dims);
auto* scale = get_persistable_data("Scale", &scale_dims);
auto* bias = get_persistable_data("LnBias", &bias_dims);
auto* scale = get_persistable_data("LnScale", &scale_dims);
auto const& vars = op_desc.Inputs(false);
bool has_bias = vars.find("EleBias") != vars.end();
bool has_bias = vars.find("Bias") != vars.end();
float* ele_bias =
has_bias ? get_persistable_data("EleBias", &ele_bias_dims) : nullptr;
has_bias ? get_persistable_data("Bias", &ele_bias_dims) : nullptr;

int bias_size = phi::product(bias_dims);

int scale_size = phi::product(scale_dims);
int ele_bias_size = has_bias ? phi::product(ele_bias_dims) : 0;
float epsilon = PADDLE_GET_CONST(float, op_desc.GetAttr("epsilon"));
float epsilon = PADDLE_GET_CONST(float, op_desc.GetAttr("ln_epsilon"));
bool with_fp16 = engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
if (engine_->precision() == AnalysisConfig::Precision::kInt8) {
with_fp16 = true;
@@ -102,8 +98,8 @@ class PrelnResidualBiasOpConverter : public OpConverter {
plugin_inputs.emplace_back(input2);
layer = engine_->AddDynamicPlugin(plugin_inputs.data(), 2, plugin);
std::vector<std::string> output_names;
output_names.push_back(op_desc.Output("Out_0")[0]);
output_names.push_back(op_desc.Output("Out_1")[0]);
output_names.push_back(op_desc.Output("Y")[0]);
output_names.push_back(op_desc.Output("BiasDropoutResidualOut")[0]);
RreplenishLayerAndOutput(
layer, "preln_residual_bias", output_names, test_mode);
}
@@ -113,4 +109,5 @@
} // namespace inference
} // namespace paddle

REGISTER_TRT_OP_CONVERTER(preln_residual_bias, PrelnResidualBiasOpConverter);
REGISTER_TRT_OP_CONVERTER(fused_bias_dropout_residual_layer_norm,
PrelnResidualBiasOpConverter);
20 changes: 17 additions & 3 deletions paddle/fluid/inference/tensorrt/op_teller.cc
@@ -1451,7 +1451,21 @@ struct SimpleOpTypeSetTeller : public Teller {
return false;
}
}

if (op_type == "fused_bias_dropout_residual_layer_norm") {
if (!with_dynamic_shape) {
VLOG(3) << "fused_bias_dropout_residual_layer_norm should run on "
"dynamic shape mode.";
return false;
}
float dropout_rate =
PADDLE_GET_CONST(float, desc.GetAttr("dropout_rate"));
if (dropout_rate != 0.0f) {
VLOG(4) << "preln_residual_bias trt layer can not work with "
"fused_bias_dropout_residual_layer_norm op in which the "
"dropout_rate != 0, stop convert";
return false;
}
}
if (op_type == "fused_preln_embedding_eltwise_layernorm") {
if (!with_dynamic_shape) {
VLOG(3) << "fused_preln_embedding_eltwise_layernorm should run on "
@@ -2535,7 +2549,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"slice",
"strided_slice",
"fused_preln_embedding_eltwise_layernorm",
"preln_residual_bias",
"fused_bias_dropout_residual_layer_norm",
"c_allreduce_sum",
"c_allreduce_min",
"c_allreduce_max",
@@ -2683,7 +2697,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"strided_slice",
"fused_preln_embedding_eltwise_layernorm",
"preln_skip_layernorm",
"preln_residual_bias",
"fused_bias_dropout_residual_layer_norm",
"c_allreduce_sum",
"c_allreduce_min",
"c_allreduce_max",
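
The teller entries above admit fused_bias_dropout_residual_layer_norm only under dynamic shape and only when dropout_rate == 0. As a rough illustration of the dynamic-shape requirement, here is a minimal Paddle Inference configuration sketch; the model files, input name, and shapes are illustrative assumptions, not taken from this PR:

import paddle.inference as paddle_infer

# Hypothetical model files and input name -- replace with your own.
config = paddle_infer.Config("model.pdmodel", "model.pdiparams")
config.enable_use_gpu(1000, 0)
config.enable_tensorrt_engine(
    workspace_size=1 << 30,
    max_batch_size=1,
    min_subgraph_size=3,
    precision_mode=paddle_infer.PrecisionType.Half,
    use_static=False,
    use_calib_mode=False,
)
# Without dynamic shape info the teller rejects the fused op and it stays
# on the native (non-TensorRT) path.
config.set_trt_dynamic_shape_info(
    {"hidden_in": [1, 128, 768]},  # min shapes (assumed input name)
    {"hidden_in": [4, 384, 768]},  # max shapes
    {"hidden_in": [1, 128, 768]},  # opt shapes
)
predictor = paddle_infer.create_predictor(config)
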
@@ -35,16 +35,17 @@ class FusedBiasDropoutResidualLnOp : public framework::OperatorWithKernel {
"Output",
"LnVariance",
"FusedBiasDropoutResidualLnOp");
OP_INOUT_CHECK(ctx->HasOutput("BiasDropoutResidualOut"),
"Output",
"BiasDropoutResidualOut",
"FusedBiasDropoutResidualLnOp");
OP_INOUT_CHECK(ctx->HasOutput("DropoutMaskOut"),
"Output",
"DropoutMaskOut",
"FusedBiasDropoutResidualLnOp");
Review comment (Contributor): This one doesn't need to be deleted.
Reply (Author): Restored the DropoutMaskOut output check.

OP_INOUT_CHECK(ctx->HasOutput("BiasDropoutResidualOut"),
"Output",
"BiasDropoutResidualOut",
"FusedBiasDropoutResidualLnOp");
OP_INOUT_CHECK(
ctx->HasOutput("Y"), "Output", "Y", "FusedBiasDropoutResidualLnOp");

auto x_dim = ctx->GetInputDim("X");
int left = 1;
for (int i = 0; i < x_dim.size() - 1; i++) {
@@ -54,8 +54,12 @@ class FusedBiasDropoutResidualLnOpKernel : public framework::OpKernel<T> {
auto *ln_mean_data =
dev_ctx.Alloc<U>(ln_mean, ln_mean->numel() * sizeof(U));
auto *ln_var_data = dev_ctx.Alloc<U>(ln_var, ln_var->numel() * sizeof(U));
auto *dropout_mask_out_data = dev_ctx.Alloc<uint8_t>(
dropout_mask_out, dropout_mask_out->numel() * sizeof(uint8_t));
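// DropoutMaskOut is optional here; only allocate the mask buffer when the
// output tensor is actually provided.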
auto *dropout_mask_out_data =
(dropout_mask_out == nullptr)
? nullptr
: dev_ctx.Alloc<uint8_t>(
dropout_mask_out,
dropout_mask_out->numel() * sizeof(uint8_t));
auto *y_data = dev_ctx.Alloc<T>(y, y->numel() * sizeof(T));

const auto input_x_dims = input_x->dims();
@@ -854,9 +854,10 @@ void LaunchLayernormResidualDropoutBias(
residual,
rows * cols * sizeof(T),
ctx.stream());
PADDLE_ENFORCE_GPU_SUCCESS(cudaMemsetAsync(
mask_data, 0, rows * cols * sizeof(MaskType), ctx.stream()));

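// The dropout mask is optional; zero it only when a buffer was provided.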
if (mask_data != nullptr) {
PADDLE_ENFORCE_GPU_SUCCESS(cudaMemsetAsync(
mask_data, 0, rows * cols * sizeof(MaskType), ctx.stream()));
}
// call layernorm forward
switch (GetDesiredBlockDim(cols)) {
FIXED_BLOCK_DIM_CASE(
@@ -18,15 +18,6 @@ string(REPLACE ".py" "" TEST_TRT_CONVERTER "${TEST_TRT_CONVERTER}")

if(NOT WITH_DISTRIBUTE)
list(REMOVE_ITEM TEST_INFERENCE_IR_PASSES "test_delete_c_identity_op_pass")
list(REMOVE_ITEM TEST_INFERENCE_IR_PASSES
"test_trt_convert_preln_residual_bias")
list(REMOVE_ITEM TEST_TRT_IR_PASSES "test_trt_convert_preln_residual_bias")
list(REMOVE_ITEM TEST_TRT_CONVERTER "test_trt_convert_preln_residual_bias")
list(REMOVE_ITEM TEST_INFERENCE_IR_PASSES
"test_trt_convert_preln_residual_no_bias")
list(REMOVE_ITEM TEST_TRT_IR_PASSES "test_trt_convert_preln_residual_no_bias")
list(REMOVE_ITEM TEST_TRT_CONVERTER "test_trt_convert_preln_residual_no_bias")

list(REMOVE_ITEM TEST_INFERENCE_IR_PASSES "test_trt_convert_c_allreduce")
list(REMOVE_ITEM TEST_TRT_IR_PASSES "test_trt_convert_c_allreduce")
list(REMOVE_ITEM TEST_TRT_CONVERTER "test_trt_convert_c_allreduce")
@@ -158,11 +158,24 @@ def clear_dynamic_shape():
self.dynamic_shape.opt_input_shape = {}

def generate_trt_nodes_num(attrs, dynamic_shape):
return 1, 4
if dynamic_shape:
return 1, 4
else:
return 0, 5

attrs = [
program_config.ops[i].attrs for i in range(len(program_config.ops))
]
# for static_shape, fall back to fluid fused op
clear_dynamic_shape()
self.trt_param.precision = paddle_infer.PrecisionType.Float32
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, False
), 1e-2 # atol=1e-2 while rtol is 1e-8
self.trt_param.precision = paddle_infer.PrecisionType.Half
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, False
), 1e-2 # atol=1e-2 while rtol is 1e-8

# just support dynamic_shape
generate_dynamic_shape(attrs)
@@ -146,12 +146,26 @@ def clear_dynamic_shape():
self.dynamic_shape.opt_input_shape = {}

def generate_trt_nodes_num(attrs, dynamic_shape):
return 1, 4
if dynamic_shape:
return 1, 4
else:
return 0, 5

attrs = [
program_config.ops[i].attrs for i in range(len(program_config.ops))
]

# for static_shape, fall back to fluid fused op
clear_dynamic_shape()
self.trt_param.precision = paddle_infer.PrecisionType.Float32
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, False
), 1e-2 # atol=1e-2 while rtol is 1e-8
self.trt_param.precision = paddle_infer.PrecisionType.Half
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, False
), 1e-2 # atol=1e-2 while rtol is 1e-8

# just support dynamic_shape
generate_dynamic_shape(attrs)
self.trt_param.precision = paddle_infer.PrecisionType.Float32
@@ -38,7 +38,7 @@ def setUp(self):

self.fetch_list = [out, elementwise_out]
self.pass_names = "preln_residual_bias_fuse_pass"
self.fused_op_type = "preln_residual_bias"
self.fused_op_type = "fused_bias_dropout_residual_layer_norm"
self.num_fused_ops = 1
# self.graph_attrs = {
# "embedding_eltwise_layernorm_fuse_pass_flag": True,
@@ -72,7 +72,7 @@ def setUp(self):

self.fetch_list = [out, elementwise_out]
self.pass_names = "preln_residual_bias_fuse_pass"
self.fused_op_type = "preln_residual_bias"
self.fused_op_type = "fused_bias_dropout_residual_layer_norm"
self.num_fused_ops = 1

def test_check_program(self):