[Paddle-TRT] Fix AI-Rank BERT emb_eltwise_layernorm input order #32482

Merged · 5 commits · Apr 25, 2021
10 changes: 10 additions & 0 deletions paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc
@@ -290,10 +290,20 @@ static int BuildFusion(Graph* graph, const std::string& name_scope
       ids.push_back(inner_pattern_ins[js[iter]].first->Name());
       embs.push_back(inner_pattern_ins[js[iter]].second->Name());
     }
+
     OpDesc new_op_desc;
     new_op_desc.SetType("fused_embedding_eltwise_layernorm");
     new_op_desc.SetInput("Ids", ids);
     new_op_desc.SetInput("Embs", embs);
+
+    new_op_desc.SetInput("WordId", {ids[0]});
+    new_op_desc.SetInput("PosId", {ids[1]});
+    new_op_desc.SetInput("SentId", {ids[2]});
+
+    new_op_desc.SetInput("WordEmbedding", {embs[0]});
+    new_op_desc.SetInput("PosEmbedding", {embs[1]});
+    new_op_desc.SetInput("SentEmbedding", {embs[2]});
+
     new_op_desc.SetInput("Bias", {end_pattern_biases[k]->Name()});
     new_op_desc.SetInput("Scale", {end_pattern_scales[k]->Name()});
     new_op_desc.SetOutput("Out", {end_pattern_out[k]->Name()});
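Besides the original repeatable Ids/Embs slots, the pass now also pins each id and embedding tensor to a dedicated named slot, so a consumer no longer has to infer which entry of ids/embs is the word, position, or sentence tensor. A minimal sketch of reading the order back from the named slots (op_desc is assumed to be a framework::OpDesc produced by this pass; the variable names are illustrative only):

    // Recover a fixed (word, pos, sent) order from the named slots instead of
    // relying on the ordering of the repeatable "Ids"/"Embs" inputs.
    std::vector<std::string> ordered_ids = {op_desc.Input("WordId").front(),
                                            op_desc.Input("PosId").front(),
                                            op_desc.Input("SentId").front()};
    std::vector<std::string> ordered_embs = {
        op_desc.Input("WordEmbedding").front(),
        op_desc.Input("PosEmbedding").front(),
        op_desc.Input("SentEmbedding").front()};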
2 changes: 1 addition & 1 deletion paddle/fluid/framework/pipeline_trainer.cc
@@ -37,7 +37,7 @@ void PipelineTrainer::Initialize(const TrainerDesc& trainer_desc,
   int place_id = section_config.place_id();
 #if (defined PADDLE_WITH_NCCL)
   place_ = platform::CUDAPlace(place_id);
-#elif (defined WITH_ASCEND_CL)
+#elif (defined WITH_ASCEND_CL)  // NOLINT
   place_ = platform::NPUPlace(place_id);
 #endif
   worker_ = DeviceWorkerFactory::CreateDeviceWorker(
43 changes: 22 additions & 21 deletions paddle/fluid/framework/unused_var_check.cc
@@ -53,27 +53,28 @@ static const std::unordered_set<std::string> &GetOpWithUnusedVarAllowSet() {
   // Use pointer here for safe static deinitialization
   static auto *allow_set = new std::unordered_set<std::string>({
       // called once
-      "batch_norm",                      // 0
-      "batch_norm_grad",                 // 0
-      "sync_batch_norm",                 // 0
-      "sync_batch_norm_grad",            // 0
-      "inplace_abn",                     // 0
-      "inplace_abn_grad",                // 0
-      "dgc_momentum",                    // 0
-      "fake_quantize_range_abs_max",     // 0
-      "rmsprop",                         // 0
-      "sequence_conv_grad",              // 0
-      "roi_perspective_transform_grad",  // 0
-      "fill_zeros_like",                 // 1
-      "fill_any_like",                   // 1
-      "nce_grad",                        // 1
-      "precision_recall",                // 1
-      "fusion_seqpool_cvm_concat",       // 2
-      "fused_batch_norm_act",            // 2
-      "fused_batch_norm_act_grad",       // 2
-      "data_norm",                       // 0
-      "data_norm_grad",                  // 0
-      "update_loss_scaling",             // 0
+      "batch_norm",                         // 0
+      "batch_norm_grad",                    // 0
+      "sync_batch_norm",                    // 0
+      "sync_batch_norm_grad",               // 0
+      "inplace_abn",                        // 0
+      "inplace_abn_grad",                   // 0
+      "dgc_momentum",                       // 0
+      "fake_quantize_range_abs_max",        // 0
+      "rmsprop",                            // 0
+      "sequence_conv_grad",                 // 0
+      "roi_perspective_transform_grad",     // 0
+      "fill_zeros_like",                    // 1
+      "fill_any_like",                      // 1
+      "nce_grad",                           // 1
+      "precision_recall",                   // 1
+      "fusion_seqpool_cvm_concat",          // 2
+      "fused_batch_norm_act",               // 2
+      "fused_batch_norm_act_grad",          // 2
+      "data_norm",                          // 0
+      "data_norm_grad",                     // 0
+      "update_loss_scaling",                // 0
+      "fused_embedding_eltwise_layernorm",  // 0
   });
   return *allow_set;
 }
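The new allow-set entry keeps the unused-variable check quiet for fused_embedding_eltwise_layernorm: its native kernel reads Ids/Embs, while the extra WordId/PosId/SentId (and *Embedding) inputs added above are only consumed on the TensorRT path. A minimal sketch of how such an allow set is typically consulted (the helper below is a hypothetical illustration, not the exact code in this file):

    // Hypothetical helper: ops in the allow set skip the unused-var warning.
    static bool IsUnusedVarAllowed(const std::string &op_type) {
      return GetOpWithUnusedVarAllowSet().count(op_type) > 0;
    }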
37 changes: 26 additions & 11 deletions paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc
@@ -34,8 +34,17 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
     VLOG(4) << "convert fluid EmbEltwiseLayerNorm op to tensorrt layer";
 
     framework::OpDesc op_desc(op, nullptr);
-    auto id_names = op_desc.Input("Ids");
-    auto emb_names = op_desc.Input("Embs");
+    auto word_id_name = op_desc.Input("WordId").front();
+    auto pos_id_name = op_desc.Input("PosId").front();
+    auto sent_id_name = op_desc.Input("SentId").front();
+    auto word_emb_name = op_desc.Input("WordEmbedding").front();
+    auto pos_emb_name = op_desc.Input("PosEmbedding").front();
+    auto sent_emb_name = op_desc.Input("SentEmbedding").front();
+    std::vector<std::string> id_names = {word_id_name, pos_id_name,
+                                         sent_id_name};
+    std::vector<std::string> emb_names = {word_emb_name, pos_emb_name,
+                                          sent_emb_name};
+
     int input_num = id_names.size();
 
     // Declare inputs
@@ -91,6 +100,12 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
     if (enable_int8) {
       output_fp16 = 1;
     }
+    PADDLE_ENFORCE_EQ(
+        input_num, 3,
+        platform::errors::InvalidArgument(
+            "When using oss and var-len, embedding_eltwise_layernorm op "
+            "should have 3 inputs only, but got %d.",
+            input_num));
     PADDLE_ENFORCE_EQ(
         output_fp16, 1,
         platform::errors::InvalidArgument(
@@ -125,15 +140,15 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
     plugin_ptr->fields = fields.data();
 
     std::vector<nvinfer1::ITensor*> plugin_inputs;
-    plugin_inputs.emplace_back(engine_->GetITensor(
-        engine_->network()->getInput(0)->getName()));  // word_embedding,
-                                                       // eval_placeholder_0
-    plugin_inputs.emplace_back(engine_->GetITensor(
-        engine_->network()->getInput(1)->getName()));  // sent_embedding,
-                                                       // eval_placeholder_1
-    plugin_inputs.emplace_back(engine_->GetITensor(
-        engine_->network()->getInput(2)->getName()));  // cu_seqlens,
-                                                       // eval_placeholder_2
+    plugin_inputs.emplace_back(
+        engine_->GetITensor(word_id_name));  // word_embedding,
+                                             // eval_placeholder_0
+    plugin_inputs.emplace_back(
+        engine_->GetITensor(sent_id_name));  // sent_embedding,
+                                             // eval_placeholder_1
+    plugin_inputs.emplace_back(
+        engine_->GetITensor(pos_id_name));  // cu_seqlens,
+                                            // eval_placeholder_2
     auto max_seqlen_tensor =
         engine_->GetITensor(engine_->network()->getInput(3)->getName());
     auto* shuffle_layer =
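The converter previously bound the plugin inputs by network-input position (getInput(0), getInput(1), getInput(2)), which silently produced a wrong binding whenever the feed order of the graph inputs differed from word/sent/pos; looking each tensor up by its named slot makes the binding independent of declaration order. A minimal sketch of the resulting ordering contract (the free function and engine parameter are illustrative assumptions, not the converter's actual structure):

    // The varlen emb_eltwise_layernorm plugin consumes its inputs in this
    // fixed order, regardless of how the network inputs were declared.
    std::vector<nvinfer1::ITensor*> OrderedPluginInputs(
        paddle::inference::tensorrt::TensorRTEngine* engine,
        const std::string& word_id_name, const std::string& sent_id_name,
        const std::string& pos_id_name) {
      std::vector<nvinfer1::ITensor*> inputs;
      inputs.emplace_back(engine->GetITensor(word_id_name));  // slot 0
      inputs.emplace_back(engine->GetITensor(sent_id_name));  // slot 1
      inputs.emplace_back(engine->GetITensor(pos_id_name));   // slot 2 (cu_seqlens)
      return inputs;
    }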