rename conv2d_fusion op to fused_conv2d_add_act (#59431)
yuanlehome authored Dec 6, 2023
1 parent 57e8186 commit 96e5988
Showing 40 changed files with 240 additions and 231 deletions.
2 changes: 1 addition & 1 deletion cmake/operators.cmake
@@ -688,7 +688,7 @@ function(prune_pybind_h)

# add fused_op in op_list
list(APPEND op_list "fc")
-list(APPEND op_list "conv2d_fusion")
+list(APPEND op_list "fused_conv2d_add_act")
list(APPEND op_list "fusion_seqconv_eltadd_relu")
list(APPEND op_list "fusion_seqpool_cvm_concat")
list(APPEND op_list "fusion_gru")
2 changes: 1 addition & 1 deletion paddle/fluid/framework/ir/CMakeLists.txt
@@ -112,7 +112,7 @@ pass_library(conv2d_trans_filter_dilations_nxn_to_1x1_pass inference)
pass_library(preln_residual_bias_fuse_pass inference)
pass_library(constant_folding_pass inference)
pass_library(auto_mixed_precision_pass inference)
-pass_library(conv2d_fusion_layout_transfer_pass inference)
+pass_library(fused_conv2d_add_act_layout_transfer_pass inference)
pass_library(transfer_layout_elim_pass inference)
pass_library(relu6_fuse_pass inference)
pass_library(silu_fuse_pass inference)
paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc
@@ -44,7 +44,7 @@ framework::proto::OpDesc PrepareOpDesc(
const std::string& output) {
auto proto = base_desc;
framework::OpDesc desc(proto, nullptr);
-desc.SetType("conv2d_fusion");
+desc.SetType("fused_conv2d_add_act");
desc.SetInput("Bias", {bias});
desc.SetInput("ResidualData", {bias1});
desc.SetAttr("activation", activation);
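For context, a condensed sketch of what this PrepareOpDesc helper (and the near-identical one in the next file) now emits after the rename. The helper name and parameter packaging below are illustrative, not the exact pass code; the only behavioral difference between the two passes is whether ResidualData is populated.

```cpp
#include <string>
#include <vector>

#include "paddle/fluid/framework/op_desc.h"

// Hypothetical condensed helper mirroring the hunks above.
paddle::framework::OpDesc MakeFusedConvDesc(
    const paddle::framework::proto::OpDesc& base_desc,
    const std::string& bias,
    const std::vector<std::string>& residual,  // {bias1} here, {} in the next file
    const std::string& activation) {
  paddle::framework::OpDesc desc(base_desc, nullptr);
  desc.SetType("fused_conv2d_add_act");  // previously "conv2d_fusion"
  desc.SetInput("Bias", {bias});
  desc.SetInput("ResidualData", residual);
  desc.SetAttr("activation", activation);
  return desc;
}
```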
paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc
@@ -40,7 +40,7 @@ framework::proto::OpDesc PrepareOpDesc(
float alpha) {
auto proto = base_desc;
framework::OpDesc desc(proto, nullptr);
-desc.SetType("conv2d_fusion");
+desc.SetType("fused_conv2d_add_act");
desc.SetInput("Bias", {bias});
desc.SetInput("ResidualData", {});
desc.SetAttr("activation", activation);
@@ -194,9 +194,9 @@ void ConvElementwiseAddActFusePass::ApplyImpl(ir::Graph* graph) const {
bool cutlass_can_fuse = CutlassTeller::Instance()->CbaCanSupport(
conv_op->Op(), scope, act_op_type, Get<int>("gpu_device_id"));
bool cudnn_can_fuse = cudnn_act_set.count(act_op_type);
-// When this conv2d_fusion specified by problem size and act type is not
-// supported by cutlass and not supported by cuDNN, we should not apply this
-// pass.
+// When this fused_conv2d_add_act specified by problem size and act type is
+// not supported by cutlass and not supported by cuDNN, we should not apply
+// this pass.
if (!cutlass_can_fuse && !cudnn_can_fuse) {
return;
}
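The comment above describes a backend-availability gate; here is a minimal sketch of that rule with assumed parameter names (the real pass queries CutlassTeller and a cuDNN activation set, as the hunk shows):

```cpp
#include <string>
#include <unordered_set>

// Returns true when at least one backend can execute the fused kernel for
// this (problem size, activation) combination; otherwise the pass must keep
// the original conv2d + elementwise_add + act ops in the graph.
bool ShouldApplyFusion(bool cutlass_can_fuse,
                       const std::unordered_set<std::string>& cudnn_act_set,
                       const std::string& act_op_type) {
  const bool cudnn_can_fuse = cudnn_act_set.count(act_op_type) > 0;
  return cutlass_can_fuse || cudnn_can_fuse;
}
```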
6 changes: 3 additions & 3 deletions paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc
@@ -109,7 +109,7 @@ void ConvElementwiseAddFusePass::ApplyImpl(ir::Graph* graph) const {

std::string act_type = "identity";
framework::OpDesc new_op_desc(base_op_desc, nullptr);
-new_op_desc.SetType("conv2d_fusion");
+new_op_desc.SetType("fused_conv2d_add_act");
new_op_desc.SetInput("Bias", {bias_name});
new_op_desc.SetInput("ResidualData", {});
new_op_desc.SetAttr("activation", act_type);
@@ -133,7 +133,7 @@ void ConvElementwiseAddFusePass::ApplyImpl(ir::Graph* graph) const {
auto out_threshold_attr =
elementwise_add_op_desc->GetNullableAttr("out_threshold");
// set the out_threshold of the elementwise add op to be the out_threshold
-// of the conv2d_fusion
+// of the fused_conv2d_add_act
if (out_threshold_attr.index()) {
new_op_desc.SetAttr("out_threshold", out_threshold_attr);
}
@@ -160,7 +160,7 @@ void ConvElementwiseAddFusePass::ApplyImpl(ir::Graph* graph) const {
};

gpd(graph, handler);
-// check if detect conv2d_fusion subgraph!
+// check if detect fused_conv2d_add_act subgraph!
AddStatis(found_conv_eltwise_count);
}

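The out_threshold forwarding in this file's earlier hunk relies on `GetNullableAttr` returning a variant whose first alternative means "attribute absent", so `index() != 0` signals a real value — assuming Paddle's Attribute behaves like a blank-first variant. A self-contained analogue with std::variant:

```cpp
#include <cassert>
#include <variant>

// Stand-in for paddle's Attribute: the first (monostate) alternative plays
// the role of "no attribute recorded", so index() == 0 means absent.
using Attr = std::variant<std::monostate, float, int>;

bool HasValue(const Attr& a) { return a.index() != 0; }

int main() {
  Attr missing;           // defaults to monostate: treated as absent
  Attr threshold = 0.5f;  // float alternative: a real out_threshold
  assert(!HasValue(missing));
  assert(HasValue(threshold));
  return 0;
}
```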
paddle/fluid/framework/ir/conv2d_fusion_layout_transfer_pass.cc → paddle/fluid/framework/ir/fused_conv2d_add_act_layout_transfer_pass.cc
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

-#include "paddle/fluid/framework/ir/conv2d_fusion_layout_transfer_pass.h"
+#include "paddle/fluid/framework/ir/fused_conv2d_add_act_layout_transfer_pass.h"
#include <string>
#include <unordered_map>
#include <unordered_set>
@@ -99,11 +99,11 @@ void InsertLayoutTransOp(ir::Graph *graph,

} // namespace

-void Conv2dFusionLayoutTransferPass::ApplyImpl(ir::Graph *graph) const {
+void FusedConv2dAddActLayoutTransferPass::ApplyImpl(ir::Graph *graph) const {
PADDLE_ENFORCE_NOT_NULL(
graph,
platform::errors::PreconditionNotMet("graph should not be nullptr."));
-FusePassBase::Init("conv2d_fusion_layout_transfer", graph);
+FusePassBase::Init("fused_conv2d_add_act_layout_transfer", graph);
auto *scope = param_scope();

// only float16 compute precision need insert transfer_layout.
@@ -118,36 +118,36 @@ void Conv2dFusionLayoutTransferPass::ApplyImpl(ir::Graph *graph) const {
true,
platform::errors::InvalidArgument(
"the graph should be main graph when applying "
"conv2d_fusion_layout_transfer_pass"));
"fused_conv2d_add_act_layout_transfer_pass"));

PADDLE_ENFORCE_NOT_NULL(
scope,
platform::errors::Fatal("scope must not be nullptr when applying "
"conv2d_fusion_layout_transfer_pass"));
"fused_conv2d_add_act_layout_transfer_pass"));

// Not support multiple block now.
std::unordered_map<ir::Node *, ir::Node *> cache;
auto op_nodes = TopologySortOperations(*graph);
auto iter = op_nodes.cbegin();
auto *block_desc = (*iter)->Op()->Block();

-// Process multiple conv2d_fusion shares weight.
+// Process multiple fused_conv2d_add_act shares weight.
std::unordered_set<std::string> weights_shape_nhwc;

// Used to control the insertion of transfer_layout op.
std::unordered_set<ir::Node *> vars_shape_nhwc;

-// Only support conv2d_fusion now.
-std::string target_op_type = "conv2d_fusion";
+// Only support fused_conv2d_add_act now.
+std::string target_op_type = "fused_conv2d_add_act";
std::unordered_set<ir::Node *> valid_ops;

-// Determine if this conv2d_fusion can run in cuDNN's NHWC mode,
+// Determine if this fused_conv2d_add_act can run in cuDNN's NHWC mode,
// will not set or change any attribute in op_desc
auto cuDNNIsValid = [&](ir::Node *op_node) -> bool {
auto filter_names = op_node->Op()->Input("Filter");
constexpr int CUDNN_ALIGNMENT = 8;
-// If filter's channel is not multiple of CUDNN_ALIGNMENT, conv2d_fusion not
-// run at nhwc.
+// If filter's channel is not multiple of CUDNN_ALIGNMENT,
+// fused_conv2d_add_act not run at nhwc.
for (const auto &filter_name : filter_names) {
auto *filter_var = scope->FindLocalVar(filter_name);
const auto &filter_tensor = filter_var->Get<phi::DenseTensor>();
@@ -195,7 +195,7 @@ void Conv2dFusionLayoutTransferPass::ApplyImpl(ir::Graph *graph) const {
auto *op_desc = op_node->Op();

if (CutlassIsValid(op_node)) {
-// conv2d_fusion must have this attribute because of signature.
+// fused_conv2d_add_act must have this attribute because of signature.
if (!op_desc->HasAttr("fuse_alpha")) {
op_desc->SetAttr("fuse_alpha", 0.f);
}
@@ -289,5 +289,5 @@ void Conv2dFusionLayoutTransferPass::ApplyImpl(ir::Graph *graph) const {
} // namespace framework
} // namespace paddle

-REGISTER_PASS(conv2d_fusion_layout_transfer_pass,
-paddle::framework::ir::Conv2dFusionLayoutTransferPass);
+REGISTER_PASS(fused_conv2d_add_act_layout_transfer_pass,
+paddle::framework::ir::FusedConv2dAddActLayoutTransferPass);
paddle/fluid/framework/ir/conv2d_fusion_layout_transfer_pass.h → paddle/fluid/framework/ir/fused_conv2d_add_act_layout_transfer_pass.h
@@ -20,10 +20,10 @@ namespace paddle {
namespace framework {
namespace ir {

-class Conv2dFusionLayoutTransferPass : public FusePassBase {
+class FusedConv2dAddActLayoutTransferPass : public FusePassBase {
public:
-Conv2dFusionLayoutTransferPass() = default;
-virtual ~Conv2dFusionLayoutTransferPass() = default;
+FusedConv2dAddActLayoutTransferPass() = default;
+virtual ~FusedConv2dAddActLayoutTransferPass() = default;

protected:
void ApplyImpl(ir::Graph* graph) const override;
10 changes: 6 additions & 4 deletions paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc
@@ -306,7 +306,7 @@ void QuantDequantFusePass::DeleteQuant(ir::Graph* graph,
auto op_desc = quantized_node->Op();
std::string quantized_op_type = op_desc->Type();
if (quantized_op_type == "conv2d" ||
quantized_op_type == "conv2d_fusion" ||
quantized_op_type == "fused_conv2d_add_act" ||
quantized_op_type == "depthwise_conv2d" ||
quantized_op_type == "conv2d_transpose" ||
quantized_op_type == "matrix_multiply") {
@@ -339,7 +339,7 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
if (quantized_op_type == "conv2d" ||
quantized_op_type == "depthwise_conv2d" ||
quantized_op_type == "fused_conv2d" ||
quantized_op_type == "conv2d_fusion" ||
quantized_op_type == "fused_conv2d_add_act" ||
quantized_op_type == "conv2d_transpose") {
weight_name = "Filter";
input_name = "Input";
@@ -348,7 +348,8 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
input_name = "X";
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"QuantDequantFuse: We only support conv2d, conv2d_fusion, fused_conv2d,"
"QuantDequantFuse: We only support conv2d, fused_conv2d_add_act, "
"fused_conv2d,"
"conv2d_transpose, matrix_multiply(mul/matmul/matmul_v2) for now, but "
"received: "
"%s.",
@@ -573,7 +574,8 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
quantized_op_node->Op()->Block());
new_op_desc.SetType(quantized_op_type);
new_op_desc.SetAttr("enable_int8", true);
-if (quantized_op_type == "conv2d" || quantized_op_type == "conv2d_fusion" ||
+if (quantized_op_type == "conv2d" ||
+quantized_op_type == "fused_conv2d_add_act" ||
quantized_op_type == "fused_conv2d" ||
quantized_op_type == "depthwise_conv2d" ||
quantized_op_type == "conv2d_transpose") {
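The weight/input dispatch above reduces to a small lookup; a sketch, with the matrix_multiply branch's weight name ("Y") assumed from the visible `input_name = "X"` line and the error message listing matrix_multiply(mul/matmul/matmul_v2):

```cpp
#include <stdexcept>
#include <string>
#include <utility>

// Returns {weight_name, input_name} for a quantized op, mirroring the
// branches above; throws where the pass would PADDLE_THROW.
std::pair<std::string, std::string> QuantIoNames(const std::string& op_type) {
  if (op_type == "conv2d" || op_type == "depthwise_conv2d" ||
      op_type == "fused_conv2d" || op_type == "fused_conv2d_add_act" ||
      op_type == "conv2d_transpose") {
    return {"Filter", "Input"};
  }
  if (op_type == "matrix_multiply") {
    return {"Y", "X"};  // assumed weight name for the matmul-style branch
  }
  throw std::invalid_argument("QuantDequantFuse: unsupported op " + op_type);
}
```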
@@ -275,19 +275,20 @@ void TrtDeleteWeightQuantDequantLinearOpPass::ApplyImpl(
static_cast<float>(quantized_weight_data[i]) * weight_scale[0];
}
} else if (quant_axis == 0) { // per_channel quant_dequant: conv2d,
-// depthwise_conv2d, conv2d_fusion
+// depthwise_conv2d, fused_conv2d_add_act
PADDLE_ENFORCE_EQ(
weight_scale_nums,
w_dims[quant_axis],
platform::errors::InvalidArgument(
"When quant_axis == 0 means use per_channel quant_dequant, "
"weight_scale'numbers should be equal channels."));
-PADDLE_ENFORCE_EQ(w_dims.size(),
-4,
-platform::errors::InvalidArgument(
-"When quant_axis == 0 means use per_channel "
-"quant_dequant, (conv2d, depthwise_conv2d, "
-"conv2d_fusion)'s weight dims should be 4."));
+PADDLE_ENFORCE_EQ(
+w_dims.size(),
+4,
+platform::errors::InvalidArgument(
+"When quant_axis == 0 means use per_channel "
+"quant_dequant, (conv2d, depthwise_conv2d, "
+"fused_conv2d_add_act)'s weight dims should be 4."));

for (int i = 0; i < weight_tensor->numel(); i++) {
int inner_size = static_cast<int>(w_dims[1] * w_dims[2] * w_dims[3]);
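For the quant_axis == 0 branch above, each output channel of the 4-D weight owns one scale, applied over a contiguous block of w_dims[1] * w_dims[2] * w_dims[3] elements. A minimal sketch, assuming an int8 weight buffer in row-major OIHW layout:

```cpp
#include <cstdint>
#include <vector>

// Per-channel dequantization for a 4-D conv weight with quant_axis == 0:
// scales.size() must equal w_dims[0] (the output-channel count).
std::vector<float> DequantPerChannel(const std::vector<int8_t>& qweight,
                                     const std::vector<float>& scales,
                                     const std::vector<int64_t>& w_dims) {
  const int64_t inner_size = w_dims[1] * w_dims[2] * w_dims[3];
  std::vector<float> out(qweight.size());
  for (size_t i = 0; i < qweight.size(); ++i) {
    // Element i belongs to output channel i / inner_size.
    out[i] = static_cast<float>(qweight[i]) * scales[i / inner_size];
  }
  return out;
}
```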
10 changes: 5 additions & 5 deletions paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -193,7 +193,7 @@ const std::vector<std::string> kGpuLowerPrecisionPasses{
"conv_elementwise_add_act_fuse_pass",
"conv_elementwise_add2_act_fuse_pass",
"conv_elementwise_add_fuse_pass",
"conv2d_fusion_layout_transfer_pass",
"fused_conv2d_add_act_layout_transfer_pass",
"multihead_matmul_fuse_pass_v2",
"fused_multi_transformer_encoder_pass",
"fused_multi_transformer_decoder_pass",
@@ -303,10 +303,10 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
"conv_elementwise_add_act_fuse_pass", //
"conv_elementwise_add2_act_fuse_pass", //
#endif
"conv_elementwise_add_fuse_pass", //
#endif //
"transpose_flatten_concat_fuse_pass", //
"conv2d_fusion_layout_transfer_pass", //
"conv_elementwise_add_fuse_pass", //
#endif //
"transpose_flatten_concat_fuse_pass", //
"fused_conv2d_add_act_layout_transfer_pass", //
"transfer_layout_elim_pass",
"auto_mixed_precision_pass", //
"identity_op_clean_pass", // should be after auto_mixed_precision_pass.
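Because pass names are part of the user-facing pass-builder API, callers that pinned the old name must switch to the new one. A sketch of toggling the renamed pass through the inference config — API usage here is illustrative; check the Paddle inference headers for the exact signatures:

```cpp
#include "paddle/fluid/inference/api/paddle_inference_api.h"

void DisableLayoutTransfer(paddle::AnalysisConfig* config) {
  // "conv2d_fusion_layout_transfer_pass" no longer exists after this commit;
  // the renamed pass must be referenced instead.
  config->pass_builder()->DeletePass(
      "fused_conv2d_add_act_layout_transfer_pass");
}
```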
4 changes: 2 additions & 2 deletions paddle/fluid/inference/tensorrt/convert/conv2d_op.cc
@@ -132,7 +132,7 @@ void ConvertConv2d(TensorRTEngine* engine,
bias.SetDataType(weight.get().type);
bias.SetCount(0);
bias.SetValues(nullptr);
-if (op_desc.Type() == "conv2d_fusion") {
+if (op_desc.Type() == "fused_conv2d_add_act") {
auto* bias_tensor = scope.GetVar(op_desc.Input("Bias").front());
auto* bias_tensor_data = bias_tensor->GetMutable<phi::DenseTensor>();
bias =
@@ -265,5 +265,5 @@ class Deconv2dOpConverter : public OpConverter {
} // namespace paddle

REGISTER_TRT_OP_CONVERTER(conv2d, Conv2dOpConverter);
-REGISTER_TRT_OP_CONVERTER(conv2d_fusion, Conv2dOpConverter);
+REGISTER_TRT_OP_CONVERTER(fused_conv2d_add_act, Conv2dOpConverter);
REGISTER_TRT_OP_CONVERTER(conv2d_transpose, Deconv2dOpConverter);
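Both op types route to the same Conv2dOpConverter; per the hunk above, only fused_conv2d_add_act carries a real Bias input. A standalone sketch of that dispatch — TrtWeights is a stand-in type, not the actual TensorRT API:

```cpp
#include <cstddef>
#include <string>

struct TrtWeights {  // stand-in for the converter's weight wrapper
  const void* values = nullptr;
  size_t count = 0;
};

// Plain conv2d keeps the empty default bias (nullptr, count 0); only the
// fused op looks up its Bias tensor from the scope.
TrtWeights BiasFor(const std::string& op_type, const TrtWeights& fused_bias) {
  return op_type == "fused_conv2d_add_act" ? fused_bias : TrtWeights{};
}
```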
7 changes: 4 additions & 3 deletions paddle/fluid/inference/tensorrt/dynamic_shape_infermeta.cc
@@ -586,7 +586,7 @@ inline ExprWrapper ConvOutputSize(ExprWrapper ih,
return oh;
}

-nvinfer1::DimsExprs Conv2dFusionInferMeta(
+nvinfer1::DimsExprs FusedConv2dAddActInferMeta(
int output_index,
const nvinfer1::DimsExprs* inputs,
int nb_inputs,
@@ -870,8 +870,9 @@ PD_REGISTER_DYNAMIC_INFER_META_FN(inverse, UnchangedInferMeta);
PD_REGISTER_DYNAMIC_INFER_META_FN(moe, MoeInferMeta);
PD_REGISTER_DYNAMIC_INFER_META_FN(pad3d, Pad3dInferMeta);
PD_REGISTER_DYNAMIC_INFER_META_FN(grid_sampler, GridSamplerInferMeta);
-PD_REGISTER_DYNAMIC_INFER_META_FN(conv2d_fusion, Conv2dFusionInferMeta);
-PD_REGISTER_DYNAMIC_INFER_META_FN(conv2d, Conv2dFusionInferMeta);
+PD_REGISTER_DYNAMIC_INFER_META_FN(fused_conv2d_add_act,
+FusedConv2dAddActInferMeta);
+PD_REGISTER_DYNAMIC_INFER_META_FN(conv2d, FusedConv2dAddActInferMeta);
PD_REGISTER_DYNAMIC_INFER_META_FN(conv2d_transpose, Conv2dTransposeInferMeta);
PD_REGISTER_DYNAMIC_INFER_META_FN(p_norm, PNormInferMeta);
PD_REGISTER_DYNAMIC_INFER_META_FN(memory_efficient_attention,
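FusedConv2dAddActInferMeta builds on the ConvOutputSize helper shown at the top of this hunk; the underlying arithmetic is the textbook convolution output size. A scalar version of the same formula:

```cpp
#include <cassert>

// Output extent of a convolution along one spatial axis:
// oh = (ih + pad_before + pad_after - dilation*(kh - 1) - 1) / stride + 1
int ConvOutputSizeScalar(int ih, int kh, int pad_before, int pad_after,
                         int stride, int dilation) {
  assert(stride > 0);
  const int dkernel = dilation * (kh - 1) + 1;  // effective kernel extent
  return (ih + pad_before + pad_after - dkernel) / stride + 1;
}
```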
@@ -28,7 +28,7 @@ USE_TRT_DYNAMIC_INFER_META_FN(scatter_nd_add);
USE_TRT_DYNAMIC_INFER_META_FN(pad3d);
USE_TRT_DYNAMIC_INFER_META_FN(inverse);
USE_TRT_DYNAMIC_INFER_META_FN(grid_sampler);
-USE_TRT_DYNAMIC_INFER_META_FN(conv2d_fusion);
+USE_TRT_DYNAMIC_INFER_META_FN(fused_conv2d_add_act);
USE_TRT_DYNAMIC_INFER_META_FN(conv2d);
USE_TRT_DYNAMIC_INFER_META_FN(conv2d_transpose);
USE_TRT_DYNAMIC_INFER_META_FN(memory_efficient_attention);
10 changes: 5 additions & 5 deletions paddle/fluid/inference/tensorrt/op_teller.cc
@@ -255,7 +255,7 @@ struct SimpleOpTypeSetTeller : public Teller {
}

if (op_type == "conv2d" || op_type == "conv2d_transpose" ||
op_type == "conv2d_fusion" || op_type == "depthwise_conv2d" ||
op_type == "fused_conv2d_add_act" || op_type == "depthwise_conv2d" ||
op_type == "depthwise_conv2d_transpose") {
if (desc.Input("Input").size() != 1) {
VLOG(3) << "TRT Conv2d expect 1 input, but got "
@@ -270,7 +270,7 @@ struct SimpleOpTypeSetTeller : public Teller {
}

if (desc.HasAttr("enable_int8")) {
-if (op_type == "conv2d" || op_type == "conv2d_fusion") {
+if (op_type == "conv2d" || op_type == "fused_conv2d_add_act") {
if (!desc.HasAttr("Input_scale")) {
VLOG(3) << "Input scale not found. TRT int8"
" requires conv/deconv to have "
@@ -304,7 +304,7 @@ struct SimpleOpTypeSetTeller : public Teller {

// strides > 1 and 'SAME' is only supported by trt7.0 above
#if !IS_TRT_VERSION_GE(7000)
-if (op_type == "conv2d" || op_type == "conv2d_fusion" ||
+if (op_type == "conv2d" || op_type == "fused_conv2d_add_act" ||
op_type == "depthwise_conv2d") {
if (desc.HasAttr("padding_algorithm") && with_dynamic_shape) {
auto padding_algorithm =
@@ -2818,7 +2818,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"bmm",
"range",
"conv2d",
"conv2d_fusion",
"fused_conv2d_add_act",
"pool2d",
"relu",
"elu",
@@ -2989,7 +2989,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"bmm",
"range",
"conv2d",
"conv2d_fusion",
"fused_conv2d_add_act",
"pool2d",
"relu",
"elu",
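The int8 branch above illustrates the teller's pattern: an op marked enable_int8 is only accepted for TRT when the quantization passes recorded an "Input_scale" attribute on it. A condensed sketch using the OpDesc API shown in the hunk (the helper name is illustrative):

```cpp
#include "paddle/fluid/framework/op_desc.h"

// Rejects a conv-like op for TRT int8 when no input scale was recorded;
// non-int8 ops pass through this particular check unchanged.
bool ConvInt8ScaleCheck(const paddle::framework::OpDesc& desc) {
  if (!desc.HasAttr("enable_int8")) return true;  // not an int8 op
  return desc.HasAttr("Input_scale");
}
```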
2 changes: 1 addition & 1 deletion paddle/fluid/pir/dialect/op_generator/op_build_gen.py
@@ -23,7 +23,7 @@
'SliceRawInferMeta',
'StackInferMeta',
'Conv2dTransposeInferMeta',
-'Conv2dFusionInferMeta',
+'FusedConv2dAddActInferMeta',
'InterpolateInferMeta',
}

2 changes: 1 addition & 1 deletion paddle/fluid/pir/dialect/op_generator/ops_api_gen.py
@@ -89,7 +89,7 @@
'fusion_gru',
'fusion_seqconv_eltadd_relu',
'fusion_seqexpand_concat_fc',
-'conv2d_fusion',
+'fused_conv2d_add_act',
'fusion_repeated_fc_relu',
'fusion_squared_mat_sub',
'fused_attention',