Renaming conv2d_fusion op to fused_conv2d_add_act op #59431

Merged: 3 commits, Dec 6, 2023
Changes from all commits
cmake/operators.cmake (2 changes: 1 addition & 1 deletion)
@@ -688,7 +688,7 @@ function(prune_pybind_h)

# add fused_op in op_list
list(APPEND op_list "fc")
list(APPEND op_list "conv2d_fusion")
list(APPEND op_list "fused_conv2d_add_act")
list(APPEND op_list "fusion_seqconv_eltadd_relu")
list(APPEND op_list "fusion_seqpool_cvm_concat")
list(APPEND op_list "fusion_gru")
paddle/fluid/framework/ir/CMakeLists.txt (2 changes: 1 addition & 1 deletion)
@@ -112,7 +112,7 @@ pass_library(conv2d_trans_filter_dilations_nxn_to_1x1_pass inference)
pass_library(preln_residual_bias_fuse_pass inference)
pass_library(constant_folding_pass inference)
pass_library(auto_mixed_precision_pass inference)
pass_library(conv2d_fusion_layout_transfer_pass inference)
pass_library(fused_conv2d_add_act_layout_transfer_pass inference)
pass_library(transfer_layout_elim_pass inference)
pass_library(relu6_fuse_pass inference)
pass_library(silu_fuse_pass inference)
@@ -44,7 +44,7 @@ framework::proto::OpDesc PrepareOpDesc(
const std::string& output) {
auto proto = base_desc;
framework::OpDesc desc(proto, nullptr);
desc.SetType("conv2d_fusion");
desc.SetType("fused_conv2d_add_act");
desc.SetInput("Bias", {bias});
desc.SetInput("ResidualData", {bias1});
desc.SetAttr("activation", activation);
@@ -40,7 +40,7 @@ framework::proto::OpDesc PrepareOpDesc(
float alpha) {
auto proto = base_desc;
framework::OpDesc desc(proto, nullptr);
desc.SetType("conv2d_fusion");
desc.SetType("fused_conv2d_add_act");
desc.SetInput("Bias", {bias});
desc.SetInput("ResidualData", {});
desc.SetAttr("activation", activation);
@@ -194,9 +194,9 @@ void ConvElementwiseAddActFusePass::ApplyImpl(ir::Graph* graph) const {
bool cutlass_can_fuse = CutlassTeller::Instance()->CbaCanSupport(
conv_op->Op(), scope, act_op_type, Get<int>("gpu_device_id"));
bool cudnn_can_fuse = cudnn_act_set.count(act_op_type);
// When this conv2d_fusion specified by problem size and act type is not
// supported by cutlass and not supported by cuDNN, we should not apply this
// pass.
// When this fused_conv2d_add_act specified by problem size and act type is
// not supported by cutlass and not supported by cuDNN, we should not apply
// this pass.
if (!cutlass_can_fuse && !cudnn_can_fuse) {
return;
}
paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc (6 changes: 3 additions & 3 deletions)
@@ -109,7 +109,7 @@ void ConvElementwiseAddFusePass::ApplyImpl(ir::Graph* graph) const {

std::string act_type = "identity";
framework::OpDesc new_op_desc(base_op_desc, nullptr);
new_op_desc.SetType("conv2d_fusion");
new_op_desc.SetType("fused_conv2d_add_act");
new_op_desc.SetInput("Bias", {bias_name});
new_op_desc.SetInput("ResidualData", {});
new_op_desc.SetAttr("activation", act_type);
@@ -133,7 +133,7 @@ void ConvElementwiseAddFusePass::ApplyImpl(ir::Graph* graph) const {
auto out_threshold_attr =
elementwise_add_op_desc->GetNullableAttr("out_threshold");
// set the out_threshold of the elementwise add op to be the out_threshold
// of the conv2d_fusion
// of the fused_conv2d_add_act
if (out_threshold_attr.index()) {
new_op_desc.SetAttr("out_threshold", out_threshold_attr);
}
@@ -160,7 +160,7 @@ void ConvElementwiseAddFusePass::ApplyImpl(ir::Graph* graph) const {
};

gpd(graph, handler);
// check if detect conv2d_fusion subgraph!
// check if detect fused_conv2d_add_act subgraph!
AddStatis(found_conv_eltwise_count);
}

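To make the rename concrete, the sketch below shows the kind of op desc a fuse pass like the one above ends up emitting under the new name. It is illustrative only: the type string, the Bias/ResidualData inputs, and the activation attribute mirror the diff, while the function name and tensor names are hypothetical.

#include <string>
#include "paddle/fluid/framework/op_desc.h"

// Sketch (not part of this PR): mirrors the SetType/SetInput/SetAttr calls
// made by conv_elementwise_add_fuse_pass after the rename. The tensor names
// ("conv_in", "conv_filter", "eltwise_bias", "fused_out") are placeholders.
paddle::framework::OpDesc BuildFusedConvAddActDesc() {
  paddle::framework::OpDesc desc;
  desc.SetType("fused_conv2d_add_act");  // previously "conv2d_fusion"
  desc.SetInput("Input", {"conv_in"});
  desc.SetInput("Filter", {"conv_filter"});
  desc.SetInput("Bias", {"eltwise_bias"});
  desc.SetInput("ResidualData", {});  // empty when no residual branch is fused
  desc.SetAttr("activation", std::string("identity"));
  desc.SetOutput("Output", {"fused_out"});
  return desc;
}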
@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/ir/conv2d_fusion_layout_transfer_pass.h"
#include "paddle/fluid/framework/ir/fused_conv2d_add_act_layout_transfer_pass.h"
#include <string>
#include <unordered_map>
#include <unordered_set>
@@ -99,11 +99,11 @@ void InsertLayoutTransOp(ir::Graph *graph,

} // namespace

void Conv2dFusionLayoutTransferPass::ApplyImpl(ir::Graph *graph) const {
void FusedConv2dAddActLayoutTransferPass::ApplyImpl(ir::Graph *graph) const {
PADDLE_ENFORCE_NOT_NULL(
graph,
platform::errors::PreconditionNotMet("graph should not be nullptr."));
FusePassBase::Init("conv2d_fusion_layout_transfer", graph);
FusePassBase::Init("fused_conv2d_add_act_layout_transfer", graph);
auto *scope = param_scope();

// only float16 compute precision need insert transfer_layout.
@@ -118,36 +118,36 @@ void Conv2dFusionLayoutTransferPass::ApplyImpl(ir::Graph *graph) const {
true,
platform::errors::InvalidArgument(
"the graph should be main graph when applying "
"conv2d_fusion_layout_transfer_pass"));
"fused_conv2d_add_act_layout_transfer_pass"));

PADDLE_ENFORCE_NOT_NULL(
scope,
platform::errors::Fatal("scope must not be nullptr when applying "
"conv2d_fusion_layout_transfer_pass"));
"fused_conv2d_add_act_layout_transfer_pass"));

// Not support multiple block now.
std::unordered_map<ir::Node *, ir::Node *> cache;
auto op_nodes = TopologySortOperations(*graph);
auto iter = op_nodes.cbegin();
auto *block_desc = (*iter)->Op()->Block();

// Process multiple conv2d_fusion shares weight.
// Process multiple fused_conv2d_add_act shares weight.
std::unordered_set<std::string> weights_shape_nhwc;

// Used to control the insertion of transfer_layout op.
std::unordered_set<ir::Node *> vars_shape_nhwc;

// Only support conv2d_fusion now.
std::string target_op_type = "conv2d_fusion";
// Only support fused_conv2d_add_act now.
std::string target_op_type = "fused_conv2d_add_act";
std::unordered_set<ir::Node *> valid_ops;

// Determine if this conv2d_fusion can run in cuDNN's NHWC mode,
// Determine if this fused_conv2d_add_act can run in cuDNN's NHWC mode,
// will not set or change any attribute in op_desc
auto cuDNNIsValid = [&](ir::Node *op_node) -> bool {
auto filter_names = op_node->Op()->Input("Filter");
constexpr int CUDNN_ALIGNMENT = 8;
// If filter's channel is not multiple of CUDNN_ALIGNMENT, conv2d_fusion not
// run at nhwc.
// If filter's channel is not multiple of CUDNN_ALIGNMENT,
// fused_conv2d_add_act not run at nhwc.
for (const auto &filter_name : filter_names) {
auto *filter_var = scope->FindLocalVar(filter_name);
const auto &filter_tensor = filter_var->Get<phi::DenseTensor>();
@@ -195,7 +195,7 @@ void Conv2dFusionLayoutTransferPass::ApplyImpl(ir::Graph *graph) const {
auto *op_desc = op_node->Op();

if (CutlassIsValid(op_node)) {
// conv2d_fusion must have this attribute because of signature.
// fused_conv2d_add_act must have this attribute because of signature.
if (!op_desc->HasAttr("fuse_alpha")) {
op_desc->SetAttr("fuse_alpha", 0.f);
}
@@ -289,5 +289,5 @@ void Conv2dFusionLayoutTransferPass::ApplyImpl(ir::Graph *graph) const {
} // namespace framework
} // namespace paddle

REGISTER_PASS(conv2d_fusion_layout_transfer_pass,
paddle::framework::ir::Conv2dFusionLayoutTransferPass);
REGISTER_PASS(fused_conv2d_add_act_layout_transfer_pass,
paddle::framework::ir::FusedConv2dAddActLayoutTransferPass);
@@ -20,10 +20,10 @@ namespace paddle {
namespace framework {
namespace ir {

class Conv2dFusionLayoutTransferPass : public FusePassBase {
class FusedConv2dAddActLayoutTransferPass : public FusePassBase {
public:
Conv2dFusionLayoutTransferPass() = default;
virtual ~Conv2dFusionLayoutTransferPass() = default;
FusedConv2dAddActLayoutTransferPass() = default;
virtual ~FusedConv2dAddActLayoutTransferPass() = default;

protected:
void ApplyImpl(ir::Graph* graph) const override;
paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc (10 changes: 6 additions & 4 deletions)
@@ -306,7 +306,7 @@ void QuantDequantFusePass::DeleteQuant(ir::Graph* graph,
auto op_desc = quantized_node->Op();
std::string quantized_op_type = op_desc->Type();
if (quantized_op_type == "conv2d" ||
quantized_op_type == "conv2d_fusion" ||
quantized_op_type == "fused_conv2d_add_act" ||
quantized_op_type == "depthwise_conv2d" ||
quantized_op_type == "conv2d_transpose" ||
quantized_op_type == "matrix_multiply") {
@@ -339,7 +339,7 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
if (quantized_op_type == "conv2d" ||
quantized_op_type == "depthwise_conv2d" ||
quantized_op_type == "fused_conv2d" ||
quantized_op_type == "conv2d_fusion" ||
quantized_op_type == "fused_conv2d_add_act" ||
quantized_op_type == "conv2d_transpose") {
weight_name = "Filter";
input_name = "Input";
@@ -348,7 +348,8 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
input_name = "X";
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"QuantDequantFuse: We only support conv2d, conv2d_fusion, fused_conv2d,"
"QuantDequantFuse: We only support conv2d, fused_conv2d_add_act, "
"fused_conv2d,"
"conv2d_transpose, matrix_multiply(mul/matmul/matmul_v2) for now, but "
"received: "
"%s.",
@@ -573,7 +574,8 @@ void QuantDequantFusePass::FuseDequant(ir::Graph* graph,
quantized_op_node->Op()->Block());
new_op_desc.SetType(quantized_op_type);
new_op_desc.SetAttr("enable_int8", true);
if (quantized_op_type == "conv2d" || quantized_op_type == "conv2d_fusion" ||
if (quantized_op_type == "conv2d" ||
quantized_op_type == "fused_conv2d_add_act" ||
quantized_op_type == "fused_conv2d" ||
quantized_op_type == "depthwise_conv2d" ||
quantized_op_type == "conv2d_transpose") {
@@ -275,19 +275,20 @@ void TrtDeleteWeightQuantDequantLinearOpPass::ApplyImpl(
static_cast<float>(quantized_weight_data[i]) * weight_scale[0];
}
} else if (quant_axis == 0) { // per_channel quant_dequant: conv2d,
// depthwise_conv2d, conv2d_fusion
// depthwise_conv2d, fused_conv2d_add_act
PADDLE_ENFORCE_EQ(
weight_scale_nums,
w_dims[quant_axis],
platform::errors::InvalidArgument(
"When quant_axis == 0 means use per_channel quant_dequant, "
"weight_scale'numbers should be equal channels."));
PADDLE_ENFORCE_EQ(w_dims.size(),
4,
platform::errors::InvalidArgument(
"When quant_axis == 0 means use per_channel "
"quant_dequant, (conv2d, depthwise_conv2d, "
"conv2d_fusion)'s weight dims should be 4."));
PADDLE_ENFORCE_EQ(
w_dims.size(),
4,
platform::errors::InvalidArgument(
"When quant_axis == 0 means use per_channel "
"quant_dequant, (conv2d, depthwise_conv2d, "
"fused_conv2d_add_act)'s weight dims should be 4."));

for (int i = 0; i < weight_tensor->numel(); i++) {
int inner_size = static_cast<int>(w_dims[1] * w_dims[2] * w_dims[3]);
paddle/fluid/inference/api/paddle_pass_builder.cc (10 changes: 5 additions & 5 deletions)
@@ -193,7 +193,7 @@ const std::vector<std::string> kGpuLowerPrecisionPasses{
"conv_elementwise_add_act_fuse_pass",
"conv_elementwise_add2_act_fuse_pass",
"conv_elementwise_add_fuse_pass",
"conv2d_fusion_layout_transfer_pass",
"fused_conv2d_add_act_layout_transfer_pass",
"multihead_matmul_fuse_pass_v2",
"fused_multi_transformer_encoder_pass",
"fused_multi_transformer_decoder_pass",
@@ -303,10 +303,10 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
"conv_elementwise_add_act_fuse_pass", //
"conv_elementwise_add2_act_fuse_pass", //
#endif
"conv_elementwise_add_fuse_pass", //
#endif //
"transpose_flatten_concat_fuse_pass", //
"conv2d_fusion_layout_transfer_pass", //
"conv_elementwise_add_fuse_pass", //
#endif //
"transpose_flatten_concat_fuse_pass", //
"fused_conv2d_add_act_layout_transfer_pass", //
"transfer_layout_elim_pass",
"auto_mixed_precision_pass", //
"identity_op_clean_pass", // should be after auto_mixed_precision_pass.
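Because these pass names are part of the public pass lists, user code that disabled the old layout-transfer pass by name must switch to the new string. A minimal sketch with the C++ inference API, assuming the usual Config/pass_builder workflow (the model directory and GPU settings are placeholders):

#include "paddle_inference_api.h"  // include path depends on the inference package layout

int main() {
  paddle_infer::Config config;
  config.SetModel("./model_dir");  // placeholder model directory
  config.EnableUseGpu(/*memory_pool_init_size_mb=*/100, /*device_id=*/0);

  // Only the pass name changed in this PR, so a previous
  // DeletePass("conv2d_fusion_layout_transfer_pass") call becomes:
  config.pass_builder()->DeletePass("fused_conv2d_add_act_layout_transfer_pass");

  auto predictor = paddle_infer::CreatePredictor(config);
  return predictor ? 0 : 1;
}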
paddle/fluid/inference/tensorrt/convert/conv2d_op.cc (4 changes: 2 additions & 2 deletions)
@@ -132,7 +132,7 @@ void ConvertConv2d(TensorRTEngine* engine,
bias.SetDataType(weight.get().type);
bias.SetCount(0);
bias.SetValues(nullptr);
if (op_desc.Type() == "conv2d_fusion") {
if (op_desc.Type() == "fused_conv2d_add_act") {
auto* bias_tensor = scope.GetVar(op_desc.Input("Bias").front());
auto* bias_tensor_data = bias_tensor->GetMutable<phi::DenseTensor>();
bias =
@@ -265,5 +265,5 @@ class Deconv2dOpConverter : public OpConverter {
} // namespace paddle

REGISTER_TRT_OP_CONVERTER(conv2d, Conv2dOpConverter);
REGISTER_TRT_OP_CONVERTER(conv2d_fusion, Conv2dOpConverter);
REGISTER_TRT_OP_CONVERTER(fused_conv2d_add_act, Conv2dOpConverter);
REGISTER_TRT_OP_CONVERTER(conv2d_transpose, Deconv2dOpConverter);
paddle/fluid/inference/tensorrt/dynamic_shape_infermeta.cc (7 changes: 4 additions & 3 deletions)
@@ -586,7 +586,7 @@ inline ExprWrapper ConvOutputSize(ExprWrapper ih,
return oh;
}

nvinfer1::DimsExprs Conv2dFusionInferMeta(
nvinfer1::DimsExprs FusedConv2dAddActInferMeta(
int output_index,
const nvinfer1::DimsExprs* inputs,
int nb_inputs,
@@ -870,8 +870,9 @@ PD_REGISTER_DYNAMIC_INFER_META_FN(inverse, UnchangedInferMeta);
PD_REGISTER_DYNAMIC_INFER_META_FN(moe, MoeInferMeta);
PD_REGISTER_DYNAMIC_INFER_META_FN(pad3d, Pad3dInferMeta);
PD_REGISTER_DYNAMIC_INFER_META_FN(grid_sampler, GridSamplerInferMeta);
PD_REGISTER_DYNAMIC_INFER_META_FN(conv2d_fusion, Conv2dFusionInferMeta);
PD_REGISTER_DYNAMIC_INFER_META_FN(conv2d, Conv2dFusionInferMeta);
PD_REGISTER_DYNAMIC_INFER_META_FN(fused_conv2d_add_act,
FusedConv2dAddActInferMeta);
PD_REGISTER_DYNAMIC_INFER_META_FN(conv2d, FusedConv2dAddActInferMeta);
PD_REGISTER_DYNAMIC_INFER_META_FN(conv2d_transpose, Conv2dTransposeInferMeta);
PD_REGISTER_DYNAMIC_INFER_META_FN(p_norm, PNormInferMeta);
PD_REGISTER_DYNAMIC_INFER_META_FN(memory_efficient_attention,
@@ -28,7 +28,7 @@ USE_TRT_DYNAMIC_INFER_META_FN(scatter_nd_add);
USE_TRT_DYNAMIC_INFER_META_FN(pad3d);
USE_TRT_DYNAMIC_INFER_META_FN(inverse);
USE_TRT_DYNAMIC_INFER_META_FN(grid_sampler);
USE_TRT_DYNAMIC_INFER_META_FN(conv2d_fusion);
USE_TRT_DYNAMIC_INFER_META_FN(fused_conv2d_add_act);
USE_TRT_DYNAMIC_INFER_META_FN(conv2d);
USE_TRT_DYNAMIC_INFER_META_FN(conv2d_transpose);
USE_TRT_DYNAMIC_INFER_META_FN(memory_efficient_attention);
paddle/fluid/inference/tensorrt/op_teller.cc (10 changes: 5 additions & 5 deletions)
@@ -255,7 +255,7 @@ struct SimpleOpTypeSetTeller : public Teller {
}

if (op_type == "conv2d" || op_type == "conv2d_transpose" ||
op_type == "conv2d_fusion" || op_type == "depthwise_conv2d" ||
op_type == "fused_conv2d_add_act" || op_type == "depthwise_conv2d" ||
op_type == "depthwise_conv2d_transpose") {
if (desc.Input("Input").size() != 1) {
VLOG(3) << "TRT Conv2d expect 1 input, but got "
@@ -270,7 +270,7 @@ struct SimpleOpTypeSetTeller : public Teller {
}

if (desc.HasAttr("enable_int8")) {
if (op_type == "conv2d" || op_type == "conv2d_fusion") {
if (op_type == "conv2d" || op_type == "fused_conv2d_add_act") {
if (!desc.HasAttr("Input_scale")) {
VLOG(3) << "Input scale not found. TRT int8"
" requires conv/deconv to have "
@@ -304,7 +304,7 @@ struct SimpleOpTypeSetTeller : public Teller {

// strides > 1 and 'SAME' is only supported by trt7.0 above
#if !IS_TRT_VERSION_GE(7000)
if (op_type == "conv2d" || op_type == "conv2d_fusion" ||
if (op_type == "conv2d" || op_type == "fused_conv2d_add_act" ||
op_type == "depthwise_conv2d") {
if (desc.HasAttr("padding_algorithm") && with_dynamic_shape) {
auto padding_algorithm =
@@ -2818,7 +2818,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"bmm",
"range",
"conv2d",
"conv2d_fusion",
"fused_conv2d_add_act",
"pool2d",
"relu",
"elu",
@@ -2989,7 +2989,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"bmm",
"range",
"conv2d",
"conv2d_fusion",
"fused_conv2d_add_act",
"pool2d",
"relu",
"elu",
paddle/fluid/pir/dialect/op_generator/op_build_gen.py (2 changes: 1 addition & 1 deletion)
@@ -23,7 +23,7 @@
'SliceRawInferMeta',
'StackInferMeta',
'Conv2dTransposeInferMeta',
'Conv2dFusionInferMeta',
'FusedConv2dAddActInferMeta',
'InterpolateInferMeta',
}

paddle/fluid/pir/dialect/op_generator/ops_api_gen.py (2 changes: 1 addition & 1 deletion)
@@ -89,7 +89,7 @@
'fusion_gru',
'fusion_seqconv_eltadd_relu',
'fusion_seqexpand_concat_fc',
'conv2d_fusion',
'fused_conv2d_add_act',
'fusion_repeated_fc_relu',
'fusion_squared_mat_sub',
'fused_attention',
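Out-of-tree passes, plugins, or scripts that match the operator by its type string also need the new spelling. A small, purely illustrative helper (not part of this PR) that keeps such code working across the rename:

#include <string>

// Illustrative only: maps the pre-rename op type to the new canonical name.
inline std::string CanonicalFusedConvType(const std::string& op_type) {
  return op_type == "conv2d_fusion" ? "fused_conv2d_add_act" : op_type;
}

// Example predicate an external pass might use when walking a graph.
inline bool IsFusedConv2dAddAct(const std::string& op_type) {
  return CanonicalFusedConvType(op_type) == "fused_conv2d_add_act";
}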