PaddlePaddle · luotao1 · Jun 18, 2020 · May 8, 2020 · Jun 9, 2020 · Jun 9, 2020
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -1881,8 +1881,16 @@ PDNode *patterns::MultipleQuantize::operator()() {
 
 PDNode *patterns::MKLDNNInPlace::operator()() {
   const std::unordered_set<std::string> &supported_op_types = {
-      "abs",  "elementwise_add", "gelu", "leaky_relu", "relu", "softmax",
-      "sqrt", "swish",           "tanh"};
+      "abs",
+      "elementwise_mul",
+      "elementwise_add",
+      "gelu",
+      "leaky_relu",
+      "relu",
+      "softmax",
+      "sqrt",
+      "swish",
+      "tanh"};
 
   auto possible_inplace_op = pattern->NewNode(inplace_to_be_op_repr())
                                  ->assert_is_ops(supported_op_types);

diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.h b/paddle/fluid/operators/elementwise/elementwise_mul_op.h
@@ -28,38 +28,30 @@ class ElementwiseMulOp : public ElementwiseOp {
   using Tensor = framework::Tensor;
   using ElementwiseOp::ElementwiseOp;
 
-#ifdef PADDLE_WITH_MKLDNN
-  static bool AreDimsAndFormatCorrect(const framework::ExecutionContext& ctx,
-                                      int simd_width,
-                                      mkldnn::memory::format_tag x_format) {
-    using Tensor = framework::Tensor;
-    using paddle::framework::vectorize;
-    using mkldnn::memory;
-    auto* x = ctx.Input<Tensor>("X");
-    auto* y = ctx.Input<Tensor>("Y");
-    auto x_dims = vectorize(x->dims());
-    const bool are_dims_divisable = !(x_dims[1] % simd_width);
-    const bool is_x_format_correct = x->format() == x_format;
-    const bool is_y_format_correct = vectorize(y->dims()).size() == 2;
-    return are_dims_divisable && is_x_format_correct && is_y_format_correct;
-  }
-#endif
-
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
     auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
 
 #ifdef PADDLE_WITH_MKLDNN
     using mkldnn::memory;
-    if (platform::CanMKLDNNBeUsed(ctx)) {
-      bool can_use_avx512_kernel =
-          platform::MayIUse(platform::avx512f) &&
-          AreDimsAndFormatCorrect(ctx, 16, memory::format_tag::nChw16c);
-      if (can_use_avx512_kernel) {
-        return framework::OpKernelType(input_data_type, ctx.GetPlace(),
-                                       framework::DataLayout::kMKLDNN,
-                                       framework::LibraryType::kMKLDNN);
+    auto CanMKLDNNElementwiseMulBeUsed = [&]() {
+      auto x_dims = ctx.Input<Tensor>("X")->dims();
+      auto y_dims = ctx.Input<Tensor>("Y")->dims();
+      int rankdiff = x_dims.size() - y_dims.size();
+      // TODO(jczaja): Remove this check once oneDNN performance for
+      // scalar broadcasting is improved
+      // (Ernie large situation).
+      if (rankdiff != 0 && y_dims.size() == 1 && y_dims[0] == 1) {
+        return false;
       }
+
+      return true;
+    };
+
+    if (platform::CanMKLDNNBeUsed(ctx) && CanMKLDNNElementwiseMulBeUsed()) {
+      return framework::OpKernelType(input_data_type, ctx.GetPlace(),
+                                     framework::DataLayout::kMKLDNN,
+                                     framework::LibraryType::kMKLDNN);
     }
 #endif
     return framework::OpKernelType(input_data_type, ctx.GetPlace());

diff --git a/paddle/fluid/operators/elementwise/elementwise_op.h b/paddle/fluid/operators/elementwise/elementwise_op.h
@@ -100,15 +100,7 @@ class ElementwiseOp : public framework::OperatorWithKernel {
     auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
 
 #ifdef PADDLE_WITH_MKLDNN
-    auto CanMKLDNNElementwiseAddBeUsed = [&]() {
-      int axis = ctx.Attr<int>("axis");
-      int rankdiff = ctx.Input<Tensor>("X")->dims().size() -
-                     ctx.Input<Tensor>("Y")->dims().size();
-      return (rankdiff == 0) || (axis == -1) || (axis == rankdiff);
-    };
-
-    if (platform::CanMKLDNNBeUsed(ctx) &&
-        (ctx.Type() != "elementwise_add" || CanMKLDNNElementwiseAddBeUsed())) {
+    if (platform::CanMKLDNNBeUsed(ctx)) {
       return framework::OpKernelType(input_data_type, ctx.GetPlace(),
                                      framework::DataLayout::kMKLDNN,
                                      framework::LibraryType::kMKLDNN);
@@ -148,6 +140,21 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault("");
     AddAttr<std::string>("y_data_format", "This parameter is no longer used.")
         .SetDefault("");
+    /* int8 parameters */
+    AddAttr<bool>("use_quantizer",
+                  "(bool, default false) "
+                  "Set to true for operators that should be quantized and use "
+                  "int8 kernel. Only used on CPU.")
+        .SetDefault(false);
+    AddAttr<float>("Scale_x",
+                   "(float, default 1.0f), The quantize scale of X tensor")
+        .SetDefault(1.0f);
+    AddAttr<float>("Scale_y",
+                   "(float, default 1.0f), The quantize scale of Y tensor")
+        .SetDefault(1.0f);
+    AddAttr<float>("Scale_out",
+                   "(float, default 1.0f), The quantize scale of output data")
+        .SetDefault(1.0f);
     AddOpComment();
   }
 

diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc b/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc
@@ -1,74 +1,21 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/memory/memcpy.h"
-#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
-#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
-
-#include "paddle/fluid/framework/data_layout_transform.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h"
 
 namespace paddle {
 namespace operators {
-
-using framework::DataLayout;
-using framework::Tensor;
-using mkldnn::memory;
-using mkldnn::primitive;
-using mkldnn::reorder;
-using mkldnn::stream;
-using mkldnn::sum;
-
-template <typename T>
-class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    const auto& dev_ctx =
-        ctx.template device_context<paddle::platform::MKLDNNDeviceContext>();
-    const auto& mkldnn_engine = dev_ctx.GetEngine();
-
-    const auto* x = ctx.Input<Tensor>("X");
-    const auto* y = ctx.Input<Tensor>("Y");
-    auto* z = ctx.Output<Tensor>("Out");
-
-    platform::BinaryMKLDNNHandler<T> handler(
-        dev_ctx, mkldnn_engine, ctx.GetPlace(), x, y, z, ctx.OutputName("Out"));
-
-    const auto src_x_memory = handler.AcquireSrcMemory(x);
-    const auto src_y_memory = handler.AcquireSecondSrcMemory(y);
-
-    // For Inplace src and and dst are the same memory object
-    const auto dst_memory =
-        x->IsSharedBufferWith(*z) ? src_x_memory : handler.AcquireDstMemory(z);
-
-    const auto binary_prim = handler.AcquireForwardPrimitive();
-
-    mkldnn::stream astream(mkldnn_engine);
-
-    const std::unordered_map<int, dnnl::memory> args = {
-        {DNNL_ARG_SRC_0, *src_x_memory},
-        {DNNL_ARG_SRC_1, *src_y_memory},
-        {DNNL_ARG_DST, *dst_memory}};
-
-    binary_prim->execute(astream, args);
-    astream.wait();
-
-    z->set_layout(DataLayout::kMKLDNN);
-    z->set_format(platform::GetMKLDNNFormat(*dst_memory));
-  }
-};
-
 template <typename T>
 class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
  public:
@@ -106,8 +53,11 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
 
 namespace ops = paddle::operators;
 
-REGISTER_OP_KERNEL(elementwise_add, MKLDNN, ::paddle::platform::CPUPlace,
-                   ops::EltwiseAddMKLDNNKernel<float>)
+REGISTER_OP_KERNEL(
+    elementwise_add, MKLDNN, ::paddle::platform::CPUPlace,
+    ops::EltwiseMKLDNNKernel<float, dnnl::algorithm::binary_add>,
+    ops::EltwiseMKLDNNKernel<int8_t, dnnl::algorithm::binary_add>,
+    ops::EltwiseMKLDNNKernel<uint8_t, dnnl::algorithm::binary_add>)
 
 REGISTER_OP_KERNEL(elementwise_add_grad, MKLDNN, ::paddle::platform::CPUPlace,
                    ops::EltwiseAddMKLDNNGradKernel<float>)
diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h
@@ -0,0 +1,79 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <unordered_map>
+#include "paddle/fluid/memory/memcpy.h"
+#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
+#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
+
+#include "paddle/fluid/framework/data_layout_transform.h"
+#include "paddle/fluid/platform/mkldnn_reuse.h"
+
+namespace paddle {
+namespace operators {
+
+using framework::DataLayout;
+using framework::Tensor;
+using mkldnn::memory;
+using mkldnn::primitive;
+using mkldnn::stream;
+
+template <typename T, dnnl::algorithm BINARY_OP>
+class EltwiseMKLDNNKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    const auto& dev_ctx =
+        ctx.template device_context<paddle::platform::MKLDNNDeviceContext>();
+    const auto& mkldnn_engine = dev_ctx.GetEngine();
+
+    const auto* x = ctx.Input<Tensor>("X");
+    const auto* y = ctx.Input<Tensor>("Y");
+    auto* z = ctx.Output<Tensor>("Out");
+
+    float scale_x = ctx.Attr<float>("Scale_x");
+    float scale_y = ctx.Attr<float>("Scale_y");
+    float scale_o = ctx.Attr<float>("Scale_out");
+
+    int axis = ctx.Attr<int>("axis");
+
+    platform::BinaryMKLDNNHandler<T> handler(
+        BINARY_OP, axis, dev_ctx, mkldnn_engine, ctx.GetPlace(), x, y, z,
+        scale_x, scale_y, scale_o, ctx.OutputName("Out"));
+
+    const auto src_x_memory = handler.AcquireSrcMemory(x);
+    const auto src_y_memory = handler.AcquireSecondSrcMemory(y);
+
+    // For in-place execution, src and dst are the same memory object
+    const auto dst_memory =
+        x->IsSharedBufferWith(*z) ? src_x_memory : handler.AcquireDstMemory(z);
+
+    const auto binary_prim = handler.AcquireForwardPrimitive();
+
+    mkldnn::stream astream(mkldnn_engine);
+
+    const std::unordered_map<int, dnnl::memory> args = {
+        {DNNL_ARG_SRC_0, *src_x_memory},
+        {DNNL_ARG_SRC_1, *src_y_memory},
+        {DNNL_ARG_DST, *dst_memory}};
+
+    binary_prim->execute(astream, args);
+    astream.wait();
+
+    z->set_layout(DataLayout::kMKLDNN);
+    z->set_format(platform::GetMKLDNNFormat(*dst_memory));
+  }
+};
+}  // namespace operators
+}  // namespace paddle