PaddlePaddle · Sand3r- · May 8, 2020 · May 19, 2020 · May 19, 2020 · May 19, 2020
@@ -158,6 +158,7 @@ class Tensor {
   void ShareBufferWith(const Tensor& tensor) {
     holder_ = tensor.holder_;
     offset_ = tensor.offset_;
+    type_ = tensor.type_;
   }
 
   bool IsSharedBufferWith(const Tensor& src) const {

diff --git a/paddle/fluid/operators/elementwise/CMakeLists.txt b/paddle/fluid/operators/elementwise/CMakeLists.txt
@@ -4,4 +4,3 @@ register_operators()
 cc_test(test_elementwise_add_op_inplace SRCS test_elementwise_add_op_inplace.cc DEPS op_registry elementwise_add_op scope device_context enforce executor)
 cc_test(test_elementwise_div_grad_grad SRCS test_elementwise_div_grad_grad.cc DEPS op_registry elementwise_div_op scope device_context enforce executor)
 cc_test(test_elementwise_add_grad_grad SRCS test_elementwise_add_grad_grad.cc DEPS op_registry elementwise_add_op scope device_context enforce executor)
-cc_test(test_elementwise_mul_op_correct_dims SRCS test_elementwise_mul_op_dim.cc DEPS op_registry elementwise_mul_op scope device_context enforce executor)
diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.h b/paddle/fluid/operators/elementwise/elementwise_mul_op.h
@@ -28,38 +28,16 @@ class ElementwiseMulOp : public ElementwiseOp {
   using Tensor = framework::Tensor;
   using ElementwiseOp::ElementwiseOp;
 
-#ifdef PADDLE_WITH_MKLDNN
-  static bool AreDimsAndFormatCorrect(const framework::ExecutionContext& ctx,
-                                      int simd_width,
-                                      mkldnn::memory::format_tag x_format) {
-    using Tensor = framework::Tensor;
-    using paddle::framework::vectorize;
-    using mkldnn::memory;
-    auto* x = ctx.Input<Tensor>("X");
-    auto* y = ctx.Input<Tensor>("Y");
-    auto x_dims = vectorize(x->dims());
-    const bool are_dims_divisable = !(x_dims[1] % simd_width);
-    const bool is_x_format_correct = x->format() == x_format;
-    const bool is_y_format_correct = vectorize(y->dims()).size() == 2;
-    return are_dims_divisable && is_x_format_correct && is_y_format_correct;
-  }
-#endif
-
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
     auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
 
 #ifdef PADDLE_WITH_MKLDNN
     using mkldnn::memory;
     if (platform::CanMKLDNNBeUsed(ctx)) {
-      bool can_use_avx512_kernel =
-          platform::MayIUse(platform::avx512f) &&
-          AreDimsAndFormatCorrect(ctx, 16, memory::format_tag::nChw16c);
-      if (can_use_avx512_kernel) {
-        return framework::OpKernelType(input_data_type, ctx.GetPlace(),
-                                       framework::DataLayout::kMKLDNN,
-                                       framework::LibraryType::kMKLDNN);
-      }
+      return framework::OpKernelType(input_data_type, ctx.GetPlace(),
+                                     framework::DataLayout::kMKLDNN,
+                                     framework::LibraryType::kMKLDNN);
     }
 #endif
     return framework::OpKernelType(input_data_type, ctx.GetPlace());

diff --git a/paddle/fluid/operators/elementwise/elementwise_op.h b/paddle/fluid/operators/elementwise/elementwise_op.h
@@ -148,6 +148,21 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault("");
     AddAttr<std::string>("y_data_format", "This parameter is no longer used.")
         .SetDefault("");
+    /* int8 parameters */
+    AddAttr<bool>("use_quantizer",
+                  "(bool, default false) "
+                  "Set to true for operators that should be quantized and use "
+                  "int8 kernel. Only used on CPU.")
+        .SetDefault(false);
+    AddAttr<float>("Scale_x",
+                   "(float, default 1.0f), The quantize scale of X tensor")
+        .SetDefault(1.0f);
+    AddAttr<float>("Scale_y",
+                   "(float, default 1.0f), The quantize scale of Y tensor")
+        .SetDefault(1.0f);
+    AddAttr<float>("Scale_out",
+                   "(float, default 1.0f), The quantize scale of output data")
+        .SetDefault(1.0f);
     AddOpComment();
   }
 

diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc b/paddle/fluid/operators/elementwise/mkldnn/elementwise_add_mkldnn_op.cc
@@ -1,74 +1,21 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/memory/memcpy.h"
-#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
-#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
-
-#include "paddle/fluid/framework/data_layout_transform.h"
-#include "paddle/fluid/platform/mkldnn_reuse.h"
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h"
 
 namespace paddle {
 namespace operators {
-
-using framework::DataLayout;
-using framework::Tensor;
-using mkldnn::memory;
-using mkldnn::primitive;
-using mkldnn::reorder;
-using mkldnn::stream;
-using mkldnn::sum;
-
-template <typename T>
-class EltwiseAddMKLDNNKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    const auto& dev_ctx =
-        ctx.template device_context<paddle::platform::MKLDNNDeviceContext>();
-    const auto& mkldnn_engine = dev_ctx.GetEngine();
-
-    const auto* x = ctx.Input<Tensor>("X");
-    const auto* y = ctx.Input<Tensor>("Y");
-    auto* z = ctx.Output<Tensor>("Out");
-
-    platform::BinaryMKLDNNHandler<T> handler(
-        dev_ctx, mkldnn_engine, ctx.GetPlace(), x, y, z, ctx.OutputName("Out"));
-
-    const auto src_x_memory = handler.AcquireSrcMemory(x);
-    const auto src_y_memory = handler.AcquireSecondSrcMemory(y);
-
-    // For Inplace src and and dst are the same memory object
-    const auto dst_memory =
-        x->IsSharedBufferWith(*z) ? src_x_memory : handler.AcquireDstMemory(z);
-
-    const auto binary_prim = handler.AcquireForwardPrimitive();
-
-    mkldnn::stream astream(mkldnn_engine);
-
-    const std::unordered_map<int, dnnl::memory> args = {
-        {DNNL_ARG_SRC_0, *src_x_memory},
-        {DNNL_ARG_SRC_1, *src_y_memory},
-        {DNNL_ARG_DST, *dst_memory}};
-
-    binary_prim->execute(astream, args);
-    astream.wait();
-
-    z->set_layout(DataLayout::kMKLDNN);
-    z->set_format(platform::GetMKLDNNFormat(*dst_memory));
-  }
-};
-
 template <typename T>
 class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
  public:
@@ -106,8 +53,11 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
 
 namespace ops = paddle::operators;
 
-REGISTER_OP_KERNEL(elementwise_add, MKLDNN, ::paddle::platform::CPUPlace,
-                   ops::EltwiseAddMKLDNNKernel<float>)
+REGISTER_OP_KERNEL(
+    elementwise_add, MKLDNN, ::paddle::platform::CPUPlace,
+    ops::EltwiseMKLDNNKernel<float, dnnl::algorithm::binary_add>,
+    ops::EltwiseMKLDNNKernel<int8_t, dnnl::algorithm::binary_add>,
+    ops::EltwiseMKLDNNKernel<uint8_t, dnnl::algorithm::binary_add>)
 
 REGISTER_OP_KERNEL(elementwise_add_grad, MKLDNN, ::paddle::platform::CPUPlace,
                    ops::EltwiseAddMKLDNNGradKernel<float>)
diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h
@@ -0,0 +1,77 @@
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include <unordered_map>
+#include "paddle/fluid/memory/memcpy.h"
+#include "paddle/fluid/operators/elementwise/elementwise_add_op.h"
+#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
+
+#include "paddle/fluid/framework/data_layout_transform.h"
+#include "paddle/fluid/platform/mkldnn_reuse.h"
+
+namespace paddle {
+namespace operators {
+
+using framework::DataLayout;
+using framework::Tensor;
+using mkldnn::memory;
+using mkldnn::primitive;
+using mkldnn::stream;
+
+template <typename T, dnnl::algorithm BINARY_OP>
+class EltwiseMKLDNNKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    const auto& dev_ctx =
+        ctx.template device_context<paddle::platform::MKLDNNDeviceContext>();
+    const auto& mkldnn_engine = dev_ctx.GetEngine();
+
+    const auto* x = ctx.Input<Tensor>("X");
+    const auto* y = ctx.Input<Tensor>("Y");
+    auto* z = ctx.Output<Tensor>("Out");
+
+    float scale_x = ctx.Attr<float>("Scale_x");
+    float scale_y = ctx.Attr<float>("Scale_y");
+    float scale_o = ctx.Attr<float>("Scale_out");
+
+    platform::BinaryMKLDNNHandler<T> handler(
+        BINARY_OP, dev_ctx, mkldnn_engine, ctx.GetPlace(), x, y, z, scale_x,
+        scale_y, scale_o, ctx.OutputName("Out"));
+
+    const auto src_x_memory = handler.AcquireSrcMemory(x);
+    const auto src_y_memory = handler.AcquireSecondSrcMemory(y);
+
+    // For Inplace src and and dst are the same memory object
+    const auto dst_memory =
+        x->IsSharedBufferWith(*z) ? src_x_memory : handler.AcquireDstMemory(z);
+
+    const auto binary_prim = handler.AcquireForwardPrimitive();
+
+    mkldnn::stream astream(mkldnn_engine);
+
+    const std::unordered_map<int, dnnl::memory> args = {
+        {DNNL_ARG_SRC_0, *src_x_memory},
+        {DNNL_ARG_SRC_1, *src_y_memory},
+        {DNNL_ARG_DST, *dst_memory}};
+
+    binary_prim->execute(astream, args);
+    astream.wait();
+
+    z->set_layout(DataLayout::kMKLDNN);
+    z->set_format(platform::GetMKLDNNFormat(*dst_memory));
+  }
+};
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/elementwise/mkldnn/elementwise_mul_mkldnn_op.cc b/paddle/fluid/operators/elementwise/mkldnn/elementwise_mul_mkldnn_op.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -12,94 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <mkldnn/include/mkldnn.hpp>
-#include "paddle/fluid/operators/elementwise/elementwise_op.h"
-#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
-
-#include "paddle/fluid/operators/jit/kernels.h"
-#include "paddle/fluid/platform/cpu_info.h"
-#include "paddle/fluid/platform/mkldnn_helper.h"
-
-#ifdef PADDLE_WITH_XBYAK
-#include "xbyak/xbyak.h"
-#include "xbyak/xbyak_util.h"
-#endif
-
-namespace paddle {
-namespace operators {
-
-using framework::DataLayout;
-using mkldnn::memory;
-using platform::StringToMKLDNNFormat;
-
-template <typename T>
-static void ComputeBroadcastedMultiply(const T* x_data, const T* y_data,
-                                       T* z_data, int64_t n, int64_t c,
-                                       int64_t h, int64_t w, int simd_width,
-                                       void (*multiply)(const T*, const T*, T*,
-                                                        int, int)) {
-  const int64_t C = c / simd_width;
-#pragma omp parallel for collapse(2)
-  for (int ni = 0; ni < n; ni++) {
-    for (int ci = 0; ci < C; ci++) {
-      auto ptr_x =
-          x_data + ni * C * h * w * simd_width + ci * h * w * simd_width;
-
-      auto ptr_y = y_data + ni * C * simd_width + ci * simd_width;
-      auto ptr_z =
-          z_data + ni * C * h * w * simd_width + ci * h * w * simd_width;
-
-      multiply(ptr_x, ptr_y, ptr_z, h, w);
-    }
-  }
-}
-
-template <typename T>
-class ElementwiseMulMKLDNNKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    using Tensor = framework::Tensor;
-
-    int axis = ctx.Attr<int>("axis");
-    auto* x = ctx.Input<Tensor>("X");
-    auto* y = ctx.Input<Tensor>("Y");
-    auto* z = ctx.Output<Tensor>("Out");
-    const T* x_data = x->data<T>();
-    const T* y_data = y->data<T>();
-    T* z_data = z->mutable_data<T>(ctx.GetPlace());
-
-    auto x_dims = x->dims();
-    auto y_dims_untrimmed = y->dims();
-    auto x_int_dims = paddle::framework::vectorize<int64_t>(x_dims);
-
-    int pre, num, post, is_run_common_broadcast;
-    get_mid_dims(x_dims, y_dims_untrimmed, axis, &pre, &num, &post,
-                 &is_run_common_broadcast);
-
-    if (post == 1)
-      PADDLE_THROW(
-          platform::errors::Unimplemented("Not implemented when post is 1."));
-
-    const int64_t n = x_dims[0];
-    const int64_t c = x_dims[1];
-    const int64_t h = x_dims[2];
-    const int64_t w = x_dims[3];
-
-    const int simd_width = 16;
-    auto multiply =
-        jit::KernelFuncs<jit::NCHW16CMulNCTuple<T>, platform::CPUPlace>::Cache()
-            .At(0);
-    ComputeBroadcastedMultiply(x_data, y_data, z_data, n, c, h, w, simd_width,
-                               multiply);
-
-    z->set_layout(DataLayout::kMKLDNN);
-    z->set_format(x->format());
-  }
-};
-}  // namespace operators
-}  // namespace paddle
+#include "paddle/fluid/operators/elementwise/mkldnn/elementwise_mkldnn_op.h"
 
 namespace ops = paddle::operators;
 
-REGISTER_OP_KERNEL(elementwise_mul, MKLDNN, ::paddle::platform::CPUPlace,
-                   ops::ElementwiseMulMKLDNNKernel<float>)
+REGISTER_OP_KERNEL(
+    elementwise_mul, MKLDNN, ::paddle::platform::CPUPlace,
+    ops::EltwiseMKLDNNKernel<float, dnnl::algorithm::binary_mul>,
+    ops::EltwiseMKLDNNKernel<int8_t, dnnl::algorithm::binary_mul>,
+    ops::EltwiseMKLDNNKernel<uint8_t, dnnl::algorithm::binary_mul>)