Commit

- modified UT

jczaja committed Aug 5, 2021
1 parent 2b24a80 commit 98270c1
Showing 7 changed files with 220 additions and 241 deletions.
@@ -47,7 +47,9 @@ class EltwiseMKLDNNKernel : public framework::OpKernel<T> {
float scale_o = ctx.Attr<float>("Scale_out");
int axis = ctx.Attr<int>("axis");

platform::BinaryMKLDNNHandler<T> handler( BINARY_OP, axis, mkldnn_engine, ctx.GetPlace(), x, y, z, scale_x, scale_y, scale_o);
platform::BinaryMKLDNNHandler<T> handler(BINARY_OP, axis, mkldnn_engine,
ctx.GetPlace(), x, y, z, scale_x,
scale_y, scale_o);

const auto src_x_memory = handler.AcquireSrcMemory(x);
const auto src_y_memory = handler.AcquireSecondSrcMemory(y);
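The hunk above is truncated before the execution step. For context, a minimal sketch of how such a binary kernel typically finishes, assuming the handler also provides AcquireDstMemory and AcquireForwardPrimitive (as the scale and softmax kernels later in this commit do) and using the standard oneDNN binary argument ids:

    // Acquire the destination memory and the (re)usable binary primitive.
    const auto dst_memory = handler.AcquireDstMemory(z);
    const auto binary_prim = handler.AcquireForwardPrimitive();

    // Bind both sources and the destination, then run on the oneDNN stream.
    auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
    binary_prim->execute(astream, {{DNNL_ARG_SRC_0, *src_x_memory},
                                   {DNNL_ARG_SRC_1, *src_y_memory},
                                   {DNNL_ARG_DST, *dst_memory}});
    astream.wait();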
@@ -48,8 +48,8 @@ class EltwiseMulMKLDNNGradKernel : public ElemwiseGradKernel<T> {
if (dx) {
// dx = dout*y
platform::BinaryMKLDNNHandler<T> handler(
dnnl::algorithm::binary_mul, axis, mkldnn_engine,
ctx.GetPlace(), dout, y, dx, 1.0f, 1.0f, 1.0f);
dnnl::algorithm::binary_mul, axis, mkldnn_engine, ctx.GetPlace(),
dout, y, dx, 1.0f, 1.0f, 1.0f);

const auto src_dout_memory = handler.AcquireSrcMemory(dout);
const auto src_y_memory = handler.AcquireSecondSrcMemory(y);
@@ -74,8 +74,8 @@ class EltwiseMulMKLDNNGradKernel : public ElemwiseGradKernel<T> {
// A nullptr is passed to the handler instead of the output tensor because
// we want the Dst buffer to be allocated by oneDNN rather than by a Tensor
platform::BinaryMKLDNNHandler<T> handler(
dnnl::algorithm::binary_mul, axis, mkldnn_engine,
ctx.GetPlace(), dout, x, nullptr, 1.0f, 1.0f, 1.0f);
dnnl::algorithm::binary_mul, axis, mkldnn_engine, ctx.GetPlace(),
dout, x, nullptr, 1.0f, 1.0f, 1.0f);

const auto src_dout_memory = handler.AcquireSrcMemory(dout);
const auto src_x_memory = handler.AcquireSecondSrcMemory(x);
11 changes: 6 additions & 5 deletions paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
@@ -79,14 +79,15 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
paddle::platform::errors::PreconditionNotMet(
"Operator DNNL eletwise_forward must use CPUPlace"));
auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
const auto &mkldnn_engine = dev_ctx.GetEngine();

const auto *x = ctx.Input<Tensor>("X");
auto *y = ctx.Output<Tensor>("Out");

bool is_inplaced = x->IsSharedBufferWith(*y);

platform::ActivationMKLDNNHandler<T> handler(algorithm, ctx, mkldnn_engine, ctx.GetPlace(), x);
platform::ActivationMKLDNNHandler<T> handler(algorithm, ctx, mkldnn_engine,
ctx.GetPlace(), x);

auto src_memory_p = handler.AcquireSrcMemory(x);
auto dst_memory_p = is_inplaced ? src_memory_p : handler.AcquireDstMemory(y);
@@ -105,14 +106,14 @@ template <typename T>
void eltwise_grad(const framework::ExecutionContext &ctx,
mkldnn::algorithm algorithm) {
auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
const auto &mkldnn_engine = dev_ctx.GetEngine();

const auto *x = ctx.Input<Tensor>("X");
const auto *diff_y = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto *diff_x = ctx.Output<Tensor>(framework::GradVarName("X"));

platform::ActivationMKLDNNHandler<T> handler(
algorithm, ctx, mkldnn_engine, ctx.GetPlace(), x, diff_y);
platform::ActivationMKLDNNHandler<T> handler(algorithm, ctx, mkldnn_engine,
ctx.GetPlace(), x, diff_y);

auto src_memory_p = handler.AcquireBackwardSrcMemory(x);
auto diff_dst_memory_p = handler.AcquireDiffDstMemory(diff_y);
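The backward hunk is cut off after the two Acquire calls; a hedged sketch of the usual continuation, assuming the handler exposes AcquireDiffSrcMemory and AcquireBackwardPrimitive (names following the pattern of the Acquire calls already shown):

    // Destination for dX and the eltwise backward primitive.
    auto diff_src_memory_p = handler.AcquireDiffSrcMemory(diff_x);
    auto activation_backward_p = handler.AcquireBackwardPrimitive();

    // oneDNN eltwise backward reads the original src and dOut, writes dX.
    auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
    activation_backward_p->execute(astream,
                                   {{DNNL_ARG_SRC, *src_memory_p},
                                    {DNNL_ARG_DIFF_DST, *diff_dst_memory_p},
                                    {DNNL_ARG_DIFF_SRC, *diff_src_memory_p}});
    astream.wait();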
6 changes: 4 additions & 2 deletions paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc
@@ -37,10 +37,12 @@ class ScaleMKLDNNKernel : public framework::OpKernel<T> {
bool is_inplaced = x->IsSharedBufferWith(*out);

platform::ActivationMKLDNNHandler<T> handler(
mkldnn::algorithm::eltwise_linear, ctx, mkldnn_engine, ctx.GetPlace(), x);
mkldnn::algorithm::eltwise_linear, ctx, mkldnn_engine, ctx.GetPlace(),
x);

auto src_memory_p = handler.AcquireSrcMemory(x);
auto dst_memory_p = is_inplaced ? src_memory_p : handler.AcquireDstMemory(out);
auto dst_memory_p =
is_inplaced ? src_memory_p : handler.AcquireDstMemory(out);
auto activation_p = handler.AcquireForwardPrimitive();

auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
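ScaleMKLDNNKernel reuses the activation handler with eltwise_linear, which in oneDNN computes alpha * x + beta. The handler's internals are not part of this diff; a sketch of how the scale op's attributes would presumably map onto alpha and beta (attribute names as in Paddle's scale op):

    // eltwise_linear: out = alpha * x + beta
    const float scale = ctx.Attr<float>("scale");
    const float bias = ctx.Attr<float>("bias");
    const bool bias_after_scale = ctx.Attr<bool>("bias_after_scale");

    const float alpha = scale;
    // out = scale * x + bias            when bias is added after scaling
    // out = scale * (x + bias)
    //     = scale * x + scale * bias    when bias is added before scaling
    const float beta = bias_after_scale ? bias : bias * scale;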
50 changes: 27 additions & 23 deletions paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
@@ -33,12 +33,13 @@ using platform::to_void_cast;
template <typename T>
class SoftmaxMKLDNNHandler
: public platform::MKLDNNHandlerNoCachingT<T, mkldnn::softmax_forward,
mkldnn::softmax_backward> {
mkldnn::softmax_backward> {
public:
SoftmaxMKLDNNHandler(const mkldnn::engine mkldnn_engine,
platform::Place cpu_place, const Tensor* input,
Tensor* output, const int axis)
: platform::MKLDNNHandlerNoCachingT<T, mkldnn::softmax_forward, mkldnn::softmax_backward>(
: platform::MKLDNNHandlerNoCachingT<T, mkldnn::softmax_forward,
mkldnn::softmax_backward>(
mkldnn_engine, cpu_place) {
PADDLE_ENFORCE_EQ(
input->dims(), output->dims(),
@@ -49,7 +50,8 @@ class SoftmaxMKLDNNHandler
auto md = memory::desc(softmax_tz, platform::MKLDNNGetDataType<T>(),
input->format());

this->AcquireForwardPrimitiveDescriptor(prop_kind::forward_scoring, md, axis);
this->AcquireForwardPrimitiveDescriptor(prop_kind::forward_scoring, md,
axis);
}

SoftmaxMKLDNNHandler(const framework::ExecutionContext& ctx,
@@ -58,25 +60,26 @@
const Tensor* out_grad, Tensor* in_x_grad,
const std::string& unique_name)
: platform::MKLDNNHandlerNoCachingT<T, mkldnn::softmax_forward,
mkldnn::softmax_backward>(mkldnn_engine, cpu_place) {
PADDLE_ENFORCE_EQ(
out_grad->dims(), in_x_grad->dims(),
platform::errors::InvalidArgument("The shape of softmax_grad's input "
"and output must be identical."));

auto dims = out_grad->dims(); // input and output share the same shape
const int axis = CanonicalAxis(ctx.Attr<int>("axis"), dims.size());
auto softmax_tz = framework::vectorize<int64_t>(dims);

auto data_softmax_md = MKLDNNMemDesc(
softmax_tz, platform::MKLDNNGetDataType<T>(), out->format());
auto diff_softmax_md = MKLDNNMemDesc(
softmax_tz, platform::MKLDNNGetDataType<T>(), out_grad->format());

this->AcquireForwardPrimitiveDescriptor(prop_kind::forward_scoring,
data_softmax_md, axis);
this->AcquireBackwardPrimitiveDescriptor(diff_softmax_md, data_softmax_md,
axis);
mkldnn::softmax_backward>(
mkldnn_engine, cpu_place) {
PADDLE_ENFORCE_EQ(
out_grad->dims(), in_x_grad->dims(),
platform::errors::InvalidArgument("The shape of softmax_grad's input "
"and output must be identical."));

auto dims = out_grad->dims(); // input and output share the same shape
const int axis = CanonicalAxis(ctx.Attr<int>("axis"), dims.size());
auto softmax_tz = framework::vectorize<int64_t>(dims);

auto data_softmax_md = MKLDNNMemDesc(
softmax_tz, platform::MKLDNNGetDataType<T>(), out->format());
auto diff_softmax_md = MKLDNNMemDesc(
softmax_tz, platform::MKLDNNGetDataType<T>(), out_grad->format());

this->AcquireForwardPrimitiveDescriptor(prop_kind::forward_scoring,
data_softmax_md, axis);
this->AcquireBackwardPrimitiveDescriptor(diff_softmax_md, data_softmax_md,
axis);
}
};

@@ -93,7 +96,8 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {

const int axis = CanonicalAxis(ctx.Attr<int>("axis"), input->dims().size());

SoftmaxMKLDNNHandler<T> handler(mkldnn_engine, ctx.GetPlace(), input, output, axis);
SoftmaxMKLDNNHandler<T> handler(mkldnn_engine, ctx.GetPlace(), input,
output, axis);

auto softmax_src_memory_p = handler.AcquireSrcMemory(input);
// For Inplace src and dst are the same memory object
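This hunk also stops before the primitive is run. A minimal sketch of the remaining steps, assuming an is_inplaced flag analogous to the ones in the activation and scale kernels above:

    // In-place softmax writes into the source buffer, so reuse its memory object.
    auto softmax_dst_memory_p = is_inplaced ? softmax_src_memory_p
                                            : handler.AcquireDstMemory(output);
    auto softmax_p = handler.AcquireForwardPrimitive();

    auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
    softmax_p->execute(astream, {{DNNL_ARG_SRC, *softmax_src_memory_p},
                                 {DNNL_ARG_DST, *softmax_dst_memory_p}});
    astream.wait();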
66 changes: 20 additions & 46 deletions paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc
@@ -70,11 +70,16 @@ void RunOperator(const platform::Place &place, const std::string &op_type,

std::map<const std::string, int> num_inputs = {{"softmax", 1},
{"relu", 1},
{"conv2d", 2},
{"elementwise_add", 2},
{"elementwise_mul", 2}};

std::string first_input = inplace == true ? output_name : "x";

std::string first_input_var_name = (op_type == "conv2d") ? "Input" : "X";
std::string second_input_var_name = (op_type == "conv2d") ? "Filter" : "Y";
std::string output_var_name = (op_type == "conv2d") ? "Output" : "Out";

std::vector<InputVars> input_names = {
{first_input, scope.Var(first_input)->GetMutable<framework::LoDTensor>()},
{"x1", num_inputs[op_type] > 1
@@ -113,68 +118,37 @@

auto &pool = platform::DeviceContextPool::Instance();

auto op = num_inputs[op_type] > 1
? framework::OpRegistry::CreateOp(
op_type, {{"X", {first_input}}, {"Y", {"x1"}}},
{{"Out", {output_name}}}, {{"use_mkldnn", {true}}})
: framework::OpRegistry::CreateOp(
op_type, {{"X", {first_input}}}, {{"Out", {output_name}}},
{{"use_mkldnn", {true}}});
auto op =
num_inputs[op_type] > 1
? framework::OpRegistry::CreateOp(
op_type, {{first_input_var_name, {first_input}},
{second_input_var_name, {"x1"}}},
{{output_var_name, {output_name}}}, {{"use_mkldnn", {true}}})
: framework::OpRegistry::CreateOp(
op_type, {{first_input_var_name, {first_input}}},
{{output_var_name, {output_name}}}, {{"use_mkldnn", {true}}});

op->Run(scope, place);
pool.Get(place)->Wait();
}

TEST(test_softmax_reuse_cache, cpu_place) {
framework::DDim dims({32, 64});
framework::DDim dims({1, 16, 32, 64});
platform::CPUPlace p;
CacheTester ct;
RunOperator<float>(p, "softmax", dims, "softmax_out");
RunOperator<float>(p, "softmax", dims, "softmax_out");
RunOperator<float>(p, "conv2d", dims, "conv_out");
RunOperator<float>(p, "conv2d", dims, "conv_out");
PADDLE_ENFORCE_EQ(ct.Analyze(4), true,
platform::errors::InvalidArgument(
"Wrong number of cached oneDNN objects"));
}

TEST(test_softmax_noreuse_cache, cpu_place) {
framework::DDim dims({32, 64});
platform::CPUPlace p;
CacheTester ct;
RunOperator<float>(p, "softmax", dims, "softmax_out");
RunOperator<float>(p, "softmax", dims, "softmax_out2");
PADDLE_ENFORCE_EQ(ct.Analyze(8), true,
platform::errors::InvalidArgument(
"Wrong number of cached oneDNN objects"));
}

TEST(test_softmax_inplace_cache, cpu_place) {
framework::DDim dims({32, 64});
platform::CPUPlace p;
CacheTester ct;
RunOperator<float>(p, "softmax", dims, "softmax_out");
RunOperator<float>(p, "softmax", dims, "softmax_out", true);
PADDLE_ENFORCE_EQ(ct.Analyze(7), true,
platform::errors::InvalidArgument(
"Wrong number of cached oneDNN objects"));
}

TEST(test_relu_inplace_cache, cpu_place) {
framework::DDim dims({32, 64});
platform::CPUPlace p;
CacheTester ct;
RunOperator<float>(p, "relu", dims, "relu_out");
RunOperator<float>(p, "relu", dims, "relu_out", true);
PADDLE_ENFORCE_EQ(ct.Analyze(7), true,
platform::errors::InvalidArgument(
"Wrong number of cached oneDNN objects"));
}

TEST(test_elementwise_add_reuse_cache, cpu_place) {
framework::DDim dims({32, 64});
framework::DDim dims({1, 16, 32, 64});
platform::CPUPlace p;
CacheTester ct;
RunOperator<float>(p, "elementwise_add", dims, "elementwise_add_out");
RunOperator<float>(p, "relu", dims, "elementwise_add_out", true);
RunOperator<float>(p, "conv2d", dims, "conv_out");
RunOperator<float>(p, "conv2d", dims, "conv_out2");
PADDLE_ENFORCE_EQ(ct.Analyze(8), true,
platform::errors::InvalidArgument(
"Wrong number of cached oneDNN objects"));