From e78f3d95762dd623513e89bd6c2f0f95286fd10d Mon Sep 17 00:00:00 2001 From: Jakub Piasecki Date: Tue, 20 Jul 2021 18:54:15 +0200 Subject: [PATCH 1/7] added expand_v2 bf16/fp32 kernel --- paddle/fluid/operators/expand_op.cc | 14 +- paddle/fluid/operators/expand_v2_op.cc | 36 +++- .../operators/mkldnn/expand_v2_mkldnn_op.cc | 166 ++++++++++++++++++ .../reduce_ops/mkldnn/reduce_mkldnn_op.h | 12 +- paddle/fluid/platform/mkldnn_reuse.h | 39 ++-- 5 files changed, 226 insertions(+), 41 deletions(-) create mode 100644 paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc diff --git a/paddle/fluid/operators/expand_op.cc b/paddle/fluid/operators/expand_op.cc index e7da08ff27711..02389c9baa537 100644 --- a/paddle/fluid/operators/expand_op.cc +++ b/paddle/fluid/operators/expand_op.cc @@ -75,9 +75,17 @@ class ExpandOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType( - OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.device_context()); + auto input_data_type = + framework::OperatorWithKernel::IndicateVarDataType(ctx, "X"); + +#ifdef PADDLE_WITH_MKLDNN + if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { + return framework::OpKernelType(input_data_type, ctx.GetPlace(), + framework::DataLayout::kMKLDNN, + framework::LibraryType::kMKLDNN); + } +#endif + return framework::OpKernelType(input_data_type, ctx.GetPlace()); } framework::OpKernelType GetKernelTypeForVar( diff --git a/paddle/fluid/operators/expand_v2_op.cc b/paddle/fluid/operators/expand_v2_op.cc index 618c1560c5eac..e6613453c2f59 100644 --- a/paddle/fluid/operators/expand_v2_op.cc +++ b/paddle/fluid/operators/expand_v2_op.cc @@ -89,9 +89,17 @@ class ExpandV2Op : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType( - OperatorWithKernel::IndicateVarDataType(ctx, "X"), - ctx.device_context()); + auto input_data_type = + framework::OperatorWithKernel::IndicateVarDataType(ctx, "X"); + +#ifdef PADDLE_WITH_MKLDNN + if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { + return framework::OpKernelType(input_data_type, ctx.GetPlace(), + framework::DataLayout::kMKLDNN, + framework::LibraryType::kMKLDNN); + } +#endif + return framework::OpKernelType(input_data_type, ctx.GetPlace()); } framework::OpKernelType GetKernelTypeForVar( @@ -130,6 +138,14 @@ class ExpandV2OpMaker : public framework::OpProtoAndCheckerMaker { "the corresponding value given by Attr(expand_times)."); AddAttr>("shape", "The expanded shape for each dimension.") .SetDefault({}); + AddAttr("use_mkldnn", + "(bool, default false) Only used in mkldnn kernel") + .SetDefault(false); + AddAttr( + "mkldnn_data_type", + "(string, default \"float32\"). Data type of mkldnn kernel") + .SetDefault("float32") + .InEnum({"float32", "bfloat16"}); AddComment(R"DOC( Expand the input to the given shape. The rank of X should be in [1, 6] and size of 'shape' must be in [1, 6] also. @@ -200,9 +216,17 @@ class ExpandV2GradOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType( - ctx, framework::GradVarName("Out")), - ctx.device_context()); + auto input_data_type = + framework::OperatorWithKernel::IndicateVarDataType(ctx, "X"); + +#ifdef PADDLE_WITH_MKLDNN + if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { + return framework::OpKernelType(input_data_type, ctx.GetPlace(), + framework::DataLayout::kMKLDNN, + framework::LibraryType::kMKLDNN); + } +#endif + return framework::OpKernelType(input_data_type, ctx.GetPlace()); } framework::OpKernelType GetKernelTypeForVar( diff --git a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc new file mode 100644 index 0000000000000..6e3c46a0588e4 --- /dev/null +++ b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc @@ -0,0 +1,166 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/platform/mkldnn_reuse.h" + +namespace paddle { +namespace operators { + +using paddle::framework::Tensor; + +template +class ExpandMKLDNNKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + this->RunKernel(ctx); + } + + void RunKernel(const framework::ExecutionContext& ctx) const { + const auto& dev_ctx = + ctx.template device_context(); + const auto& onednn_engine = dev_ctx.GetEngine(); + + const auto* x = ctx.Input("X"); + auto* out = ctx.Output("Out"); + + auto x_vec_dims = framework::vectorize(x->dims()); + auto out_vec_dims = framework::vectorize(out->dims()); + + dnnl::memory::format_tag x_format_tag = x->format(); + if (x_vec_dims.size() != out_vec_dims.size()) { + x_format_tag = + GetExtendedFormatTag(x_vec_dims, out_vec_dims.size(), x_format_tag); + } + + out->set_format(x_format_tag); + + platform::BroadcastDataMKLDNNHandler handler( + dnnl::algorithm::binary_add, dev_ctx, onednn_engine, ctx.GetPlace(), + out, x, 0.0f, 1.0f, ctx.InputName("X"), x_vec_dims); + + auto src_memory_p = handler.AcquireSrcMemory(x); + auto dst_memory_p = handler.AcquireDstMemory(out); + auto binary_p = handler.AcquireForwardPrimitive(); + + const std::unordered_map args = { + {DNNL_ARG_SRC_0, *dst_memory_p}, + {DNNL_ARG_SRC_1, *src_memory_p}, + {DNNL_ARG_DST, *dst_memory_p}}; + + auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + binary_p->execute(astream, args); + astream.wait(); + + out->set_layout(framework::DataLayout::kMKLDNN); + out->set_format(platform::GetMKLDNNFormat(*dst_memory_p)); + } + + private: + dnnl::memory::format_tag GetExtendedFormatTag( + std::vector& dims, int new_size, + mkldnn::memory::format_tag format_tag) const { + mkldnn::memory::desc md(dims, platform::MKLDNNGetDataType(), format_tag); + std::vector new_dims(new_size, 1); + std::copy(dims.begin(), dims.end(), + new_dims.begin() + new_size - dims.size()); + + dims = std::move(new_dims); + return platform::GetMKLDNNFormat(md.reshape(dims)); + } +}; + +template +class ExpandGradMKLDNNKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + this->RunKernel(ctx); + } + + void RunKernel(const framework::ExecutionContext& ctx) const { + const auto& dev_ctx = + ctx.template device_context(); + const auto& onednn_engine = dev_ctx.GetEngine(); + + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); + + auto dx_vec_dims = framework::vectorize(dx->dims()); + auto dout_vec_dims = framework::vectorize(dout->dims()); + + dnnl::memory::format_tag dx_format_tag = dout->format(); + if (dx_vec_dims.size() != dout_vec_dims.size()) { + dx_vec_dims.insert(dx_vec_dims.begin(), + dout_vec_dims.size() - dx_vec_dims.size(), 1); + // dx_format_tag = GetExtendedFormatTag(x_vec_dims, out_vec_dims.size(), + // dx_format_tag); + } + + // out->set_format(x_format_tag); + auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + if (dout_vec_dims == dx_vec_dims) { + mkldnn::memory::data_type dout_type = + framework::ToMKLDNNDataType(dout->type()); + std::string key = platform::CreateKey( + dev_ctx, dout_vec_dims, dout->format(), dout->format(), dout_type); + platform::ReorderMKLDNNHandler reorder_handler( + dout_vec_dims, dout->type(), dout_type, dev_ctx, onednn_engine, key); + + auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( + dout->format(), platform::to_void_cast(dout->data())); + + auto reorder_dst_memory_p = + reorder_handler.AcquireDstMemory(dx, dout->format(), ctx.GetPlace()); + + auto reorder_p = reorder_handler.AcquireReorder(reorder_src_memory_p, + reorder_dst_memory_p); + + reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p); + astream.wait(); + + dx->set_layout(framework::DataLayout::kMKLDNN); + dx->set_format( + platform::GetMKLDNNFormat(reorder_dst_memory_p->get_desc())); + } else { + platform::ReductionMKLDNNHandler handler( + dnnl::algorithm::reduction_sum, 0.0f, 0.0f, dev_ctx, onednn_engine, + ctx.GetPlace(), dout, dx, ctx.InputName("X"), dx_vec_dims); + + auto src_memory_p = handler.AcquireSrcMemory(dout); + auto dst_memory_p = handler.AcquireDstMemory(dx); + + std::unordered_map reduction_args = { + {DNNL_ARG_SRC, *src_memory_p}, {DNNL_ARG_DST, *dst_memory_p}}; + + auto reduction_p = handler.AcquireForwardPrimitive(); + + reduction_p->execute(astream, reduction_args); + astream.wait(); + dx->set_layout(framework::DataLayout::kMKLDNN); + dx->set_format(platform::GetMKLDNNFormat(dst_memory_p->get_desc().reshape( + paddle::framework::vectorize(dx->dims())))); + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_KERNEL(expand_v2, MKLDNN, paddle::platform::CPUPlace, + ops::ExpandMKLDNNKernel, + ops::ExpandMKLDNNKernel); + +REGISTER_OP_KERNEL(expand_v2_grad, MKLDNN, paddle::platform::CPUPlace, + ops::ExpandGradMKLDNNKernel, + ops::ExpandGradMKLDNNKernel); \ No newline at end of file diff --git a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h index 40cd3ba974f04..6a9aae046f386 100644 --- a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h +++ b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h @@ -165,23 +165,21 @@ class ReduceGradMKLDNNKernel : public framework::OpKernel { x_format_tag = getPlainFormatTag(output_dx); } - output_dx->mutable_data(ctx.GetPlace()); output_dx->set_format(x_format_tag); - output_dx->set_layout(input_dy->layout()); platform::BroadcastDataMKLDNNHandler handler( binary_type, dev_ctx, onednn_engine, ctx.GetPlace(), output_dx, input_dy, scale_x, scale_y, ctx.InputName(framework::GradVarName("Out")), input_dims); - const auto src_dx_memory = handler.AcquireSrcMemory(output_dx); - const auto src_dy_memory = handler.AcquireSecondSrcMemory(input_dy); + const auto src_memory_p = handler.AcquireSrcMemory(input_dy); + const auto dst_memory_p = handler.AcquireDstMemory(output_dx); const auto binary_prim = handler.AcquireForwardPrimitive(); const std::unordered_map args = { - {DNNL_ARG_SRC_0, *src_dx_memory}, - {DNNL_ARG_SRC_1, *src_dy_memory}, - {DNNL_ARG_DST, *src_dx_memory}}; + {DNNL_ARG_SRC_0, *dst_memory_p}, + {DNNL_ARG_SRC_1, *src_memory_p}, + {DNNL_ARG_DST, *dst_memory_p}}; auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); binary_prim->execute(astream, args); diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 58622fb2529b8..81cd2793f376d 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -695,8 +695,8 @@ class BroadcastDataMKLDNNHandler BroadcastDataMKLDNNHandler(const dnnl::algorithm algo, const MKLDNNDeviceContext& dev_ctx, const mkldnn::engine engine, - platform::Place cpu_place, const Tensor* x, - const Tensor* y, float scale_x, float scale_y, + platform::Place cpu_place, const Tensor* out, + const Tensor* x, float scale_x, float scale_y, const std::string& uniq_name, const std::vector& input_dims) : platform::MKLDNNHandlerT( @@ -711,19 +711,12 @@ class BroadcastDataMKLDNNHandler x->format(), MKLDNNMemoryFormat::undef, platform::errors::InvalidArgument("Wrong format set for X tensor.")); - PADDLE_ENFORCE_EQ( - y->layout(), DataLayout::kMKLDNN, - platform::errors::InvalidArgument("Wrong layout set for Y tensor.")); - PADDLE_ENFORCE_NE( - y->format(), MKLDNNMemoryFormat::undef, - platform::errors::InvalidArgument("Wrong format set for Y tensor.")); - - const auto src0_tz = framework::vectorize(x->dims()); + const auto src0_tz = framework::vectorize(out->dims()); const auto src0_md = dnnl::memory::desc( - src0_tz, platform::MKLDNNGetDataType(), x->format()); + src0_tz, platform::MKLDNNGetDataType(), out->format()); const auto src1_md = dnnl::memory::desc( - input_dims, platform::MKLDNNGetDataType(), x->format()); + input_dims, platform::MKLDNNGetDataType(), out->format()); dnnl::primitive_attr attributes; attributes.set_scales(DNNL_ARG_SRC_0, 0, {scale_x}); @@ -733,19 +726,15 @@ class BroadcastDataMKLDNNHandler src1_md, src0_md); } } - - std::shared_ptr AcquireSrcMemory(framework::Tensor* input) { - T* input_data = input->data(); - memset(input_data, 0, this->fwd_pd_->src_desc().get_size()); - return this->AcquireMemoryFromPrimitive( - this->fwd_pd_->src_desc(), to_void_cast(input_data), "@src0_mem_p"); - } - - std::shared_ptr AcquireSecondSrcMemory( - const framework::Tensor* input) { - const T* input_data = input->data(); - return this->AcquireMemoryFromPrimitive( - this->fwd_pd_->src1_desc(), to_void_cast(input_data), "@src1_mem_p"); + + template + std::shared_ptr AcquireDstMemory(framework::Tensor* output) { + T_out* ptr = output->mutable_data( + this->place_, this->fwd_pd_->dst_desc().get_size()); + ; + memset(ptr, 0, this->fwd_pd_->dst_desc().get_size()); + return this->AcquireMemoryFromPrimitive(this->fwd_pd_->dst_desc(), ptr, + "@dst_mem_p"); } }; From a40c25d957e2ce594026dc5f571d5f3b81801ec6 Mon Sep 17 00:00:00 2001 From: Jakub Piasecki Date: Tue, 20 Jul 2021 19:00:30 +0200 Subject: [PATCH 2/7] minor change --- paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc index 6e3c46a0588e4..a5969d0e44402 100644 --- a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc @@ -102,11 +102,8 @@ class ExpandGradMKLDNNKernel : public framework::OpKernel { if (dx_vec_dims.size() != dout_vec_dims.size()) { dx_vec_dims.insert(dx_vec_dims.begin(), dout_vec_dims.size() - dx_vec_dims.size(), 1); - // dx_format_tag = GetExtendedFormatTag(x_vec_dims, out_vec_dims.size(), - // dx_format_tag); } - // out->set_format(x_format_tag); auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); if (dout_vec_dims == dx_vec_dims) { mkldnn::memory::data_type dout_type = From b9b34fe306f3c42e26d349ffe775f4bdfa8a092e Mon Sep 17 00:00:00 2001 From: Jakub Piasecki Date: Wed, 21 Jul 2021 14:40:03 +0200 Subject: [PATCH 3/7] CI fix --- paddle/fluid/operators/expand_op.cc | 14 +++----------- paddle/fluid/operators/expand_v2_op.cc | 4 ++-- .../fluid/operators/mkldnn/expand_v2_mkldnn_op.cc | 3 +-- paddle/fluid/platform/mkldnn_reuse.h | 2 +- 4 files changed, 7 insertions(+), 16 deletions(-) diff --git a/paddle/fluid/operators/expand_op.cc b/paddle/fluid/operators/expand_op.cc index 02389c9baa537..e7da08ff27711 100644 --- a/paddle/fluid/operators/expand_op.cc +++ b/paddle/fluid/operators/expand_op.cc @@ -75,17 +75,9 @@ class ExpandOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - auto input_data_type = - framework::OperatorWithKernel::IndicateVarDataType(ctx, "X"); - -#ifdef PADDLE_WITH_MKLDNN - if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { - return framework::OpKernelType(input_data_type, ctx.GetPlace(), - framework::DataLayout::kMKLDNN, - framework::LibraryType::kMKLDNN); - } -#endif - return framework::OpKernelType(input_data_type, ctx.GetPlace()); + return framework::OpKernelType( + OperatorWithKernel::IndicateVarDataType(ctx, "X"), + ctx.device_context()); } framework::OpKernelType GetKernelTypeForVar( diff --git a/paddle/fluid/operators/expand_v2_op.cc b/paddle/fluid/operators/expand_v2_op.cc index e6613453c2f59..3c2b939e79957 100644 --- a/paddle/fluid/operators/expand_v2_op.cc +++ b/paddle/fluid/operators/expand_v2_op.cc @@ -216,8 +216,8 @@ class ExpandV2GradOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { - auto input_data_type = - framework::OperatorWithKernel::IndicateVarDataType(ctx, "X"); + auto input_data_type = framework::OperatorWithKernel::IndicateVarDataType( + ctx, framework::GradVarName("Out")); #ifdef PADDLE_WITH_MKLDNN if (this->CanMKLDNNBeUsed(ctx, input_data_type)) { diff --git a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc index a5969d0e44402..efbeaa78941f3 100644 --- a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc @@ -98,7 +98,6 @@ class ExpandGradMKLDNNKernel : public framework::OpKernel { auto dx_vec_dims = framework::vectorize(dx->dims()); auto dout_vec_dims = framework::vectorize(dout->dims()); - dnnl::memory::format_tag dx_format_tag = dout->format(); if (dx_vec_dims.size() != dout_vec_dims.size()) { dx_vec_dims.insert(dx_vec_dims.begin(), dout_vec_dims.size() - dx_vec_dims.size(), 1); @@ -160,4 +159,4 @@ REGISTER_OP_KERNEL(expand_v2, MKLDNN, paddle::platform::CPUPlace, REGISTER_OP_KERNEL(expand_v2_grad, MKLDNN, paddle::platform::CPUPlace, ops::ExpandGradMKLDNNKernel, - ops::ExpandGradMKLDNNKernel); \ No newline at end of file + ops::ExpandGradMKLDNNKernel); diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 81cd2793f376d..f63d45d7ff6ae 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -726,7 +726,7 @@ class BroadcastDataMKLDNNHandler src1_md, src0_md); } } - + template std::shared_ptr AcquireDstMemory(framework::Tensor* output) { T_out* ptr = output->mutable_data( From 3bd6438fe1bcdb405bcc197e086fce5b31af8e1f Mon Sep 17 00:00:00 2001 From: Jakub Piasecki Date: Fri, 23 Jul 2021 18:20:10 +0200 Subject: [PATCH 4/7] added missing test file --- .../mkldnn/test_expand_v2_mkldnn_op.py | 103 ++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py new file mode 100644 index 0000000000000..88b98a1fdca8e --- /dev/null +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py @@ -0,0 +1,103 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import unittest +import numpy as np +import paddle +import paddle.fluid as fluid +from paddle.fluid import compiler, Program, program_guard, core +from paddle.fluid.tests.unittests.op_test import OpTest, OpTestTool, convert_float_to_uint16 + + +class TestExpandV2OneDNNOp(OpTest): + def setUp(self): + self.op_type = "expand_v2" + self.init_data() + self.x = np.random.random(self.ori_shape).astype("float32") + self.set_inputs() + self.attrs = {'shape': self.shape, 'use_mkldnn': True} + output = np.tile(self.inputs['X'], self.expand_times) + self.outputs = {'Out': output} + + def set_inputs(self): + self.inputs = {'X': self.x} + + def init_data(self): + self.ori_shape = [1, 140] + self.shape = [12, 140] + self.expand_times = [12, 1] + + def test_check_output(self): + self.check_output_with_place(core.CPUPlace()) + + def test_check_grad(self): + self.check_grad_with_place(core.CPUPlace(), ["X"], "Out") + + +class TestExpandV2ExpandDimOneDNNOp(TestExpandV2OneDNNOp): + def init_data(self): + self.ori_shape = [120] + self.shape = [2, 120] + self.expand_times = [2, 1] + +class TestExpandV2CopyScenarioOneDNNOp(TestExpandV2OneDNNOp): + def init_data(self): + self.ori_shape = (2, 10, 5) + self.shape = (2, 10, 5) + self.expand_times = (1, 1, 1) + +class TestExpandV2CopyScenarioShapeNotGivenOneDNNOp(TestExpandV2OneDNNOp): + def init_data(self): + self.ori_shape = (2, 4, 5, 7) + self.shape = (-1, -1, -1, -1) + self.expand_times = (1, 1, 1, 1) + + +# BF16 TESTS +def create_expand_v2_bf16_test_class(parent): + @OpTestTool.skip_if_not_cpu_bf16() + class TestExpandV2BF16OneDNNOp(parent): + def set_inputs(self): + self.inputs = {"X": convert_float_to_uint16(self.x)} + + def calculate_grads(self): + self.dout = self.outputs['Out'] + self.dx = self.dout.copy() + + for i in range (len(self.shape)): + if self.expand_times[i] != 1: + self.dx = np.sum(self.dx, axis=i, keepdims=True) + + def test_check_grad(self): + self.calculate_grads() + self.check_grad_with_place( + core.CPUPlace(), ["X"], + "Out", + user_defined_grads=[convert_float_to_uint16(self.dx)], + user_defined_grad_outputs=[convert_float_to_uint16(self.dout)]) + + cls_name = "{0}_{1}".format(parent.__name__, "Expand_v2_BF16") + TestExpandV2BF16OneDNNOp.__name__ = cls_name + globals()[cls_name] = TestExpandV2BF16OneDNNOp + + +create_expand_v2_bf16_test_class(TestExpandV2OneDNNOp) +create_expand_v2_bf16_test_class(TestExpandV2ExpandDimOneDNNOp) +create_expand_v2_bf16_test_class(TestExpandV2CopyScenarioOneDNNOp) +create_expand_v2_bf16_test_class(TestExpandV2CopyScenarioShapeNotGivenOneDNNOp) + +if __name__ == '__main__': + paddle.enable_static() + unittest.main() From 0b7247fdfdd0f010bbf43cc3f40f5585f4b9dcec Mon Sep 17 00:00:00 2001 From: Jakub Piasecki Date: Fri, 23 Jul 2021 18:49:33 +0200 Subject: [PATCH 5/7] added formatting --- .../fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py index 88b98a1fdca8e..eceef665a0304 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py @@ -52,12 +52,14 @@ def init_data(self): self.shape = [2, 120] self.expand_times = [2, 1] + class TestExpandV2CopyScenarioOneDNNOp(TestExpandV2OneDNNOp): def init_data(self): self.ori_shape = (2, 10, 5) self.shape = (2, 10, 5) self.expand_times = (1, 1, 1) + class TestExpandV2CopyScenarioShapeNotGivenOneDNNOp(TestExpandV2OneDNNOp): def init_data(self): self.ori_shape = (2, 4, 5, 7) @@ -76,7 +78,7 @@ def calculate_grads(self): self.dout = self.outputs['Out'] self.dx = self.dout.copy() - for i in range (len(self.shape)): + for i in range(len(self.shape)): if self.expand_times[i] != 1: self.dx = np.sum(self.dx, axis=i, keepdims=True) From 2b00c60438780cbce0e9b8daa4ca0a4178f50120 Mon Sep 17 00:00:00 2001 From: Jakub Piasecki Date: Wed, 28 Jul 2021 17:59:28 +0200 Subject: [PATCH 6/7] reduced binary size --- .../operators/mkldnn/expand_v2_mkldnn_op.cc | 85 +++++++++---------- .../mkldnn/test_expand_v2_mkldnn_op.py | 2 + 2 files changed, 44 insertions(+), 43 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc index efbeaa78941f3..ffd64a841ecb3 100644 --- a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc @@ -14,28 +14,30 @@ limitations under the License. */ #include "paddle/fluid/platform/mkldnn_reuse.h" -namespace paddle { -namespace operators { +namespace { using paddle::framework::Tensor; +using paddle::framework::vectorize; +using paddle::framework::GradVarName; +using paddle::framework::ExecutionContext; +using paddle::platform::MKLDNNDeviceContext; template -class ExpandMKLDNNKernel : public framework::OpKernel { +class ExpandMKLDNNKernel : public paddle::framework::OpKernel { public: - void Compute(const framework::ExecutionContext& ctx) const override { + void Compute(const ExecutionContext& ctx) const override { this->RunKernel(ctx); } - void RunKernel(const framework::ExecutionContext& ctx) const { - const auto& dev_ctx = - ctx.template device_context(); + void RunKernel(const ExecutionContext& ctx) const { + const auto& dev_ctx = ctx.template device_context(); const auto& onednn_engine = dev_ctx.GetEngine(); const auto* x = ctx.Input("X"); auto* out = ctx.Output("Out"); - auto x_vec_dims = framework::vectorize(x->dims()); - auto out_vec_dims = framework::vectorize(out->dims()); + auto x_vec_dims = vectorize(x->dims()); + auto out_vec_dims = vectorize(out->dims()); dnnl::memory::format_tag x_format_tag = x->format(); if (x_vec_dims.size() != out_vec_dims.size()) { @@ -45,7 +47,7 @@ class ExpandMKLDNNKernel : public framework::OpKernel { out->set_format(x_format_tag); - platform::BroadcastDataMKLDNNHandler handler( + paddle::platform::BroadcastDataMKLDNNHandler handler( dnnl::algorithm::binary_add, dev_ctx, onednn_engine, ctx.GetPlace(), out, x, 0.0f, 1.0f, ctx.InputName("X"), x_vec_dims); @@ -58,62 +60,62 @@ class ExpandMKLDNNKernel : public framework::OpKernel { {DNNL_ARG_SRC_1, *src_memory_p}, {DNNL_ARG_DST, *dst_memory_p}}; - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = MKLDNNDeviceContext::tls().get_stream(); binary_p->execute(astream, args); astream.wait(); - out->set_layout(framework::DataLayout::kMKLDNN); - out->set_format(platform::GetMKLDNNFormat(*dst_memory_p)); + out->set_layout(paddle::framework::DataLayout::kMKLDNN); + out->set_format(paddle::platform::GetMKLDNNFormat(*dst_memory_p)); } private: dnnl::memory::format_tag GetExtendedFormatTag( std::vector& dims, int new_size, mkldnn::memory::format_tag format_tag) const { - mkldnn::memory::desc md(dims, platform::MKLDNNGetDataType(), format_tag); + mkldnn::memory::desc md(dims, paddle::platform::MKLDNNGetDataType(), + format_tag); std::vector new_dims(new_size, 1); std::copy(dims.begin(), dims.end(), new_dims.begin() + new_size - dims.size()); dims = std::move(new_dims); - return platform::GetMKLDNNFormat(md.reshape(dims)); + return paddle::platform::GetMKLDNNFormat(md.reshape(dims)); } }; template -class ExpandGradMKLDNNKernel : public framework::OpKernel { +class ExpandGradMKLDNNKernel : public paddle::framework::OpKernel { public: - void Compute(const framework::ExecutionContext& ctx) const override { + void Compute(const ExecutionContext& ctx) const override { this->RunKernel(ctx); } - void RunKernel(const framework::ExecutionContext& ctx) const { - const auto& dev_ctx = - ctx.template device_context(); + void RunKernel(const ExecutionContext& ctx) const { + const auto& dev_ctx = ctx.template device_context(); const auto& onednn_engine = dev_ctx.GetEngine(); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dout = ctx.Input(GradVarName("Out")); + auto* dx = ctx.Output(GradVarName("X")); - auto dx_vec_dims = framework::vectorize(dx->dims()); - auto dout_vec_dims = framework::vectorize(dout->dims()); + auto dx_vec_dims = vectorize(dx->dims()); + auto dout_vec_dims = vectorize(dout->dims()); if (dx_vec_dims.size() != dout_vec_dims.size()) { dx_vec_dims.insert(dx_vec_dims.begin(), dout_vec_dims.size() - dx_vec_dims.size(), 1); } - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); + auto& astream = MKLDNNDeviceContext::tls().get_stream(); if (dout_vec_dims == dx_vec_dims) { mkldnn::memory::data_type dout_type = - framework::ToMKLDNNDataType(dout->type()); - std::string key = platform::CreateKey( + paddle::framework::ToMKLDNNDataType(dout->type()); + std::string key = paddle::platform::CreateKey( dev_ctx, dout_vec_dims, dout->format(), dout->format(), dout_type); - platform::ReorderMKLDNNHandler reorder_handler( + paddle::platform::ReorderMKLDNNHandler reorder_handler( dout_vec_dims, dout->type(), dout_type, dev_ctx, onednn_engine, key); auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( - dout->format(), platform::to_void_cast(dout->data())); + dout->format(), paddle::platform::to_void_cast(dout->data())); auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(dx, dout->format(), ctx.GetPlace()); @@ -124,11 +126,11 @@ class ExpandGradMKLDNNKernel : public framework::OpKernel { reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p); astream.wait(); - dx->set_layout(framework::DataLayout::kMKLDNN); + dx->set_layout(paddle::framework::DataLayout::kMKLDNN); dx->set_format( - platform::GetMKLDNNFormat(reorder_dst_memory_p->get_desc())); + paddle::platform::GetMKLDNNFormat(reorder_dst_memory_p->get_desc())); } else { - platform::ReductionMKLDNNHandler handler( + paddle::platform::ReductionMKLDNNHandler handler( dnnl::algorithm::reduction_sum, 0.0f, 0.0f, dev_ctx, onednn_engine, ctx.GetPlace(), dout, dx, ctx.InputName("X"), dx_vec_dims); @@ -142,21 +144,18 @@ class ExpandGradMKLDNNKernel : public framework::OpKernel { reduction_p->execute(astream, reduction_args); astream.wait(); - dx->set_layout(framework::DataLayout::kMKLDNN); - dx->set_format(platform::GetMKLDNNFormat(dst_memory_p->get_desc().reshape( - paddle::framework::vectorize(dx->dims())))); + dx->set_layout(paddle::framework::DataLayout::kMKLDNN); + dx->set_format(paddle::platform::GetMKLDNNFormat( + dst_memory_p->get_desc().reshape(vectorize(dx->dims())))); } } }; +} // anonymous namespace -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; REGISTER_OP_KERNEL(expand_v2, MKLDNN, paddle::platform::CPUPlace, - ops::ExpandMKLDNNKernel, - ops::ExpandMKLDNNKernel); + ExpandMKLDNNKernel, + ExpandMKLDNNKernel); REGISTER_OP_KERNEL(expand_v2_grad, MKLDNN, paddle::platform::CPUPlace, - ops::ExpandGradMKLDNNKernel, - ops::ExpandGradMKLDNNKernel); + ExpandGradMKLDNNKernel, + ExpandGradMKLDNNKernel); diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py index eceef665a0304..63c87ce11507d 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py @@ -21,6 +21,8 @@ from paddle.fluid.tests.unittests.op_test import OpTest, OpTestTool, convert_float_to_uint16 +@OpTestTool.skip_if(core.is_compiled_with_cuda(), + "CUDA required dygraph so oneDNN UT must be skipped") class TestExpandV2OneDNNOp(OpTest): def setUp(self): self.op_type = "expand_v2" From aee5afb0bb3ab984fb7bd4a5e906e2a507a8dc6e Mon Sep 17 00:00:00 2001 From: Jakub Piasecki Date: Wed, 28 Jul 2021 18:10:27 +0200 Subject: [PATCH 7/7] CI fix --- .../fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py index 63c87ce11507d..51d7fe971674d 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_expand_v2_mkldnn_op.py @@ -30,7 +30,7 @@ def setUp(self): self.x = np.random.random(self.ori_shape).astype("float32") self.set_inputs() self.attrs = {'shape': self.shape, 'use_mkldnn': True} - output = np.tile(self.inputs['X'], self.expand_times) + output = np.tile(self.x, self.expand_times) self.outputs = {'Out': output} def set_inputs(self): @@ -90,7 +90,7 @@ def test_check_grad(self): core.CPUPlace(), ["X"], "Out", user_defined_grads=[convert_float_to_uint16(self.dx)], - user_defined_grad_outputs=[convert_float_to_uint16(self.dout)]) + user_defined_grad_outputs=[self.dout]) cls_name = "{0}_{1}".format(parent.__name__, "Expand_v2_BF16") TestExpandV2BF16OneDNNOp.__name__ = cls_name