[PHI decoupling] Move MKLDNN code #48352

Merged: 20 commits, Nov 29, 2022
119 changes: 1 addition & 118 deletions paddle/fluid/framework/data_layout_transform.cc
@@ -14,11 +14,8 @@

#include "paddle/fluid/framework/data_layout_transform.h"

#include "paddle/phi/kernels/funcs/math_function.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_reuse.h"
#endif
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/phi/kernels/funcs/math_function.h"

namespace paddle {
namespace framework {
@@ -92,119 +89,5 @@ void TransDataLayout(const OpKernelType& kernel_type_for_var,
out->set_layout(expected_kernel_type.data_layout_);
}

#ifdef PADDLE_WITH_MKLDNN
using dnnl::memory;
using dnnl::primitive;
using dnnl::reorder;

void* GetDataFromTensor(const phi::DenseTensor& tensor,
dnnl::memory::data_type type) {
switch (type) {
case dnnl::memory::data_type::f32:
return phi::funcs::to_void_cast(tensor.data<float>());
case dnnl::memory::data_type::s8:
return phi::funcs::to_void_cast(tensor.data<int8_t>());
case dnnl::memory::data_type::u8:
return phi::funcs::to_void_cast(tensor.data<unsigned char>());
case dnnl::memory::data_type::s32:
return phi::funcs::to_void_cast(tensor.data<int32_t>());
case dnnl::memory::data_type::bf16:
return phi::funcs::to_void_cast(
tensor.data<paddle::platform::bfloat16>());
default:
PADDLE_THROW(
platform::errors::InvalidArgument("Wrong mkldnn type provided."));
}
}

void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
const OpKernelType& expected_kernel_type,
const phi::DenseTensor& in,
phi::DenseTensor* out) {
auto in_layout = kernel_type_for_var.data_layout_;
auto out_layout = expected_kernel_type.data_layout_;
auto place = expected_kernel_type.place_;

PADDLE_ENFORCE(
in_layout == DataLayout::ONEDNN && out_layout != DataLayout::ONEDNN,
platform::errors::InvalidArgument(
"TransDataLayoutFromMKLDNN only supports transform from MKLDNN to "
"non-MKLDNN"));

innerTransDataLayoutFromMKLDNN(
in_layout,
paddle::platform::MKLDNNDeviceContext::tls().get_cur_paddle_data_layout(),
in,
out,
place);
}

void innerTransDataLayoutFromMKLDNN(DataLayout in_layout,
DataLayout out_layout,
const phi::DenseTensor& in,
phi::DenseTensor* out,
platform::Place place,
bool always_copy) {
// Set default as NCHW in case not specified
out_layout =
out_layout == DataLayout::kAnyLayout ? DataLayout::kNCHW : out_layout;

auto& pool = platform::DeviceContextPool::Instance();
auto* dev_ctx = dynamic_cast<platform::MKLDNNDeviceContext*>(pool.Get(place));
auto& cpu_engine = dev_ctx->GetEngine();

auto in_tz = phi::vectorize<int64_t>(in.dims());
auto out_tz = in_tz;

memory::data_type in_type =
ToMKLDNNDataType(framework::TransToProtoVarType(in.dtype()));
PADDLE_ENFORCE_NE(
in_type,
memory::data_type::undef,
platform::errors::InvalidArgument(
"Input tensor type (%s) is not supported.",
DataTypeToString(framework::TransToProtoVarType(in.dtype()))));

auto out_format =
phi::funcs::OneDNNFormatForSize(in_tz.size(), ToOneDNNFormat(out_layout));
dnnl::memory::desc out_mem_desc(out_tz, in_type, out_format);

// output tensor has the same dims as input. Reorder doesn't change dims
out->set_mem_desc(out_mem_desc);
out->Resize(in.dims());

// Note(0x45f): Using initialized() to support slice Tensors
// with shapes like [0, 0, 0].
if (in.initialized() && ((in.mem_desc() != out->mem_desc()) || always_copy)) {
void* in_data = GetDataFromTensor(in, in_type);

phi::funcs::ReorderOneDNNHandler handler(
in_tz, in.dtype(), in_type, cpu_engine);

auto reorder_src_memory_p =
handler.AcquireSrcMemory(in.mem_desc(), in_data);
auto reorder_dst_memory_p =
handler.AcquireDstMemory(out, out->mem_desc(), place);
auto reorder_p =
handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p);

auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("ext_reorder",
platform::TracerEventType::UserDefined,
2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
astream.wait();
} else {
out->ShareDataWith(in);
}
// For expected NHWC data format we need to reshape the Output tensor
// As MKL-DNN description was in NCHW and paddle is expecting NHWC
phi::funcs::MatchShapeToLayout(out, in_layout, out_layout);

out->set_layout(DataLayout::kNCHW);
}
#endif

} // namespace framework
} // namespace paddle
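The block deleted above relocates into `phi::funcs` (see the new include of `paddle/phi/kernels/funcs/data_layout_transform.h` in the header diff below). Stripped of Paddle's device-context and tensor plumbing, what it performs is a plain oneDNN reorder between two memory descriptors. A minimal standalone sketch of that pattern, using only the public dnnl API (dims and values are illustrative, not from the PR):

```cpp
#include <vector>
#include "dnnl.hpp"

int main() {
  dnnl::engine cpu_engine(dnnl::engine::kind::cpu, 0);
  dnnl::stream astream(cpu_engine);

  // One logical tensor [N=1, C=3, H=2, W=2] described in two physical layouts.
  dnnl::memory::dims dims = {1, 3, 2, 2};
  std::vector<float> src_buf(12), dst_buf(12);
  for (size_t i = 0; i < src_buf.size(); ++i) src_buf[i] = static_cast<float>(i);

  dnnl::memory::desc src_md(dims, dnnl::memory::data_type::f32,
                            dnnl::memory::format_tag::nchw);
  dnnl::memory::desc dst_md(dims, dnnl::memory::data_type::f32,
                            dnnl::memory::format_tag::nhwc);
  dnnl::memory src_mem(src_md, cpu_engine, src_buf.data());
  dnnl::memory dst_mem(dst_md, cpu_engine, dst_buf.data());

  // The reorder primitive performs the physical NCHW -> NHWC permutation,
  // mirroring the AcquireSrcMemory/AcquireDstMemory/AcquireReorder sequence
  // in innerTransDataLayoutFromMKLDNN above.
  dnnl::reorder(src_mem, dst_mem).execute(astream, src_mem, dst_mem);
  astream.wait();
  return 0;
}
```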
51 changes: 2 additions & 49 deletions paddle/fluid/framework/data_layout_transform.h
@@ -21,6 +21,7 @@
#include "paddle/fluid/framework/op_kernel_type.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/phi/kernels/funcs/data_layout_transform.h"

namespace paddle {
namespace framework {
@@ -29,7 +30,7 @@ class OpKernelType;
} // namespace paddle

#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/phi/backends/onednn/onednn_helper.h"
#endif

namespace paddle {
@@ -51,54 +52,6 @@ struct CastDataLayout {
void apply();
};

#ifdef PADDLE_WITH_MKLDNN
using OneDNNDataType = dnnl::memory::data_type;

inline OneDNNMemoryFormat ToOneDNNFormat(const DataLayout& layout) {
switch (layout) {
case DataLayout::kNHWC:
return OneDNNMemoryFormat::nhwc;
case DataLayout::kNCHW:
return OneDNNMemoryFormat::nchw;
case DataLayout::kNCDHW:
return OneDNNMemoryFormat::ncdhw;
case DataLayout::kNDHWC:
return OneDNNMemoryFormat::ndhwc;
default:
PADDLE_THROW(platform::errors::InvalidArgument(
"Fail to convert layout %s to oneDNN format.",
phi::DataLayoutToString(layout)));
}
}

inline OneDNNDataType ToMKLDNNDataType(proto::VarType::Type type) {
static std::unordered_map<int, OneDNNDataType> dict{
{DataTypeTrait<float>::DataType(), OneDNNDataType::f32},
{DataTypeTrait<int8_t>::DataType(), OneDNNDataType::s8},
{DataTypeTrait<uint8_t>::DataType(), OneDNNDataType::u8},
{DataTypeTrait<int32_t>::DataType(), OneDNNDataType::s32},
{DataTypeTrait<platform::bfloat16>::DataType(), OneDNNDataType::bf16}};
auto iter = dict.find(static_cast<int>(type));
if (iter != dict.end()) return iter->second;
return OneDNNDataType::undef;
}

void innerTransDataLayoutFromMKLDNN(DataLayout in_layout,
DataLayout out_layout,
const phi::DenseTensor& in,
phi::DenseTensor* out,
platform::Place place,
bool always_copy = false);

void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
const OpKernelType& expected_kernel_type,
const phi::DenseTensor& in,
phi::DenseTensor* out);

void* GetDataFromTensor(const phi::DenseTensor& tensor, OneDNNDataType type);

#endif

std::vector<int> GetAxis(const DataLayout& from, const DataLayout& to);

void TransDataLayout(const OpKernelType& kernel_type_for_var,
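The deleted declarations reappear under `phi::funcs` (`ToOneDNNFormat`, `ToOneDNNDataType`, `GetDataFromTensor`, `TransDataLayoutFromOneDNN`), as the call sites in the diffs below show. One visible signature change: the PHI mapping is keyed off `phi::DataType` directly rather than fluid's `proto::VarType::Type`. A hedged sketch of that mapping, with a stand-in enum so the snippet compiles without Paddle headers:

```cpp
#include "dnnl.hpp"

// Stand-in for phi::DataType; the real phi::funcs::ToOneDNNDataType is
// assumed to implement the same table as the deleted ToMKLDNNDataType above.
enum class DataType { FLOAT32, INT8, UINT8, INT32, BFLOAT16, UNDEFINED };

inline dnnl::memory::data_type ToOneDNNDataType(DataType t) {
  switch (t) {
    case DataType::FLOAT32:  return dnnl::memory::data_type::f32;
    case DataType::INT8:     return dnnl::memory::data_type::s8;
    case DataType::UINT8:    return dnnl::memory::data_type::u8;
    case DataType::INT32:    return dnnl::memory::data_type::s32;
    case DataType::BFLOAT16: return dnnl::memory::data_type::bf16;
    default:                 return dnnl::memory::data_type::undef;
  }
}
```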
4 changes: 2 additions & 2 deletions paddle/fluid/framework/data_layout_transform_test.cc
@@ -53,7 +53,7 @@ TEST(DataTransformBf16, GetDataFromTensorDNNL) {
place);

void* in_data =
paddle::framework::GetDataFromTensor(in, dnnl::memory::data_type::bf16);
phi::funcs::GetDataFromTensor(in, dnnl::memory::data_type::bf16);
EXPECT_EQ(in_data,
phi::funcs::to_void_cast(in.data<paddle::platform::bfloat16>()));
}
@@ -64,7 +64,7 @@ TEST(DataTransformInt32, GetDataFromTensorDNNL) {
in.mutable_data<int32_t>(phi::make_ddim({2, 3, 1, 2}), place);

void* in_data =
paddle::framework::GetDataFromTensor(in, dnnl::memory::data_type::s32);
phi::funcs::GetDataFromTensor(in, dnnl::memory::data_type::s32);
EXPECT_EQ(in_data, phi::funcs::to_void_cast(in.data<int32_t>()));
}
#endif
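`phi::funcs::to_void_cast`, which these tests compare against, is not part of this diff; a plausible one-liner for it (an assumption, not the verbatim Paddle helper) would be:

```cpp
// Assumed shape of phi::funcs::to_void_cast: strip constness so the pointer
// can feed oneDNN entry points that take a mutable void*.
template <typename T>
void* to_void_cast(const T* p) {
  return const_cast<void*>(static_cast<const void*>(p));
}
```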
34 changes: 22 additions & 12 deletions paddle/fluid/framework/data_transform.cc
@@ -57,38 +57,48 @@ void TransformData(const OpKernelType &expected_kernel_type,
"No layout transform needed between two oneDNN OPKernels."));

if (lin != DataLayout::ONEDNN && lout == DataLayout::ONEDNN) {
// Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel
// Case1 - transform from Non-ONEDNN OPKernel to ONEDNN OPKernel
// Just set layout/format. No real transform occurs

auto out_format = phi::funcs::OneDNNFormatForSize(in.dims().size(),
ToOneDNNFormat(lin));
auto out_format = phi::funcs::OneDNNFormatForSize(
in.dims().size(), phi::funcs::ToOneDNNFormat(lin));
out.ShareDataWith(input_tensor);
// For NHWC data we need to reshape the tensors, as MKL-DNN
// expects the NHWC dims description order
if (lin == DataLayout::kNHWC || lin == DataLayout::kNDHWC) {
phi::funcs::MatchShapeToLayout(&out, lin, lout);
// We register only NHWC assuming that model is consistent e.g. either
// NHWC or NCHW
paddle::platform::MKLDNNDeviceContext::tls()
.set_cur_paddle_data_layout(lin);
phi::OneDNNContext::tls().set_cur_paddle_data_layout(lin);
}
dnnl::memory::desc out_mem_desc(
vectorize(out.dims()),
ToMKLDNNDataType(TransToProtoVarType(in.type())),
phi::funcs::ToOneDNNDataType(in.dtype()),
out_format);
out.set_mem_desc(out_mem_desc);
} else {
// Case2 - transform from MKLDNN OPKernel to Non-MKLDNN OPKernel
// Do transform via MKLDNN lib
TransDataLayoutFromMKLDNN(
kernel_type_for_var, expected_kernel_type, in, &out);
// Case2 - transform from ONEDNN OPKernel to Non-ONEDNN OPKernel
// Do transform via ONEDNN lib
PADDLE_ENFORCE(
kernel_type_for_var.data_layout_ == DataLayout::ONEDNN &&
expected_kernel_type.data_layout_ != DataLayout::ONEDNN,
platform::errors::InvalidArgument(
"TransDataLayoutFromOneDNN only supports "
"transform from ONEDNN to non-ONEDNN"));

phi::funcs::TransDataLayoutFromOneDNN(
kernel_type_for_var.data_layout_,
phi::OneDNNContext::tls().get_cur_paddle_data_layout(),
in,
&out,
expected_kernel_type.place_);
}
} else {
// Case3 - transform between Non-MKLDNN OPKernels
// Case3 - transform between Non-ONEDNN OPKernels
TransDataLayout(kernel_type_for_var, expected_kernel_type, in, &out);
}
#else
// Case3 - transform between Non-MKLDNN OPKernels
// Case3 - transform between Non-ONEDNN OPKernels
TransDataLayout(kernel_type_for_var, expected_kernel_type, in, &out);
#endif
transformed = true;
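For orientation, the dispatch in this hunk reduces to three cases. A compilable skeleton of the control flow (the enum stands in for `phi::DataLayout`; tensor plumbing elided):

```cpp
#include <stdexcept>

enum class Layout { kNCHW, kNHWC, ONEDNN };

// Skeleton of TransformData's layout dispatch after this PR (shape of the
// logic only, not the verbatim Paddle code).
void transform_layout(Layout lin, Layout lout) {
  if (lin == Layout::ONEDNN || lout == Layout::ONEDNN) {
    if (lin == Layout::ONEDNN && lout == Layout::ONEDNN)
      throw std::invalid_argument(
          "No layout transform needed between two oneDNN OPKernels.");
    if (lin != Layout::ONEDNN) {
      // Case 1: entering oneDNN. Share the buffer and stamp a oneDNN
      // mem_desc on it; no data movement.
    } else {
      // Case 2: leaving oneDNN. Run a real reorder via
      // phi::funcs::TransDataLayoutFromOneDNN.
    }
  } else {
    // Case 3: neither kernel is oneDNN; plain axis transpose via
    // TransDataLayout.
  }
}
```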
@@ -494,8 +494,8 @@ void ApplyDataTransform(const OpKernelType& expected_kernel_key,
if ((tensor_in->layout() == DataLayout::ONEDNN) &&
(var->IsType<phi::DenseTensor>() == true) &&
(expected_kernel_key.data_layout_ != DataLayout::ONEDNN) &&
(paddle::platform::MKLDNNDeviceContext::tls()
.get_cur_paddle_data_layout() == DataLayout::kNHWC)) {
(phi::OneDNNContext::tls().get_cur_paddle_data_layout() ==
DataLayout::kNHWC)) {
VLOG(7) << "Created reshaped dummy input based on MKL-DNN "
"phi::DenseTensor , "
"but kNHWC layout"
8 changes: 4 additions & 4 deletions paddle/fluid/framework/operator.cc
@@ -2304,8 +2304,8 @@ Scope* OperatorWithKernel::PrepareData(
if ((tensor_in->layout() == DataLayout::ONEDNN) &&
(var->IsType<phi::DenseTensor>() == true) &&
(expected_kernel_key.data_layout_ != DataLayout::ONEDNN) &&
(paddle::platform::MKLDNNDeviceContext::tls()
.get_cur_paddle_data_layout() == DataLayout::kNHWC) &&
(phi::OneDNNContext::tls().get_cur_paddle_data_layout() ==
DataLayout::kNHWC) &&
(tensor_in->dims().size() >= 3)) {
// Mixed execution : oneDNN and GPU is not supported!
if (!new_scope) {
@@ -2757,8 +2757,8 @@ OpKernelType OperatorWithKernel::GetKernelTypeForVar(
// then we also need to rotate shape NHWC -> NCHW
if ((expected_kernel_type.data_layout_ == phi::DataLayout::ONEDNN) &&
(tensor.layout() != phi::DataLayout::ONEDNN) &&
paddle::platform::MKLDNNDeviceContext::tls()
.get_cur_paddle_data_layout() == phi::DataLayout::kNHWC) {
phi::OneDNNContext::tls().get_cur_paddle_data_layout() ==
phi::DataLayout::kNHWC) {
return framework::OpKernelType(expected_kernel_type.data_type_,
tensor.place(),
phi::DataLayout::kNHWC);
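The "rotate shape" the comment refers to is pure dims bookkeeping. Assuming `phi::funcs::MatchShapeToLayout` only permutes the dims metadata for the 4-D case (the underlying buffer is untouched), the permutation looks like:

```cpp
#include <cstdint>
#include <vector>

// [N, H, W, C] -> [N, C, H, W]
std::vector<int64_t> nhwc_to_nchw(const std::vector<int64_t>& d) {
  return {d[0], d[3], d[1], d[2]};
}

// [N, C, H, W] -> [N, H, W, C]
std::vector<int64_t> nchw_to_nhwc(const std::vector<int64_t>& d) {
  return {d[0], d[2], d[3], d[1]};
}
```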
15 changes: 7 additions & 8 deletions paddle/fluid/inference/api/analysis_predictor.cc
@@ -868,13 +868,12 @@ void AnalysisPredictor::MkldnnPreSet(
const std::vector<std::vector<int>> &inputs_shape) {
#ifdef PADDLE_WITH_MKLDNN
VLOG(2) << "AnalysisPredictor::ZeroCopyRun get_cur_mkldnn_session_id="
<< platform::MKLDNNDeviceContext::tls().get_cur_mkldnn_session_id();
<< phi::OneDNNContext::tls().get_cur_mkldnn_session_id();
// In cache clearing mode.
if (config_.mkldnn_cache_capacity_ > 0) {
VLOG(2) << "In mkldnn cache clear mode.";
platform::MKLDNNDeviceContext::tls().set_cur_mkldnn_session_id(
platform::MKLDNNDeviceContextThreadLocals::
kMKLDNNSessionID_CacheClearing);
phi::OneDNNContext::tls().set_cur_mkldnn_session_id(
phi::OneDNNContextThreadLocals::kMKLDNNSessionID_CacheClearing);
// Set current_input_shape for caching dynamic shape.
std::stringstream ss;
for (size_t i = 0; i < inputs_shape.size(); ++i) {
@@ -883,9 +882,9 @@
}
}
VLOG(2) << "Set input shape=" << ss.str();
platform::MKLDNNDeviceContext::tls().set_cur_input_shape_str(ss.str());
phi::OneDNNContext::tls().set_cur_input_shape_str(ss.str());
}
platform::MKLDNNDeviceContext::tls().set_cur_input_shape_cache_capacity(
phi::OneDNNContext::tls().set_cur_input_shape_cache_capacity(
config_.mkldnn_cache_capacity_);

#endif
@@ -895,11 +894,11 @@ void AnalysisPredictor::MkldnnPostReset() {
#ifdef PADDLE_WITH_MKLDNN
// In cache clearing mode.
if (config_.mkldnn_cache_capacity_ > 0 &&
static_cast<platform::MKLDNNDeviceContext *>(
static_cast<phi::OneDNNContext *>(
(&platform::DeviceContextPool::Instance())->Get(platform::CPUPlace()))
->GetCachedObjectsNumber() > 0) {
if (VLOG_IS_ON(2)) {
auto shape_blob_size = static_cast<platform::MKLDNNDeviceContext *>(
auto shape_blob_size = static_cast<phi::OneDNNContext *>(
(&platform::DeviceContextPool::Instance())
->Get(platform::CPUPlace()))
->GetShapeBlobSize();
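The cache-clearing mode above hinges on per-thread state reachable via `phi::OneDNNContext::tls()`. A minimal sketch of that thread-local pattern (field names borrowed from the diff; the struct itself is illustrative, not the real `phi::OneDNNContextThreadLocals`):

```cpp
#include <cstddef>
#include <string>

struct OneDNNThreadLocals {
  size_t cur_mkldnn_session_id = 0;   // e.g. kMKLDNNSessionID_CacheClearing
  std::string cur_input_shape_str;    // keys the per-shape primitive cache
  int cur_input_shape_cache_capacity = 0;

  // One instance per thread, mirroring OneDNNContext::tls().
  static OneDNNThreadLocals& tls() {
    static thread_local OneDNNThreadLocals instance;
    return instance;
  }
};

// Usage mirroring MkldnnPreSet:
//   OneDNNThreadLocals::tls().cur_input_shape_str = "1-3-224-224";
//   OneDNNThreadLocals::tls().cur_input_shape_cache_capacity = capacity;
```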