Commit

- modified UT

jczaja committed Aug 5, 2021
1 parent 2b24a80 commit 98270c1
Showing 7 changed files with 220 additions and 241 deletions.
@@ -47,7 +47,9 @@ class EltwiseMKLDNNKernel : public framework::OpKernel<T> {
float scale_o = ctx.Attr<float>("Scale_out");
int axis = ctx.Attr<int>("axis");

platform::BinaryMKLDNNHandler<T> handler( BINARY_OP, axis, mkldnn_engine, ctx.GetPlace(), x, y, z, scale_x, scale_y, scale_o);
platform::BinaryMKLDNNHandler<T> handler(BINARY_OP, axis, mkldnn_engine,
ctx.GetPlace(), x, y, z, scale_x,
scale_y, scale_o);

const auto src_x_memory = handler.AcquireSrcMemory(x);
const auto src_y_memory = handler.AcquireSecondSrcMemory(y);
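The hunk above is truncated before the execution step. For context, a minimal sketch of how such a binary kernel typically finishes, assuming the handler also provides AcquireDstMemory and AcquireForwardPrimitive (as the scale and softmax kernels later in this commit do) and using the standard oneDNN binary argument ids:

    // Acquire the destination memory and the (re)usable binary primitive.
    const auto dst_memory = handler.AcquireDstMemory(z);
    const auto binary_prim = handler.AcquireForwardPrimitive();

    // Bind both sources and the destination, then run on the oneDNN stream.
    auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
    binary_prim->execute(astream, {{DNNL_ARG_SRC_0, *src_x_memory},
                                   {DNNL_ARG_SRC_1, *src_y_memory},
                                   {DNNL_ARG_DST, *dst_memory}});
    astream.wait();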
@@ -48,8 +48,8 @@ class EltwiseMulMKLDNNGradKernel : public ElemwiseGradKernel<T> {
if (dx) {
// dx = dout*y
platform::BinaryMKLDNNHandler<T> handler(
dnnl::algorithm::binary_mul, axis, mkldnn_engine,
ctx.GetPlace(), dout, y, dx, 1.0f, 1.0f, 1.0f);
dnnl::algorithm::binary_mul, axis, mkldnn_engine, ctx.GetPlace(),
dout, y, dx, 1.0f, 1.0f, 1.0f);

const auto src_dout_memory = handler.AcquireSrcMemory(dout);
const auto src_y_memory = handler.AcquireSecondSrcMemory(y);
@@ -74,8 +74,8 @@ class EltwiseMulMKLDNNGradKernel : public ElemwiseGradKernel<T> {
// A nullptr is passed to the handler instead of the output tensor because
// we want the Dst buffer to be allocated by oneDNN rather than by a Tensor
platform::BinaryMKLDNNHandler<T> handler(
dnnl::algorithm::binary_mul, axis, mkldnn_engine,
ctx.GetPlace(), dout, x, nullptr, 1.0f, 1.0f, 1.0f);
dnnl::algorithm::binary_mul, axis, mkldnn_engine, ctx.GetPlace(),
dout, x, nullptr, 1.0f, 1.0f, 1.0f);

const auto src_dout_memory = handler.AcquireSrcMemory(dout);
const auto src_x_memory = handler.AcquireSecondSrcMemory(x);
11 changes: 6 additions & 5 deletions paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
@@ -79,14 +79,15 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
paddle::platform::errors::PreconditionNotMet(
"Operator DNNL eletwise_forward must use CPUPlace"));
auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
const auto &mkldnn_engine = dev_ctx.GetEngine();

const auto *x = ctx.Input<Tensor>("X");
auto *y = ctx.Output<Tensor>("Out");

bool is_inplaced = x->IsSharedBufferWith(*y);

platform::ActivationMKLDNNHandler<T> handler(algorithm, ctx, mkldnn_engine, ctx.GetPlace(), x);
platform::ActivationMKLDNNHandler<T> handler(algorithm, ctx, mkldnn_engine,
ctx.GetPlace(), x);

auto src_memory_p = handler.AcquireSrcMemory(x);
auto dst_memory_p = is_inplaced ? src_memory_p : handler.AcquireDstMemory(y);
@@ -105,14 +106,14 @@ template <typename T>
void eltwise_grad(const framework::ExecutionContext &ctx,
mkldnn::algorithm algorithm) {
auto &dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
const auto &mkldnn_engine = dev_ctx.GetEngine();

const auto *x = ctx.Input<Tensor>("X");
const auto *diff_y = ctx.Input<Tensor>(framework::GradVarName("Out"));
auto *diff_x = ctx.Output<Tensor>(framework::GradVarName("X"));

platform::ActivationMKLDNNHandler<T> handler(
algorithm, ctx, mkldnn_engine, ctx.GetPlace(), x, diff_y);
platform::ActivationMKLDNNHandler<T> handler(algorithm, ctx, mkldnn_engine,
ctx.GetPlace(), x, diff_y);

auto src_memory_p = handler.AcquireBackwardSrcMemory(x);
auto diff_dst_memory_p = handler.AcquireDiffDstMemory(diff_y);
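The backward hunk is cut off after the two Acquire calls; a hedged sketch of the usual continuation, assuming the handler exposes AcquireDiffSrcMemory and AcquireBackwardPrimitive (names following the pattern of the Acquire calls already shown):

    // Destination for dX and the eltwise backward primitive.
    auto diff_src_memory_p = handler.AcquireDiffSrcMemory(diff_x);
    auto activation_backward_p = handler.AcquireBackwardPrimitive();

    // oneDNN eltwise backward reads the original src and dOut, writes dX.
    auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
    activation_backward_p->execute(astream,
                                   {{DNNL_ARG_SRC, *src_memory_p},
                                    {DNNL_ARG_DIFF_DST, *diff_dst_memory_p},
                                    {DNNL_ARG_DIFF_SRC, *diff_src_memory_p}});
    astream.wait();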
6 changes: 4 additions & 2 deletions paddle/fluid/operators/mkldnn/scale_mkldnn_op.cc
@@ -37,10 +37,12 @@ class ScaleMKLDNNKernel : public framework::OpKernel<T> {
bool is_inplaced = x->IsSharedBufferWith(*out);

platform::ActivationMKLDNNHandler<T> handler(
mkldnn::algorithm::eltwise_linear, ctx, mkldnn_engine, ctx.GetPlace(), x);
mkldnn::algorithm::eltwise_linear, ctx, mkldnn_engine, ctx.GetPlace(),
x);

auto src_memory_p = handler.AcquireSrcMemory(x);
auto dst_memory_p = is_inplaced ? src_memory_p : handler.AcquireDstMemory(out);
auto dst_memory_p =
is_inplaced ? src_memory_p : handler.AcquireDstMemory(out);
auto activation_p = handler.AcquireForwardPrimitive();

auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
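ScaleMKLDNNKernel reuses the activation handler with eltwise_linear, which in oneDNN computes alpha * x + beta. The handler's internals are not part of this diff; a sketch of how the scale op's attributes would presumably map onto alpha and beta (attribute names as in Paddle's scale op):

    // eltwise_linear: out = alpha * x + beta
    const float scale = ctx.Attr<float>("scale");
    const float bias = ctx.Attr<float>("bias");
    const bool bias_after_scale = ctx.Attr<bool>("bias_after_scale");

    const float alpha = scale;
    // out = scale * x + bias            when bias is added after scaling
    // out = scale * (x + bias)
    //     = scale * x + scale * bias    when bias is added before scaling
    const float beta = bias_after_scale ? bias : bias * scale;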
50 changes: 27 additions & 23 deletions paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
@@ -33,12 +33,13 @@ using platform::to_void_cast;
template <typename T>
class SoftmaxMKLDNNHandler
: public platform::MKLDNNHandlerNoCachingT<T, mkldnn::softmax_forward,
mkldnn::softmax_backward> {
mkldnn::softmax_backward> {
public:
SoftmaxMKLDNNHandler(const mkldnn::engine mkldnn_engine,
platform::Place cpu_place, const Tensor* input,
Tensor* output, const int axis)
: platform::MKLDNNHandlerNoCachingT<T, mkldnn::softmax_forward, mkldnn::softmax_backward>(
: platform::MKLDNNHandlerNoCachingT<T, mkldnn::softmax_forward,
mkldnn::softmax_backward>(
mkldnn_engine, cpu_place) {
PADDLE_ENFORCE_EQ(
input->dims(), output->dims(),
@@ -49,7 +50,8 @@ class SoftmaxMKLDNNHandler
auto md = memory::desc(softmax_tz, platform::MKLDNNGetDataType<T>(),
input->format());

this->AcquireForwardPrimitiveDescriptor(prop_kind::forward_scoring, md, axis);
this->AcquireForwardPrimitiveDescriptor(prop_kind::forward_scoring, md,
axis);
}

SoftmaxMKLDNNHandler(const framework::ExecutionContext& ctx,
@@ -58,25 +60,26 @@
const Tensor* out_grad, Tensor* in_x_grad,
const std::string& unique_name)
: platform::MKLDNNHandlerNoCachingT<T, mkldnn::softmax_forward,
mkldnn::softmax_backward>(mkldnn_engine, cpu_place) {
PADDLE_ENFORCE_EQ(
out_grad->dims(), in_x_grad->dims(),
platform::errors::InvalidArgument("The shape of softmax_grad's input "
"and output must be identical."));

auto dims = out_grad->dims(); // input and output share the same shape
const int axis = CanonicalAxis(ctx.Attr<int>("axis"), dims.size());
auto softmax_tz = framework::vectorize<int64_t>(dims);

auto data_softmax_md = MKLDNNMemDesc(
softmax_tz, platform::MKLDNNGetDataType<T>(), out->format());
auto diff_softmax_md = MKLDNNMemDesc(
softmax_tz, platform::MKLDNNGetDataType<T>(), out_grad->format());

this->AcquireForwardPrimitiveDescriptor(prop_kind::forward_scoring,
data_softmax_md, axis);
this->AcquireBackwardPrimitiveDescriptor(diff_softmax_md, data_softmax_md,
axis);
mkldnn::softmax_backward>(
mkldnn_engine, cpu_place) {
PADDLE_ENFORCE_EQ(
out_grad->dims(), in_x_grad->dims(),
platform::errors::InvalidArgument("The shape of softmax_grad's input "
"and output must be identical."));

auto dims = out_grad->dims(); // input and output share the same shape
const int axis = CanonicalAxis(ctx.Attr<int>("axis"), dims.size());
auto softmax_tz = framework::vectorize<int64_t>(dims);

auto data_softmax_md = MKLDNNMemDesc(
softmax_tz, platform::MKLDNNGetDataType<T>(), out->format());
auto diff_softmax_md = MKLDNNMemDesc(
softmax_tz, platform::MKLDNNGetDataType<T>(), out_grad->format());

this->AcquireForwardPrimitiveDescriptor(prop_kind::forward_scoring,
data_softmax_md, axis);
this->AcquireBackwardPrimitiveDescriptor(diff_softmax_md, data_softmax_md,
axis);
}
};

@@ -93,7 +96,8 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {

const int axis = CanonicalAxis(ctx.Attr<int>("axis"), input->dims().size());

SoftmaxMKLDNNHandler<T> handler(mkldnn_engine, ctx.GetPlace(), input, output, axis);
SoftmaxMKLDNNHandler<T> handler(mkldnn_engine, ctx.GetPlace(), input,
output, axis);

auto softmax_src_memory_p = handler.AcquireSrcMemory(input);
// For Inplace src and dst are the same memory object
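This hunk also stops before the primitive is run. A minimal sketch of the remaining steps, assuming an is_inplaced flag analogous to the ones in the activation and scale kernels above:

    // In-place softmax writes into the source buffer, so reuse its memory object.
    auto softmax_dst_memory_p = is_inplaced ? softmax_src_memory_p
                                            : handler.AcquireDstMemory(output);
    auto softmax_p = handler.AcquireForwardPrimitive();

    auto& astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
    softmax_p->execute(astream, {{DNNL_ARG_SRC, *softmax_src_memory_p},
                                 {DNNL_ARG_DST, *softmax_dst_memory_p}});
    astream.wait();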
66 changes: 20 additions & 46 deletions paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc
@@ -70,11 +70,16 @@ void RunOperator(const platform::Place &place, const std::string &op_type,

std::map<const std::string, int> num_inputs = {{"softmax", 1},
{"relu", 1},
{"conv2d", 2},
{"elementwise_add", 2},
{"elementwise_mul", 2}};

std::string first_input = inplace == true ? output_name : "x";

std::string first_input_var_name = (op_type == "conv2d") ? "Input" : "X";
std::string second_input_var_name = (op_type == "conv2d") ? "Filter" : "Y";
std::string output_var_name = (op_type == "conv2d") ? "Output" : "Out";

std::vector<InputVars> input_names = {
{first_input, scope.Var(first_input)->GetMutable<framework::LoDTensor>()},
{"x1", num_inputs[op_type] > 1
@@ -113,68 +118,37 @@

auto &pool = platform::DeviceContextPool::Instance();

auto op = num_inputs[op_type] > 1
? framework::OpRegistry::CreateOp(
op_type, {{"X", {first_input}}, {"Y", {"x1"}}},
{{"Out", {output_name}}}, {{"use_mkldnn", {true}}})
: framework::OpRegistry::CreateOp(
op_type, {{"X", {first_input}}}, {{"Out", {output_name}}},
{{"use_mkldnn", {true}}});
auto op =
num_inputs[op_type] > 1
? framework::OpRegistry::CreateOp(
op_type, {{first_input_var_name, {first_input}},
{second_input_var_name, {"x1"}}},
{{output_var_name, {output_name}}}, {{"use_mkldnn", {true}}})
: framework::OpRegistry::CreateOp(
op_type, {{first_input_var_name, {first_input}}},
{{output_var_name, {output_name}}}, {{"use_mkldnn", {true}}});

op->Run(scope, place);
pool.Get(place)->Wait();
}

TEST(test_softmax_reuse_cache, cpu_place) {
framework::DDim dims({32, 64});
framework::DDim dims({1, 16, 32, 64});
platform::CPUPlace p;
CacheTester ct;
RunOperator<float>(p, "softmax", dims, "softmax_out");
RunOperator<float>(p, "softmax", dims, "softmax_out");
RunOperator<float>(p, "conv2d", dims, "conv_out");
RunOperator<float>(p, "conv2d", dims, "conv_out");
PADDLE_ENFORCE_EQ(ct.Analyze(4), true,
platform::errors::InvalidArgument(
"Wrong number of cached oneDNN objects"));
}

TEST(test_softmax_noreuse_cache, cpu_place) {
framework::DDim dims({32, 64});
platform::CPUPlace p;
CacheTester ct;
RunOperator<float>(p, "softmax", dims, "softmax_out");
RunOperator<float>(p, "softmax", dims, "softmax_out2");
PADDLE_ENFORCE_EQ(ct.Analyze(8), true,
platform::errors::InvalidArgument(
"Wrong number of cached oneDNN objects"));
}

TEST(test_softmax_inplace_cache, cpu_place) {
framework::DDim dims({32, 64});
platform::CPUPlace p;
CacheTester ct;
RunOperator<float>(p, "softmax", dims, "softmax_out");
RunOperator<float>(p, "softmax", dims, "softmax_out", true);
PADDLE_ENFORCE_EQ(ct.Analyze(7), true,
platform::errors::InvalidArgument(
"Wrong number of cached oneDNN objects"));
}

TEST(test_relu_inplace_cache, cpu_place) {
framework::DDim dims({32, 64});
platform::CPUPlace p;
CacheTester ct;
RunOperator<float>(p, "relu", dims, "relu_out");
RunOperator<float>(p, "relu", dims, "relu_out", true);
PADDLE_ENFORCE_EQ(ct.Analyze(7), true,
platform::errors::InvalidArgument(
"Wrong number of cached oneDNN objects"));
}

TEST(test_elementwise_add_reuse_cache, cpu_place) {
framework::DDim dims({32, 64});
framework::DDim dims({1, 16, 32, 64});
platform::CPUPlace p;
CacheTester ct;
RunOperator<float>(p, "elementwise_add", dims, "elementwise_add_out");
RunOperator<float>(p, "relu", dims, "elementwise_add_out", true);
RunOperator<float>(p, "conv2d", dims, "conv_out");
RunOperator<float>(p, "conv2d", dims, "conv_out2");
PADDLE_ENFORCE_EQ(ct.Analyze(8), true,
platform::errors::InvalidArgument(
"Wrong number of cached oneDNN objects"));