[oneDNN] Cache oneDNN stream to avoid recreating it in each oneDNN op #30358

Merged: 7 commits, Jan 25, 2021
Changes from 1 commit
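Every kernel touched here previously constructed a fresh `mkldnn::stream`/`dnnl::stream` from its engine on each invocation; the PR replaces those constructions with `platform::MKLDNNDeviceContext::tls().get_stream()`, which hands back a stream cached in thread-local storage. The accessor's implementation is not shown in this diff, so the following is only a minimal sketch of the caching idea, assuming a lazily created per-thread `dnnl::stream`; the class and helper names are illustrative placeholders, not Paddle's actual ones.

#include <memory>

#include "dnnl.hpp"

// Hypothetical stand-in for platform::MKLDNNDeviceContext::tls():
// one lazily created dnnl::stream per thread, reused by every oneDNN op
// instead of constructing a new stream per kernel invocation.
class ThreadLocalOneDNNContext {
 public:
  dnnl::stream& get_stream() {
    if (!stream_) {
      // Constructed once per thread, on first use.
      stream_ = std::make_unique<dnnl::stream>(engine_);
    }
    return *stream_;
  }

 private:
  dnnl::engine engine_{dnnl::engine::kind::cpu, 0};
  std::unique_ptr<dnnl::stream> stream_;
};

// Mirrors the tls() entry point: a distinct instance per thread.
inline ThreadLocalOneDNNContext& tls() {
  static thread_local ThreadLocalOneDNNContext instance;
  return instance;
}

Note that the kernels below bind the result with `auto astream = ...` rather than `auto&`; `dnnl::stream` is a reference-counted handle around the underlying `dnnl_stream_t`, so the copy is cheap and still drives the same cached stream.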
2 changes: 1 addition & 1 deletion paddle/fluid/framework/data_layout_transform.cc
@@ -193,7 +193,7 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,
auto reorder_p =
handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p);

- mkldnn::stream astream(cpu_engine);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("ext_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
@@ -48,7 +48,7 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
platform::ReorderMKLDNNHandler handler(tz, dout->type(), dout_type, dev_ctx,
onednn_engine, key);

- mkldnn::stream astream(onednn_engine);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
auto reorder_src_memory_p = handler.AcquireSrcMemory(
dout->format(), platform::to_void_cast(dout->data<T>()));

@@ -61,7 +61,7 @@ class EltwiseMKLDNNKernel : public framework::OpKernel<T> {

const auto binary_prim = handler.AcquireForwardPrimitive();

- mkldnn::stream astream(mkldnn_engine);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();

const std::unordered_map<int, dnnl::memory> args = {
{DNNL_ARG_SRC_0, *src_x_memory},
8 changes: 4 additions & 4 deletions paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc
@@ -246,7 +246,7 @@ class GRUMKLDNNHandler : public platform::MKLDNNHandlerT<T, dnnl::gru_forward> {
memory_p = std::make_shared<dnnl::memory>(this->fwd_pd_->src_iter_desc(),
this->engine_);

- dnnl::stream astream(this->engine_);
+ auto astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
dnnl::reorder(user_h0_memory, *memory_p, attr_)
.execute(astream, user_h0_memory, *memory_p);

@@ -284,7 +284,7 @@ class GRUMKLDNNHandler : public platform::MKLDNNHandlerT<T, dnnl::gru_forward> {
memory_p = std::make_shared<dnnl::memory>(
this->fwd_pd_->weights_layer_desc(), this->engine_);

- dnnl::stream astream(this->engine_);
+ auto astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
dnnl::reorder(user_memory, *memory_p, attr_)
.execute(astream, user_memory, *memory_p);

@@ -337,7 +337,7 @@ class GRUMKLDNNHandler : public platform::MKLDNNHandlerT<T, dnnl::gru_forward> {
memory_p = std::make_shared<dnnl::memory>(
this->fwd_pd_->weights_iter_desc(), this->engine_);

- dnnl::stream astream(this->engine_);
+ auto astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
dnnl::reorder(user_memory, *memory_p, attr_)
.execute(astream, user_memory, *memory_p);

@@ -469,7 +469,7 @@ class FusionGRUMKLDNNKernel : public framework::OpKernel<T> {

auto gru_forward_p = handler.AcquireForwardPrimitive();

- dnnl::stream astream(mkldnn_engine);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
gru_forward_p->execute(astream, gru_args);
astream.wait();

10 changes: 5 additions & 5 deletions paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
@@ -292,7 +292,7 @@ class MultiGRUHandler {

auto gru_forward_p0 = AcquireGruPrimitive(layer, dir);

- dnnl::stream astream(engine_);
+ auto astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
gru_forward_p0->execute(astream, gru_args);
astream.wait();
return out_mem;
@@ -315,7 +315,7 @@ class MultiGRUHandler {
memory_p = std::make_shared<dnnl::memory>(
gru_pds_[{layer, dir}]->src_iter_desc(), engine_);

- dnnl::stream astream(engine_);
+ auto astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
dnnl::reorder(user_h0_memory, *memory_p, attrs_[2 * layer + (dir == R2L)])
.execute(astream, user_h0_memory, *memory_p);

@@ -354,7 +354,7 @@ class MultiGRUHandler {
memory_p = std::make_shared<dnnl::memory>(
gru_pds_[{layer, dir}]->weights_layer_desc(), engine_);

- dnnl::stream astream(engine_);
+ auto astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
dnnl::reorder(user_memory, *memory_p, attrs_[2 * layer + (dir == R2L)])
.execute(astream, user_memory, *memory_p);

@@ -410,7 +410,7 @@ class MultiGRUHandler {
memory_p = std::make_shared<dnnl::memory>(
gru_pds_[{layer, dir}]->weights_iter_desc(), engine_);

- dnnl::stream astream(engine_);
+ auto astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
dnnl::reorder(user_memory, *memory_p, attrs_[2 * layer + (dir == R2L)])
.execute(astream, user_memory, *memory_p);

@@ -516,7 +516,7 @@ class MultiGRUHandler {

auto concat_p = AcquireConcatPrimitive(layer);

- dnnl::stream astream(engine_);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
concat_p->execute(astream, concat_args);
astream.wait();
return out_mem;
4 changes: 2 additions & 2 deletions paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
@@ -110,7 +110,7 @@ void eltwise_forward(const framework::ExecutionContext &ctx,
x->IsSharedBufferWith(*y) ? src_memory_p : handler.AcquireDstMemory(y);
auto activation_p = handler.AcquireForwardPrimitive();

- mkldnn::stream astream(dev_ctx.GetEngine());
+ auto astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
activation_p->execute(astream, {{MKLDNN_ARG_FROM, *src_memory_p},
{MKLDNN_ARG_TO, *dst_memory_p}});
astream.wait();
@@ -156,7 +156,7 @@ void eltwise_grad(const framework::ExecutionContext &ctx,
auto diff_src_memory_p = handler.AcquireDiffSrcMemory(diff_x);
auto activation_backward_p = handler.AcquireBackwardPrimitive();

- mkldnn::stream astream(dev_ctx.GetEngine());
+ auto astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
activation_backward_p->execute(astream,
{{MKLDNN_ARG_SRC, *src_memory_p},
{MKLDNN_ARG_DIFF_DST, *diff_dst_memory_p},
4 changes: 2 additions & 2 deletions paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc
@@ -220,7 +220,7 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
y->set_layout(DataLayout::kMKLDNN);
y->set_format(platform::GetMKLDNNFormat(*dst_memory));

- mkldnn::stream astream(dev_ctx.GetEngine());
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
batch_norm_p->execute(astream,
{{MKLDNN_ARG_SRC, *src_memory},
{MKLDNN_ARG_SCALE_SHIFT, *scaleshift_memory},
@@ -321,7 +321,7 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
// finally create batch_norm backward primitive
auto batch_norm_bwd_p = handler.AcquireBackwardPrimitive();

- mkldnn::stream astream(dev_ctx.GetEngine());
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
batch_norm_bwd_p->execute(
astream, {{MKLDNN_ARG_SRC, *src_memory},
{MKLDNN_ARG_MEAN, *mean_memory},
2 changes: 1 addition & 1 deletion paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
@@ -202,7 +202,7 @@ class ConcatMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
output->mutable_data<T>(place, concat_pd->dst_desc().get_size()));
}

- mkldnn::stream astream(mkldnn_engine);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
std::unordered_map<int, memory> args;
for (size_t i = 0; i < multi_input.size(); ++i) {
args.insert({MKLDNN_ARG_MULTIPLE_SRC + i, (*srcs).at(i)});
6 changes: 3 additions & 3 deletions paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
@@ -471,7 +471,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
args.insert({MKLDNN_ARG_BIAS, *bias_memory_p});
}

- mkldnn::stream astream(mkldnn_engine);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
conv_p->execute(astream, args);
astream.wait();

@@ -553,7 +553,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
conv_p = std::static_pointer_cast<mkldnn::convolution_forward>(
dev_ctx.GetBlob(prim_key));

- mkldnn::stream astream(mkldnn_engine);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();

if (conv_p == nullptr || !is_test) {
float fuse_alpha = ctx.Attr<float>("fuse_alpha");
@@ -1045,7 +1045,7 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
user_weights_md, to_void_cast<T>(filter_data));
auto user_diff_dst_memory_p = handler.AcquireDiffDstMemory(
user_diff_dst_md, to_void_cast<T>(output_grad_data));
- mkldnn::stream astream(mkldnn_engine);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
if (filter_grad) {
auto src_memory_p = handler.AcquireSrcMemoryFromWeightsPrimitive(
user_src_memory_p, pipeline);
2 changes: 1 addition & 1 deletion paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc
@@ -242,7 +242,7 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {

auto conv_p = handler.AcquireConvolution();

- mkldnn::stream astream(mkldnn_engine);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
if (bias) {
const T* bias_data = bias->data<T>();
auto user_bias_md = platform::MKLDNNMemDesc(
2 changes: 1 addition & 1 deletion paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
@@ -124,7 +124,7 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
dst_memory->set_data_handle(output->mutable_data<float>(ctx.GetPlace()));
}

- mkldnn::stream astream(engine);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
reorder_p->execute(astream, *src_memory, *dst_memory);
astream.wait();

6 changes: 3 additions & 3 deletions paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
@@ -137,7 +137,7 @@ class FCPrimitiveFactory {
}

void Execute() {
- mkldnn::stream astream(engine_);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
if (bias_) {
fc_->execute(astream, {{MKLDNN_ARG_SRC, *input_},
{MKLDNN_ARG_WEIGHTS, *weights_},
@@ -280,7 +280,7 @@ class FCPrimitiveFactory {
auto dst_mem = std::make_shared<memory>(dst_desc, engine_);

auto reorder = mkldnn::reorder(src_mem, *dst_mem);
- mkldnn::stream astream(engine_);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();

{
platform::RecordEvent record_reorder("int_reorder",
@@ -309,7 +309,7 @@ class FCPrimitiveFactory {
attributes.set_output_scales(mask, scale_data);
auto reorder = mkldnn::reorder(*src_mem, *dst_mem, attributes);

- mkldnn::stream astream(engine_);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
2 changes: 1 addition & 1 deletion paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc
@@ -154,7 +154,7 @@ class InterpolateMKLDNNKernel : public framework::OpKernel<T> {
auto resampling_prim = handler.AcquireForwardPrimitive();
const std::unordered_map<int, dnnl::memory> args = {
{DNNL_ARG_SRC, *src_memory_p}, {DNNL_ARG_DST, *dst_memory_p}};
- mkldnn::stream astream(mkldnn_engine);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
resampling_prim->execute(astream, args);
astream.wait();

2 changes: 1 addition & 1 deletion paddle/fluid/operators/mkldnn/layer_norm_mkldnn_op.cc
@@ -120,7 +120,7 @@ class LayerNormMKLDNNOpKernel : public paddle::framework::OpKernel<T> {

auto layer_norm_p = handler.AcquireForwardPrimitive();

- dnnl::stream astream(dev_ctx.GetEngine());
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
std::unordered_map<int, dnnl::memory> args;

args.insert({DNNL_ARG_SRC, *src_memory});
4 changes: 2 additions & 2 deletions paddle/fluid/operators/mkldnn/lrn_mkldnn_op.cc
@@ -59,7 +59,7 @@ class LRNMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto workspace_memory = handler.AcquireWorkspaceMemory(mid);
mid->set_layout(framework::DataLayout::kMKLDNN);

- mkldnn::stream astream(dev_ctx.GetEngine());
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
if (!workspace_memory->get_desc().is_zero()) {
mid->set_format(platform::GetMKLDNNFormat(*workspace_memory));
lrn_p->execute(astream, {{MKLDNN_ARG_SRC, *src_memory},
@@ -118,7 +118,7 @@ class LRNMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {

auto lrn_bwd = handler.AcquireBackwardPrimitive();

- mkldnn::stream astream(dev_ctx.GetEngine());
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
lrn_bwd->execute(astream, {{MKLDNN_ARG_SRC, *src_memory},
{MKLDNN_ARG_DIFF_DST, *diff_dst_memory},
{MKLDNN_ARG_DIFF_SRC, *diff_src_memory},
6 changes: 3 additions & 3 deletions paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
@@ -109,7 +109,7 @@ class MulPrimitiveFactory {

auto reorder = mkldnn::reorder(reorder_pd);

- mkldnn::stream astream(engine_);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
@@ -184,7 +184,7 @@ class MulPrimitiveFactory {
}

void Execute() {
- mkldnn::stream astream(engine_);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
(*mul_).execute(astream, {{MKLDNN_ARG_SRC, *x_input_},
{MKLDNN_ARG_WEIGHTS, *y_input_},
{MKLDNN_ARG_DST, *output_}});
@@ -270,7 +270,7 @@ class MulPrimitiveFactory {

auto reorder = mkldnn::reorder(src_mem, dst_mem);

- mkldnn::stream astream(engine_);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();

{
platform::RecordEvent record_reorder("int_reorder",
4 changes: 2 additions & 2 deletions paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc
@@ -51,7 +51,7 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {

auto pool_p = handler.AcquireForwardPrimitive();

- mkldnn::stream astream(dev_ctx.GetEngine());
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
if ((ctx.Attr<bool>("is_test") == false) &&
(ctx.Attr<std::string>("pooling_type") == "max")) {
// Training
@@ -154,7 +154,7 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {

auto pool_bwd_p = handler.AcquireBackwardPrimitive();

- mkldnn::stream astream(dev_ctx.GetEngine());
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
if (pooling_type == "max") {
// Max - pooling needs Workspace
auto workspace_memory = handler.AcquireWorkspaceMemory();
2 changes: 1 addition & 1 deletion paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
@@ -140,7 +140,7 @@ class QuantOpKernel : public framework::OpKernel<T> {
}
}

- mkldnn::stream astream(engine);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
2 changes: 1 addition & 1 deletion paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc
@@ -137,7 +137,7 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
}
}

- dnnl::stream astream(engine);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
4 changes: 2 additions & 2 deletions paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc
@@ -112,7 +112,7 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel<T> {

auto softmax_p = handler.AcquireForwardPrimitive();

- mkldnn::stream astream(dev_ctx.GetEngine());
+ auto astream = paddle::platform::MKLDNNDeviceContext::tls().get_stream();
softmax_p->execute(astream, {{DNNL_ARG_SRC, *softmax_src_memory_p},
{DNNL_ARG_DST, *softmax_dst_memory_p}});
astream.wait();
@@ -164,7 +164,7 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel<T> {

auto softmax_bwd_p = handler.AcquireBackwardPrimitive();

- mkldnn::stream astream(dev_ctx.GetEngine());
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
softmax_bwd_p->execute(astream,
{{MKLDNN_ARG_DST, *dst_memory_p},
{MKLDNN_ARG_DIFF_DST, *diff_dst_memory_p},
2 changes: 1 addition & 1 deletion paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc
@@ -178,7 +178,7 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
}
args.insert({MKLDNN_ARG_DST, *dst_mem});

- mkldnn::stream astream(dev_ctx.GetEngine());
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
sum_p->execute(astream, args);
astream.wait();

4 changes: 2 additions & 2 deletions paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc
@@ -61,7 +61,7 @@ class TransposeMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto transpose_p = handler.AcquireTranspose(transpose_dst_memory_p,
transpose_src_memory_p);

- mkldnn::stream astream(mkldnn_engine);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
transpose_p->execute(astream, *transpose_src_memory_p,
*transpose_dst_memory_p);
astream.wait();
@@ -116,7 +116,7 @@ class TransposeMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
auto transpose_p = handler.AcquireTranspose(transpose_dst_memory_p,
transpose_src_memory_p);

- mkldnn::stream astream(mkldnn_engine);
+ auto astream = platform::MKLDNNDeviceContext::tls().get_stream();
transpose_p->execute(astream, *transpose_src_memory_p,
*transpose_dst_memory_p);
astream.wait();