This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[master] Implemented oneDNN Backward Adaptive Pooling kernel #20825

Merged
Changes from all commits
38 commits
30886fe
feat: started working on integrating adaptive pooling merge to master
piotrwolinski-intel Nov 8, 2021
c3e8e68
feat: added onednn adaptive pooling to master
piotrwolinski-intel Nov 10, 2021
ce8f001
fix: changed SupportDNNLPooling condition to SupportDNNL in adaptive_…
piotrwolinski-intel Nov 15, 2021
54bc2a0
clang fix
piotrwolinski-intel Nov 15, 2021
8f96f32
changed typo in comment
piotrwolinski-intel Nov 15, 2021
0df5a32
finished adaptive backward pooling on master
piotrwolinski-intel Nov 22, 2021
daaa9d3
Been working on solving issue with backward adaptive pooling
piotrwolinski-intel Dec 13, 2021
d4da970
Unified oneDNN pooling implementation calls
piotrwolinski-intel Dec 13, 2021
3ef827a
Added include for vector
piotrwolinski-intel Dec 13, 2021
f16638a
feat: added onednn adaptive pooling to master
piotrwolinski-intel Nov 10, 2021
c4e173f
fix: changed SupportDNNLPooling condition to SupportDNNL in adaptive_…
piotrwolinski-intel Nov 15, 2021
debb768
finished adaptive backward pooling on master
piotrwolinski-intel Nov 22, 2021
724ee52
Been working on solving issue with backward adaptive pooling
piotrwolinski-intel Dec 13, 2021
5e176d5
Further working on implementing backward propagation to adaptive pooling
piotrwolinski-intel Dec 20, 2021
e2508c9
MXNetError: could not execute a primitive dnnl_pooling-inl.h:248
piotrwolinski-intel Dec 22, 2021
30c52b5
Finished adaptive pooling backward kernel. Only need to remove debug …
piotrwolinski-intel Dec 28, 2021
8954371
Changed 2 template functions for standard ones. Added is_adaptive_poo…
piotrwolinski-intel Dec 29, 2021
1329bf6
Tests tweaks
piotrwolinski-intel Dec 29, 2021
8b95412
Removed unnecessary comments
piotrwolinski-intel Jan 3, 2022
f5f0e01
Added clang style. Changed unnecessary param field for a member funct…
piotrwolinski-intel Jan 3, 2022
2aed1a8
Removed ceil and floor functions that were used on the integer values
piotrwolinski-intel Jan 3, 2022
689fbda
Added more shapes to test
piotrwolinski-intel Jan 3, 2022
177bfc8
Working on adaptive backward pooling test
piotrwolinski-intel Jan 10, 2022
a8bfea1
Changed SupportDNNLAveragePooling function so that calculations are n…
piotrwolinski-intel Jan 10, 2022
592a158
Added convolution before pooling in test_adaptive_pooling in test_dnn…
piotrwolinski-intel Jan 10, 2022
099cba3
Added whitespaces in test_adaptive_pooling
piotrwolinski-intel Jan 11, 2022
20dffdd
Modified SupportDNNLAveragePooling and UseAdaptivePaddingKernel to op…
piotrwolinski-intel Jan 11, 2022
401934a
Added num_filter as a parameter in test_adaptive_pooling
piotrwolinski-intel Jan 12, 2022
5a2ee58
Finished test_adaptive_pooling with added convolution before pooling
piotrwolinski-intel Jan 17, 2022
ecc2a6f
Added formatting
piotrwolinski-intel Jan 17, 2022
2c3c6f7
Added additional include
piotrwolinski-intel Jan 17, 2022
1944422
Fixed path in include
piotrwolinski-intel Jan 17, 2022
7ec285a
Fixed wrong conditions with backward storage type
piotrwolinski-intel Jan 17, 2022
f8a40a8
Changed test_adaptive_pooling to make use of pytest parameters
piotrwolinski-intel Jan 18, 2022
ae4d43a
Removed brackets and unnecessary comments
piotrwolinski-intel Jan 24, 2022
04a89ca
Implemented changes suggested in review
piotrwolinski-intel Feb 15, 2022
7ea5b99
Formatted diff
piotrwolinski-intel Feb 15, 2022
8ea761d
Reformatted diff
piotrwolinski-intel Feb 15, 2022
3 changes: 2 additions & 1 deletion src/operator/contrib/adaptive_avg_pooling-inl.h
@@ -95,6 +95,7 @@ inline void AdaptiveAvgPoolOpBackward(const nnvm::NodeAttrs& attrs,
const std::vector<TBlob>& outputs) {
CHECK_EQ(inputs.size(), 1U);
CHECK_EQ(outputs.size(), 1U);

mshadow::Stream<xpu>* s = ctx.get_stream<xpu>();
if (IsWriting(req[0])) {
// zero grad before backwarding
@@ -116,7 +117,7 @@ static bool AdaptiveAvgPoolOpInferShape(const nnvm::NodeAttrs& attrs,
if (mxnet::op::shape_is_none(dshape)) {
return false;
}
if (param.output_size.has_value()) {
if (param.IsAdaptivePooling()) {
if (param.output_size.value().ndim() == 1) {
dshape[2] = param.output_size.value()[0];
dshape[3] = param.output_size.value()[0];
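In the shape-inference hunk above, the explicit param.output_size.has_value() check is replaced by param.IsAdaptivePooling(). The helper itself is not part of the hunks shown in this diff; a minimal, purely hypothetical sketch, assuming the member does nothing more than wrap the old output_size check, could look like this:

#include <optional>
#include <vector>

// Hypothetical, simplified stand-in for PoolingParam; the real parameter
// struct lives in src/operator/nn/pooling-inl.h and is not shown in this PR.
struct PoolingParamSketch {
  // Desired spatial output size; present only for adaptive pooling.
  std::optional<std::vector<int>> output_size;

  // Assumption: adaptive pooling is requested exactly when an explicit
  // output size was given, i.e. the old output_size.has_value() condition.
  bool IsAdaptivePooling() const {
    return output_size.has_value();
  }
};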
150 changes: 108 additions & 42 deletions src/operator/contrib/adaptive_avg_pooling.cc
@@ -25,11 +25,13 @@
// #include "elemwise_op_common.h"
#include "../elemwise_op_common.h"
#if MXNET_USE_ONEDNN == 1
#include "../nn/dnnl/dnnl_base-inl.h"
#include "../nn/dnnl/dnnl_pooling-inl.h"
#endif // MXNET_USE_ONEDNN

#define START_IND(a, b, c) static_cast<int>(std::floor(static_cast<float>(a * c) / b))
#define END_IND(a, b, c) static_cast<int>(std::ceil(static_cast<float>((a + 1) * c) / b))
#define DIV_ROUND_UP(a, b) ((a + (b - 1)) / b)

namespace mxnet {
namespace op {
@@ -169,39 +171,6 @@ void AdaptiveAvgPoolUpdateOutput(mshadow::Stream<cpu>* s,
}
}

template <typename xpu, typename DType, typename AccReal>
void AdaptiveAvgPoolUpdateGradInput(mshadow::Stream<cpu>* s,
const std::vector<TBlob>& input,
const std::vector<TBlob>& output) {
Tensor<xpu, 4, DType> gradOut = input[0].get<xpu, 4, DType>(s);
Tensor<xpu, 4, DType> gradIn = output[0].get<xpu, 4, DType>(s);

DType* gradOutput_data = gradOut.dptr_;
DType* gradInput_data = gradIn.dptr_;

int64_t sizeB = gradIn.size(0);
int64_t sizeD = gradIn.size(1);
int64_t isizeH = gradIn.size(2);
int64_t isizeW = gradIn.size(3);

int64_t osizeH = gradOut.size(2);
int64_t osizeW = gradOut.size(3);

int64_t b;
#pragma omp parallel for private(b) \
num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())
for (b = 0; b < sizeB; b++) {
SpatialAdaptiveAveragePooling_updateGradInput_frame<DType>(
gradInput_data + b * sizeD * isizeH * isizeW,
gradOutput_data + b * sizeD * osizeH * osizeW,
sizeD,
isizeH,
isizeW,
osizeH,
osizeW);
}
}

#if MXNET_USE_ONEDNN == 1
bool SupportDNNLAveragePooling(const NDArray& in_data, const NDArray& out_data) {
for (int64_t idx = 2; idx < in_data.shape().ndim(); ++idx) {
@@ -218,15 +187,44 @@ bool SupportDNNLAveragePooling(const NDArray& in_data, const NDArray& out_data)
const int IW = in_data.shape()[3];
const int OH = out_data.shape()[2];
const int OW = out_data.shape()[3];
const int strides_H = floor((IH << 1) / OH) - floor(IH / OH);
const int strides_W = floor((IW << 1) / OW) - floor(IW / OW);
const int kernel_H = ceil((IH << 1) / OH) - floor(IH / OH);
const int kernel_W = ceil((IW << 1) / OW) - floor(IW / OW);
const int strides_H = ((IH << 1) / OH) - (IH / OH);
const int strides_W = ((IW << 1) / OW) - (IW / OW);
const int kernel_H = DIV_ROUND_UP((IH << 1) / OH, 1) - (IH / OH);
const int kernel_W = DIV_ROUND_UP((IW << 1) / OW, 1) - (IW / OW);
const int pad_l_top = (strides_H * (OH - 1) + kernel_H - IH) / 2;
const int pad_l_left = (strides_W * (OW - 1) + kernel_W - IW) / 2;
return pad_l_top == 0 && pad_l_left == 0;
}

void AdaptiveAvgPoolOpBackwardExCPU(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<NDArray>& inputs,
const std::vector<OpReqType>& req,
const std::vector<NDArray>& outputs) {
CHECK_EQ(inputs.size(), 1U);

if (SupportDNNLAveragePooling(outputs[0], inputs[0])) {
DNNL_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
DNNLRun(DNNLPoolingGradCompute, attrs, ctx, inputs, req, outputs);
DNNL_OPCHECK_RUN(AdaptiveAvgPoolOpBackward<cpu>, attrs, ctx, inputs, req, outputs);
return;
}
FallBackCompute(AdaptiveAvgPoolOpBackward<cpu>, attrs, ctx, inputs, req, outputs);
}

inline static bool BackwardAdaptivePoolingStorageType(const nnvm::NodeAttrs& attrs,
const int dev_mask,
DispatchMode* dispatch_mode,
std::vector<int>* in_attrs,
std::vector<int>* out_attrs) {
CHECK_EQ(in_attrs->size(), 1);
CHECK_EQ(out_attrs->size(), 1);

// support_dnnl is set to true, because at this point there is no way
// to check if DNNLAdaptivePooling is supported
return DNNLStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs, out_attrs);
}

void AdaptiveAvgPoolComputeExCPU(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<NDArray>& inputs,
@@ -238,18 +236,62 @@ void AdaptiveAvgPoolComputeExCPU(const nnvm::NodeAttrs& attrs,
oneDNN doesn't support adaptive pooling.
Fallback is needed when the padding is not equal to 0.
*/
const PoolingParam& param = nnvm::get<PoolingParam>(attrs.parsed);
if (SupportDNNL(inputs[0]) && SupportDNNLAveragePooling(inputs[0], outputs[0])) {
const NDArray* workspace = nullptr;
DNNL_OPCHECK_INIT(false, 1, inputs, outputs);
DNNLPoolingCompute(ctx, param, inputs[0], req[0], outputs[0], workspace, true);
DNNLRun(DNNLPoolingCompute, attrs, ctx, inputs, req, outputs);
DNNL_OPCHECK_RUN(PoolingCompute<cpu>, attrs, ctx, inputs, req, outputs);
return;
}
FallBackCompute(AdaptiveAvgPoolOpForward<cpu>, attrs, ctx, inputs, req, outputs);
}

inline static bool AdaptivePoolingStorageType(const nnvm::NodeAttrs& attrs,
const int dev_mask,
DispatchMode* dispatch_mode,
std::vector<int>* in_attrs,
std::vector<int>* out_attrs) {
CHECK_EQ(in_attrs->size(), 1);
CHECK_EQ(out_attrs->size(), 1);

// support_dnnl is set to true, because at this point there is no way
// to check if DNNLAdaptivePooling is supported
return DNNLStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs, out_attrs);
}
#endif

template <typename xpu, typename DType, typename AccReal>
void AdaptiveAvgPoolUpdateGradInput(mshadow::Stream<cpu>* s,
const std::vector<TBlob>& input,
const std::vector<TBlob>& output) {
Tensor<xpu, 4, DType> gradOut = input[0].get<xpu, 4, DType>(s);
Tensor<xpu, 4, DType> gradIn = output[0].get<xpu, 4, DType>(s);

DType* gradOutput_data = gradOut.dptr_;
DType* gradInput_data = gradIn.dptr_;

int64_t sizeB = gradIn.size(0);
int64_t sizeD = gradIn.size(1);
int64_t isizeH = gradIn.size(2);
int64_t isizeW = gradIn.size(3);

int64_t osizeH = gradOut.size(2);
int64_t osizeW = gradOut.size(3);

int64_t b;
#pragma omp parallel for private(b) \
num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())
for (b = 0; b < sizeB; b++) {
SpatialAdaptiveAveragePooling_updateGradInput_frame<DType>(
gradInput_data + b * sizeD * isizeH * isizeW,
gradOutput_data + b * sizeD * osizeH * osizeW,
sizeD,
isizeH,
isizeW,
osizeH,
osizeW);
}
}

NNVM_REGISTER_OP(_contrib_AdaptiveAvgPooling2D)
.describe(R"code(
Applies a 2D adaptive average pooling over a 4D input with the shape of (NCHW).
@@ -262,25 +304,49 @@ The pooling kernel and stride sizes are automatically chosen for desired output
(N x C x height x width) for any input (NCHW).

)code" ADD_FILELINE)
.set_attr_parser(ParamParser<PoolingParam>)
.set_attr_parser(PoolingParamParser)
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr<mxnet::FInferShape>("FInferShape", AdaptiveAvgPoolOpInferShape)
.set_attr<FCompute>("FCompute<cpu>", AdaptiveAvgPoolOpForward<cpu>)
.set_attr<nnvm::FGradient>("FGradient",
ElemwiseGradUseNone{"_backward_contrib_AdaptiveAvgPooling2D"})
#if MXNET_USE_ONEDNN == 1
.set_attr<FInferStorageType>("FInferStorageType", AdaptivePoolingStorageType)
.set_attr<bool>("TIsDNNL", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", AdaptiveAvgPoolComputeExCPU)
#endif
.add_argument("data", "NDArray-or-Symbol", "Input data")
.add_arguments(PoolingParam::__FIELDS__());

NNVM_REGISTER_OP(_backward_contrib_AdaptiveAvgPooling2D)
.set_attr_parser(ParamParser<PoolingParam>)
.set_attr_parser(PoolingParamParser)
.set_num_inputs(1)
.set_num_outputs(1)
.set_attr<nnvm::TIsBackward>("TIsBackward", true)
#if MXNET_USE_ONEDNN == 1
.set_attr<FInferStorageType>("FInferStorageType", BackwardAdaptivePoolingStorageType)
// Different backends require different FInplaceOption
.set_attr<nnvm::FInplaceOption>("FInplaceOption",
[](const NodeAttrs& attrs) {
const PoolingParam& param =
nnvm::get<PoolingParam>(attrs.parsed);
if (DNNLRequireWorkspace(param) && param.IsAdaptivePooling())
return std::vector<std::pair<int, int>>{{1, 0}};
return std::vector<std::pair<int, int>>();
})
.set_attr<FResourceRequest>("FResourceRequest",
[](const NodeAttrs& n) {
return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
})
.set_attr<bool>("TIsDNNL", true)
.set_attr<FComputeEx>("FComputeEx<cpu>", AdaptiveAvgPoolOpBackwardExCPU)
#else
.set_attr<nnvm::FInplaceOption>("FInplaceOption",
[](const NodeAttrs& attrs) {
return std::vector<std::pair<int, int>>();
})
#endif
.set_attr<FCompute>("FCompute<cpu>", AdaptiveAvgPoolOpBackward<cpu>);

} // namespace op
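SupportDNNLAveragePooling and UseAdaptivePaddingKernel both re-express the adaptive pooling as an ordinary average pooling: a stride, kernel and padding are derived from the input and output spatial sizes, and the oneDNN primitive is used only when the derived top/left padding is zero; otherwise the operator falls back to AdaptiveAvgPoolOpForward/Backward. Below is a standalone sketch of that arithmetic, assuming the intended math follows the START_IND/END_IND convention defined at the top of this file (output window a covers [floor(a*I/O), ceil((a+1)*I/O))); the helper names are illustrative and not part of the MXNet API.

#include <cstdio>

// Illustrative helpers only, not MXNet code. They mirror the padding check in
// SupportDNNLAveragePooling: stride = floor(2I/O) - floor(I/O),
// kernel = ceil(2I/O) - floor(I/O), pad = (stride*(O-1) + kernel - I) / 2.
struct DerivedPooling {
  int kernel;
  int stride;
  int pad;  // top/left padding a regular pooling would need
};

DerivedPooling DeriveAdaptiveAsRegular(int input_size, int output_size) {
  const int I      = input_size;
  const int O      = output_size;
  const int stride = (2 * I) / O - I / O;                  // floor(2I/O) - floor(I/O)
  const int kernel = (2 * I + O - 1) / O - I / O;          // ceil(2I/O)  - floor(I/O)
  const int pad    = (stride * (O - 1) + kernel - I) / 2;  // pad_l_top / pad_l_left
  return {kernel, stride, pad};
}

int main() {
  // 32 -> 4: stride = 16 - 8 = 8, kernel = 16 - 8 = 8, pad = (8*3 + 8 - 32)/2 = 0.
  // The padding is zero, so SupportDNNLAveragePooling accepts this shape pair
  // and the oneDNN pooling primitive is used.
  DerivedPooling p = DeriveAdaptiveAsRegular(32, 4);
  std::printf("32->4: kernel=%d stride=%d pad=%d\n", p.kernel, p.stride, p.pad);

  // 10 -> 4: stride = 5 - 2 = 3, kernel = 5 - 2 = 3, pad = (3*3 + 3 - 10)/2 = 1.
  // Non-zero padding, so the operator falls back to the native
  // AdaptiveAvgPoolOpForward / AdaptiveAvgPoolOpBackward implementation.
  p = DeriveAdaptiveAsRegular(10, 4);
  std::printf("10->4: kernel=%d stride=%d pad=%d\n", p.kernel, p.stride, p.pad);
  return 0;
}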
68 changes: 38 additions & 30 deletions src/operator/nn/dnnl/dnnl_pooling-inl.h
@@ -28,10 +28,13 @@

#include <dnnl.hpp>
#include <utility>
#include <vector>

#include "../pooling-inl.h"
#include "./dnnl_base-inl.h"

#define DIV_ROUND_UP(a, b) ((a + (b - 1)) / b)

namespace mxnet {
namespace op {

@@ -54,7 +57,8 @@ class DNNLPoolingFwd {
void Execute(const NDArray& in_data,
const OpReqType req,
const NDArray& out_data,
const NDArray* workspace);
const NDArray* workspace,
const bool use_adaptive_pooling);

private:
bool with_workspace_;
@@ -92,19 +96,19 @@ void UseAdaptivePaddingKernel(T* kernel,
T* strides,
T* pad_l,
T* pad_r,
const NDArray& in_data,
const NDArray& out_data) {
const int IH = in_data.shape()[2];
const int IW = in_data.shape()[3];
const int OH = out_data.shape()[2];
const int OW = out_data.shape()[3];

strides->at(0) = floor((IH << 1) / OH) - floor(IH / OH);
strides->at(1) = floor((IW << 1) / OW) - floor(IW / OW);
kernel->at(0) = ceil((IH << 1) / OH) - floor(IH / OH);
kernel->at(1) = ceil((IW << 1) / OW) - floor(IW / OW);
pad_l->at(0) = (strides->at(0) * (OH - 1) + kernel->at(0) - IH) >> 1;
pad_l->at(1) = (strides->at(1) * (OW - 1) + kernel->at(1) - IW) >> 1;
const mxnet::TShape& input_shape,
const mxnet::TShape& output_shape) {
const int IH = input_shape[2];
const int IW = input_shape[3];
const int OH = output_shape[2];
const int OW = output_shape[3];

strides->at(0) = ((IH << 1) / OH) - (IH / OH);
strides->at(1) = ((IW << 1) / OW) - (IW / OW);
kernel->at(0) = DIV_ROUND_UP((IH << 1) / OH, 1) - (IH / OH);
kernel->at(1) = DIV_ROUND_UP((IW << 1) / OW, 1) - (IW / OW);
pad_l->at(0) = (strides->at(0) * (OH - 1) + kernel->at(0) - IH) / 2;
pad_l->at(1) = (strides->at(1) * (OW - 1) + kernel->at(1) - IW) / 2;
}

inline int GetPaddingSizeFull(dim_t x, int padl, int padr, int k, int s) {
@@ -168,31 +172,35 @@ inline bool SupportDNNLPooling(const PoolingParam& param, const NDArray& input)
}

inline bool DNNLRequireWorkspace(const PoolingParam& param) {
return param.pool_type != pool_enum::kAvgPooling;
return param.pool_type != pool_enum::kAvgPooling && !param.IsAdaptivePooling();
}

typedef ParamOpSign<PoolingParam> DNNLPoolingSignature;
void DNNLPoolingCompute(const OpContext& ctx,
const PoolingParam& param,
const NDArray& in_data,
const OpReqType req,
const NDArray& out_data,
const NDArray* workspace,
const bool use_adaptive_pooling);

void DNNLPoolingGradCompute(const OpContext& ctx,
const PoolingParam& param,
const NDArray& out_grad,
const NDArray& in_data,
const NDArray* workspace,
const OpReqType req,
const NDArray& in_grad);

DNNLPoolingFwd& GetPoolingFwd(const PoolingParam& param,
const bool is_train,
const NDArray& data,
const NDArray& output,
const bool use_adaptive_pooling);

DNNLPoolingBwd& GetPoolingBwd(const PoolingParam& param,
const NDArray& in_data,
const NDArray& in_grad,
const NDArray& out_grad,
const bool use_adaptive_pooling);

void DNNLPoolingGradCompute(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<NDArray>& inputs,
const std::vector<OpReqType>& req,
const std::vector<NDArray>& outputs);

void DNNLPoolingCompute(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
const std::vector<NDArray>& in_data,
const std::vector<OpReqType>& req,
const std::vector<NDArray>& out_data);

} // namespace op
} // namespace mxnet
#endif // MXNET_USE_ONEDNN == 1
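At the bottom of the header, DNNLPoolingCompute and DNNLPoolingGradCompute lose their per-array signatures (ctx, param, in_data, req, out_data, workspace, use_adaptive_pooling) and are re-declared with the standard FComputeEx signature; this is what lets adaptive_avg_pooling.cc dispatch them through DNNLRun and DNNL_OPCHECK_RUN. The implementations live in dnnl_pooling.cc and are not part of this diff; a rough, hypothetical sketch of how such an entry point could map the NDArray vectors back onto the declarations above is:

// Hypothetical adapter for illustration only; the real DNNLPoolingCompute in
// src/operator/nn/dnnl/dnnl_pooling.cc is not shown in this PR's hunks.
void DNNLPoolingComputeSketch(const nnvm::NodeAttrs& attrs,
                              const OpContext& ctx,
                              const std::vector<NDArray>& inputs,
                              const std::vector<OpReqType>& req,
                              const std::vector<NDArray>& outputs) {
  const PoolingParam& param = nnvm::get<PoolingParam>(attrs.parsed);
  const bool use_adaptive   = param.IsAdaptivePooling();

  // A workspace output exists only for pooling types that need one; with this
  // PR, DNNLRequireWorkspace explicitly excludes adaptive pooling, and the
  // adaptive operator is registered with a single output anyway.
  const NDArray* workspace =
      (DNNLRequireWorkspace(param) && outputs.size() > 1) ? &outputs[1] : nullptr;

  DNNLPoolingFwd& fwd =
      GetPoolingFwd(param, ctx.is_train, inputs[0], outputs[0], use_adaptive);
  fwd.Execute(inputs[0], req[0], outputs[0], workspace, use_adaptive);
}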