This repository has been archived by the owner on Nov 17, 2023. It is now read-only.
[MKLDNN] add quantized sum #14614
Merged
Commits (32):
d928ef4  add quantized sum (rongzha1)
45d831f  fix gpu compiler error and cpu testcase fail (rongzha1)
fe60be3  add default forward function for quantized_sum (rongzha1)
b90de11  skip quantized_sum for gpu ctx (rongzha1)
b2c6b07  fix comments (rongzha1)
18c7283  fix indetation and comments (rongzha1)
659a002  retrigger CI (rongzha1)
1f20274  Merge remote-tracking branch 'origin/master' into rong_int8_pr (rongzha1)
e8e580b  alloc memeory through TmpMemMgr (rongzha1)
c96103f  fix comments Apr.12 (triplekings)
4a4556b  change sum to elemwise_add (rongzha1)
f156005  change Sum to ElemwiseAdd (rongzha1)
55b0103  fix indents (rongzha1)
f51d055  fix conflict (rongzha1)
3a794c4  retrigger CI (rongzha1)
5679389  Merge remote-tracking branch 'origin/master' into rong_int8_pr (rongzha1)
11a6206  Merge remote-tracking branch 'origin' into rong_int8_pr (triplekings)
4ddf2c7  trigger CI (rongzha1)
4e5b586  Merge remote-tracking branch 'origin' into rong_int8_pr (rongzha1)
a444555  Merge remote-tracking branch 'origin' into rong_int8_pr (rongzha1)
89c30a3  fix indentation and typo (rongzha1)
9cb8bbe  trigger CI (rongzha1)
e55b27b  fix typo (rongzha1)
fa3d1e4  fix typo (rongzha1)
11cd34a  remove USE_MKLDNN macro for requantize params (rongzha1)
c18eeec  rename param same as its op (rongzha1)
c3ef05d  Merge remote-tracking branch 'origin' into rong_int8_pr (rongzha1)
45d914a  Merge remote-tracking branch 'origin' into rong_int8_pr (rongzha1)
34bec4d  trigger CI (rongzha1)
3d5c2e7  Merge remote-tracking branch 'origin' into rong_int8_pr (rongzha1)
440a7a5  trigger CI (rongzha1)
3e6762e  trigger CI (rongzha1)
src/operator/quantization/mkldnn/mkldnn_quantized_elemwise_add-inl.h (60 additions, 0 deletions)
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
/*!
 * \file mkldnn_quantized_elemwise_add-inl.h
 * \brief
 * \author Rong Zhang
 */

#ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZED_ELEMWISE_ADD_INL_H_
#define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZED_ELEMWISE_ADD_INL_H_
#if MXNET_USE_MKLDNN == 1

#include "../../tensor/elemwise_unary_op.h"

namespace mxnet {
namespace op {

struct RequantizeElemwiseAddParam : public dmlc::Parameter<RequantizeElemwiseAddParam> {
  dmlc::optional<float> min_calib_range;
  dmlc::optional<float> max_calib_range;
  DMLC_DECLARE_PARAMETER(RequantizeElemwiseAddParam) {
    DMLC_DECLARE_FIELD(min_calib_range)
    .set_default(dmlc::optional<float>())
    .describe("The minimum scalar value in the form of float32 obtained "
              "through calibration. If present, it will be used to requantize the "
              "int8 output data.");
    DMLC_DECLARE_FIELD(max_calib_range)
    .set_default(dmlc::optional<float>())
    .describe("The maximum scalar value in the form of float32 obtained "
              "through calibration. If present, it will be used to requantize the "
              "int8 output data.");
  }
};

namespace quantized_elemwise_add_enum {
enum QuantizedElemwiseAddOutputs { kOut, kMin, kMax };
enum QuantizedElemwiseAddInputs { kDataA, kDataB, kAMin, kAMax, kBMin, kBMax };
}  // namespace quantized_elemwise_add_enum

}  // namespace op
}  // namespace mxnet

#endif  // MXNET_USE_MKLDNN == 1
#endif  // MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZED_ELEMWISE_ADD_INL_H_
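For reference, the two calibration fields feed the usual symmetric requantization scale. A sketch of the math as used in the .cc file below, where output_data_range is the integer range of the output type:

\[
s_{\text{out}} = \frac{\text{output\_data\_range}}{\max\big(\lvert \text{min\_calib\_range} \rvert,\ \lvert \text{max\_calib\_range} \rvert\big)}, \qquad q = \operatorname{round}(s_{\text{out}} \cdot x)
\]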
src/operator/quantization/mkldnn/mkldnn_quantized_elemwise_add.cc (206 additions, 0 deletions)
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * Copyright (c) 2019 by Contributors
 * \file mkldnn_quantized_elemwise_add.cc
 * \brief
 */

#if MXNET_USE_MKLDNN == 1
#include "./mkldnn_quantized_elemwise_add-inl.h"
#include "../../nn/mkldnn/mkldnn_ops-inl.h"
#include "../../nn/mkldnn/mkldnn_base-inl.h"
#include "../quantization_utils.h"

namespace mxnet {
namespace op {

DMLC_REGISTER_PARAMETER(RequantizeElemwiseAddParam);

static inline float GetScale(const NDArray& data, float min, float max) {
  auto data_range = (data.dtype() == mshadow::kInt8) ? kInt8Range : kUint8Range;
  return data_range / MaxAbs(min, max);
}
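// E.g. for int8 data with min = -2.5f and max = 1.0f, GetScale returns
// kInt8Range / 2.5f: MaxAbs picks the larger magnitude, so the observed
// float range maps symmetrically onto the full integer range.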

static void MKLDNNQuantizedElemwiseAddForward(const nnvm::NodeAttrs& attrs, const OpContext& ctx,
                                              const std::vector<NDArray>& in_data,
                                              const std::vector<OpReqType>& req,
                                              const std::vector<NDArray>& out_data) {
  const RequantizeElemwiseAddParam& params = nnvm::get<RequantizeElemwiseAddParam>(attrs.parsed);
  // inputs: A, B, A_min, A_max, B_min, B_max
  CHECK_EQ(in_data.size(), 6U) << "should be A, B, A_min, A_max, B_min, B_max";
  // outputs: C, C_min, C_max
  CHECK_EQ(out_data.size(), 3U) << "should be C, C_min, C_max";
  // Collect data min, max, absmax
  const float dataA_min = in_data[quantized_elemwise_add_enum::kAMin].data().dptr<float>()[0];
  const float dataB_min = in_data[quantized_elemwise_add_enum::kBMin].data().dptr<float>()[0];
  const float dataA_max = in_data[quantized_elemwise_add_enum::kAMax].data().dptr<float>()[0];
  const float dataB_max = in_data[quantized_elemwise_add_enum::kBMax].data().dptr<float>()[0];
  const float dataA_absmax = MaxAbs(dataA_min, dataA_max);
  const float dataB_absmax = MaxAbs(dataB_min, dataB_max);

  auto dataA_mem = in_data[quantized_elemwise_add_enum::kDataA].GetMKLDNNData();
  auto dataB_mem = in_data[quantized_elemwise_add_enum::kDataB].GetMKLDNNData();
  const bool is_dataA_int8 = (in_data[quantized_elemwise_add_enum::kDataA].dtype()
                              == mshadow::kInt8);
  const size_t dataA_range = is_dataA_int8 ? kInt8Range : kUint8Range;

  const float A_scale = GetScale(in_data[quantized_elemwise_add_enum::kDataA],
                                 dataA_min,
                                 dataA_max);
  const float B_scale = GetScale(in_data[quantized_elemwise_add_enum::kDataB],
                                 dataB_min,
                                 dataB_max);
  // rescaled_mem holds the reordered mkldnn memory (used when dtypes differ)
  mkldnn::memory *rescaled_mem;

  // output defaults to int32; narrow the range when the graph requests
  // an int8/uint8 output
  size_t output_data_range = kInt32Range;
  auto output_data_type = mkldnn::memory::s32;
  if (out_data[quantized_elemwise_add_enum::kOut].dtype() == mshadow::kInt8) {
    output_data_range = kInt8Range;
    output_data_type = mkldnn::memory::s8;
  } else if (out_data[quantized_elemwise_add_enum::kOut].dtype() == mshadow::kUint8) {
    output_data_range = kUint8Range;
    output_data_type = mkldnn::memory::u8;
  } else {
    output_data_range = kInt32Range;
    output_data_type = mkldnn::memory::s32;
  }

  float output_min = 0;
  float output_max = 0;
  float out_data_scale = 0;
  if (params.max_calib_range.has_value() && params.min_calib_range.has_value()) {
    output_min = params.min_calib_range.value();
    output_max = params.max_calib_range.value();
    out_data_scale = output_data_range / MaxAbs(output_min, output_max);
  } else {
    output_max = dataA_absmax + dataB_absmax;
    output_min = -output_max;
  }
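  // Without calibration, fall back to the worst case: |a + b| <= |a| + |b|,
  // so [-(dataA_absmax + dataB_absmax), dataA_absmax + dataB_absmax] always
  // contains the true sum.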
  // two scales: scales[0] for dataA, scales[1] for dataB
  const int scales_num = 2;
  std::vector<float> scales(scales_num, 1);
  if (in_data[quantized_elemwise_add_enum::kDataA].dtype()
      != in_data[quantized_elemwise_add_enum::kDataB].dtype()) {
    auto s8_pd = is_dataA_int8
                 ? dataA_mem->get_primitive_desc()
                 : dataB_mem->get_primitive_desc();
    rescaled_mem = TmpMemMgr::Get()->Alloc(s8_pd);
    float u8_reorder_scale = 0;
    if (params.max_calib_range.has_value() && params.min_calib_range.has_value()) {
      if (is_dataA_int8) {
        u8_reorder_scale = out_data_scale / B_scale;
        scales[0] = out_data_scale / A_scale;
      } else {
        u8_reorder_scale = out_data_scale / A_scale;
        scales[1] = out_data_scale / B_scale;
      }
    } else {
      // x * dataA_absmax / dataA_range = y * (dataA_absmax + dataB_absmax) / output_range
      if (is_dataA_int8) {
        u8_reorder_scale = dataB_absmax * output_data_range
                           / ((dataA_absmax + dataB_absmax) * kUint8Range);
        scales[0] = dataA_absmax * output_data_range
                    / ((dataA_absmax + dataB_absmax) * dataA_range);
      } else {
        u8_reorder_scale = dataA_absmax * output_data_range
                           / ((dataA_absmax + dataB_absmax) * dataA_range);
        scales[1] = dataB_absmax * output_data_range
                    / ((dataA_absmax + dataB_absmax) * kInt8Range);
      }
    }
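    // Unify the two dtypes: reorder the u8 input into the s8 primitive
    // descriptor, folding u8_reorder_scale into the reorder, so mkldnn::sum
    // sees two inputs with a single common type and layout.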
    std::vector<float> reorder_scale = {u8_reorder_scale};
    primitive_attr reorder_attr;
    reorder_attr.set_int_output_round_mode(round_mode::round_nearest);
    reorder_attr.set_output_scales(0, reorder_scale);
    auto u8_mem = is_dataA_int8 ? dataB_mem : dataA_mem;
    const auto reorder_pd = mkldnn::reorder::primitive_desc(u8_mem->get_primitive_desc(),
                                                            s8_pd,
                                                            reorder_attr);
    MKLDNNStream::Get()->RegisterPrim(mkldnn::reorder(reorder_pd, *u8_mem, *rescaled_mem));

    if (is_dataA_int8) {
      dataB_mem = rescaled_mem;
    } else {
      dataA_mem = rescaled_mem;
    }
  } else {
    // same data type, hence the same data range
    if (params.max_calib_range.has_value() && params.min_calib_range.has_value()) {
      scales[0] = out_data_scale / A_scale;
      scales[1] = out_data_scale / B_scale;
    } else {
      scales[0] = dataA_absmax * output_data_range / ((dataA_absmax + dataB_absmax) * dataA_range);
      scales[1] = dataB_absmax * output_data_range / ((dataA_absmax + dataB_absmax) * dataA_range);
    }
  }
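  // Note: in the same-dtype branch above, dataA and dataB share one integer
  // range, so reusing dataA_range in both scale expressions is intentional.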

  std::vector<mkldnn::primitive::at> in_prims;
  std::vector<mkldnn::memory::primitive_desc> in_pds;
  in_prims.push_back(*dataA_mem);
  in_prims.push_back(*dataB_mem);
  in_pds.push_back(dataA_mem->get_primitive_desc());
  in_pds.push_back(dataB_mem->get_primitive_desc());
  size_t i_ndim = in_data[quantized_elemwise_add_enum::kDataA].shape().ndim();
  mkldnn::memory::dims i_dims = mkldnn::memory::dims(i_ndim);
  for (size_t i = 0; i < i_ndim; i++) {
    i_dims[i] = static_cast<int>(in_data[quantized_elemwise_add_enum::kDataA].shape()[i]);
  }
  mkldnn::memory::format i_fmt = static_cast<mkldnn::memory::format>(
      in_pds[quantized_elemwise_add_enum::kDataA].desc().data.format);
  auto output_desc = mkldnn::memory::desc(i_dims, output_data_type, i_fmt);
  mkldnn::sum::primitive_desc pdesc(output_desc, scales, in_pds);
  auto mem = CreateMKLDNNMem(out_data[quantized_elemwise_add_enum::kOut],
                             pdesc.dst_primitive_desc(),
                             req[0],
                             &in_data[0]);
  MKLDNNStream *stream = MKLDNNStream::Get();
  stream->RegisterPrim(mkldnn::sum(pdesc, in_prims, *mem.second));
  CommitOutput(out_data[quantized_elemwise_add_enum::kOut], mem);
  stream->Submit();

  out_data[quantized_elemwise_add_enum::kMin].data().dptr<float>()[0] = output_min;
  out_data[quantized_elemwise_add_enum::kMax].data().dptr<float>()[0] = output_max;
}

inline static bool ElemwiseAddStorageType(const nnvm::NodeAttrs& attrs, const int dev_mask,
                                          DispatchMode* dispatch_mode, std::vector<int>* in_attrs,
                                          std::vector<int>* out_attrs) {
  // Check num of inputs: A, B, A_min, A_max, B_min, B_max
  CHECK_EQ(in_attrs->size(), 6U);
  // Check num of outputs: C, C_min, C_max
  CHECK_EQ(out_attrs->size(), 3U);

  return MKLDNNStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs, out_attrs);
}
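// The third argument to MKLDNNStorageType (true) marks the op as
// MKLDNN-capable, so storage-type dispatch prefers the MKLDNN path on CPU.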

NNVM_REGISTER_OP(_contrib_quantized_elemwise_add)
.set_attr<FInferStorageType>("FInferStorageType", ElemwiseAddStorageType)
.set_attr<FComputeEx>("FComputeEx<cpu>", MKLDNNQuantizedElemwiseAddForward)
.set_attr<bool>("TIsMKLDNN", true)
.set_attr_parser(ParamParser<RequantizeElemwiseAddParam>)
.add_arguments(RequantizeElemwiseAddParam::__FIELDS__());
}  // namespace op
}  // namespace mxnet

#endif  // MXNET_USE_MKLDNN == 1
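
To make the scale arithmetic concrete, here is a minimal, self-contained C++ sketch of the same-dtype, no-calibration branch above. It is hypothetical illustration code, not MXNet/MKL-DNN code; the names and example values are invented.

// quantized_add_sketch.cc: scalar illustration of the blended per-input
// scales that the kernel folds into mkldnn::sum.
#include <cmath>
#include <cstdio>

int main() {
  const float kInt8Range = 127.0f;
  const float a_absmax = 2.0f;                   // float range of A: [-2, 2]
  const float b_absmax = 4.0f;                   // float range of B: [-4, 4]
  const float out_absmax = a_absmax + b_absmax;  // worst case: |a+b| <= |a|+|b|

  // Per-input scales, mirroring
  // scales[i] = absmax_i * output_range / ((a_absmax + b_absmax) * input_range):
  const float scale_a = a_absmax * kInt8Range / (out_absmax * kInt8Range);  // 1/3
  const float scale_b = b_absmax * kInt8Range / (out_absmax * kInt8Range);  // 2/3

  // a = 1.0f quantizes to round(1.0 * 127 / 2.0) = 64;
  // b = -3.0f quantizes to round(-3.0 * 127 / 4.0) = -95.
  const int qa = 64;
  const int qb = -95;

  // The weighted integer sum approximates (a + b) * kInt8Range / out_absmax.
  const long q_out = std::lround(qa * scale_a + qb * scale_b);  // -42
  const float deq = q_out * out_absmax / kInt8Range;            // about -1.98
  std::printf("q_out = %ld, dequantized sum = %.3f (true sum = -2.0)\n", q_out, deq);
  return 0;
}

Dequantizing q_out with the output range recovers approximately a + b, which is what folding the per-input scales into the sum primitive achieves tensor-wide.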
Review comment: It's "quantize" in the operator name but "requantize" in the param name. Is it intentional?

Reply: Yes, this is for fusion with requantize.