Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
[BUGFIX] Fix AmpCast for float16 (#19749)
Browse files Browse the repository at this point in the history
* Fix AmpCast for float16

oneDNN doesn't support the float16 format, so a fallback to the standard
implementation is needed.
This fixes issue 19631.

* Enable amp_cast test for float16 on CPU context
  • Loading branch information
anko-intel authored Feb 5, 2021
1 parent 35d5ffe commit 0a65920
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 19 deletions.
1 change: 1 addition & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ List of Contributors
* [Joe Evans](https://github.com/josephevans)
* [Zhaoqi Zhu](https://github.com/zha0q1)
* [Harshit Sharma](https://github.com/harshitshrma)
* [Andrzej Kotlowski](https://github.com/anko-intel)

Label Bot
---------
Expand Down
38 changes: 21 additions & 17 deletions src/operator/tensor/amp_cast.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,25 +41,29 @@ static void AMPCastExCPU(const nnvm::NodeAttrs& attrs,
if (req[0] == kWriteInplace) {
return;
}
mkldnn::engine cpu_engine = mxnet::CpuEngine::Get()->get_engine();
auto data = inputs[0];
if (data.IsView() && data.IsMKLDNNData())
data = data.Reorder2Default();
const auto i_mem = data.GetMKLDNNData();
const size_t i_ndim = data.shape().ndim();
mkldnn::memory::dims i_dims = mkldnn::memory::dims(i_ndim);
for (size_t i = 0; i < i_ndim; i++) {
i_dims[i] = static_cast<int>(data.shape()[i]);
if (data.dtype() != mshadow::kFloat16 && outputs[0].dtype() != mshadow::kFloat16) {
mkldnn::engine cpu_engine = mxnet::CpuEngine::Get()->get_engine();
if (data.IsView() && data.IsMKLDNNData())
data = data.Reorder2Default();
const auto i_mem = data.GetMKLDNNData();
const size_t i_ndim = data.shape().ndim();
mkldnn::memory::dims i_dims = mkldnn::memory::dims(i_ndim);
for (size_t i = 0; i < i_ndim; i++) {
i_dims[i] = static_cast<int>(data.shape()[i]);
}
const auto o_desc =
mkldnn::memory::desc(i_dims, get_mkldnn_type(outputs[0].dtype()),
static_cast<mkldnn::memory::format_tag>(GetDefaultFormat(i_ndim)));
const auto out_mem = CreateMKLDNNMem(outputs[0], o_desc, req[0]);
mkldnn_args_map_t reorder_args;
reorder_args[MKLDNN_ARG_SRC] = *i_mem;
reorder_args[MKLDNN_ARG_DST] = *out_mem.second;
MKLDNNStream::Get()->RegisterPrimArgs(mkldnn::reorder(*i_mem, *out_mem.second), reorder_args);
MKLDNNStream::Get()->Submit();
return;
}
const auto o_desc =
mkldnn::memory::desc(i_dims, get_mkldnn_type(outputs[0].dtype()),
static_cast<mkldnn::memory::format_tag>(GetDefaultFormat(i_ndim)));
const auto out_mem = CreateMKLDNNMem(outputs[0], o_desc, req[0]);
mkldnn_args_map_t reorder_args;
reorder_args[MKLDNN_ARG_SRC] = *i_mem;
reorder_args[MKLDNN_ARG_DST] = *out_mem.second;
MKLDNNStream::Get()->RegisterPrimArgs(mkldnn::reorder(*i_mem, *out_mem.second), reorder_args);
MKLDNNStream::Get()->Submit();
FallBackCompute(AMPCastCompute<cpu>, attrs, ctx, inputs, req, outputs);
}

inline static bool AMPCastStorageType(const nnvm::NodeAttrs& attrs, const int dev_mask,
Expand Down
3 changes: 1 addition & 2 deletions tests/python/unittest/test_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4856,8 +4856,7 @@ def check_cast(op, input_np, expected_output):
fp32_val, model_fp16_val, np_fp16_val)

check_cast(mx.sym.Cast, input_np, expected_output)
if default_context().device_type == 'gpu':
check_cast(mx.sym.amp_cast, input_np, expected_output)
check_cast(mx.sym.amp_cast, input_np, expected_output)


@with_seed()
Expand Down

0 comments on commit 0a65920

Please sign in to comment.