Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
[BUGFIX] Fix AmpCast for float16 (#19749)
Browse files Browse the repository at this point in the history
* Fix AmpCast for float16

oneDNN doesn't support the float16 format, so a fallback to the standard
implementation is needed.
This fixes issue 19631.

* Enable amp_cast test for float16 on CPU context
  • Loading branch information
anko-intel authored Feb 5, 2021
1 parent 35d5ffe commit 0a65920
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 19 deletions.
1 change: 1 addition & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ List of Contributors
* [Joe Evans](https://github.com/josephevans)
* [Zhaoqi Zhu](https://github.com/zha0q1)
* [Harshit Sharma](https://github.com/harshitshrma)
* [Andrzej Kotlowski](https://github.com/anko-intel)

Label Bot
---------
Expand Down
38 changes: 21 additions & 17 deletions src/operator/tensor/amp_cast.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,25 +41,29 @@ static void AMPCastExCPU(const nnvm::NodeAttrs& attrs,
if (req[0] == kWriteInplace) {
return;
}
mkldnn::engine cpu_engine = mxnet::CpuEngine::Get()->get_engine();
auto data = inputs[0];
if (data.IsView() && data.IsMKLDNNData())
data = data.Reorder2Default();
const auto i_mem = data.GetMKLDNNData();
const size_t i_ndim = data.shape().ndim();
mkldnn::memory::dims i_dims = mkldnn::memory::dims(i_ndim);
for (size_t i = 0; i < i_ndim; i++) {
i_dims[i] = static_cast<int>(data.shape()[i]);
if (data.dtype() != mshadow::kFloat16 && outputs[0].dtype() != mshadow::kFloat16) {
mkldnn::engine cpu_engine = mxnet::CpuEngine::Get()->get_engine();
if (data.IsView() && data.IsMKLDNNData())
data = data.Reorder2Default();
const auto i_mem = data.GetMKLDNNData();
const size_t i_ndim = data.shape().ndim();
mkldnn::memory::dims i_dims = mkldnn::memory::dims(i_ndim);
for (size_t i = 0; i < i_ndim; i++) {
i_dims[i] = static_cast<int>(data.shape()[i]);
}
const auto o_desc =
mkldnn::memory::desc(i_dims, get_mkldnn_type(outputs[0].dtype()),
static_cast<mkldnn::memory::format_tag>(GetDefaultFormat(i_ndim)));
const auto out_mem = CreateMKLDNNMem(outputs[0], o_desc, req[0]);
mkldnn_args_map_t reorder_args;
reorder_args[MKLDNN_ARG_SRC] = *i_mem;
reorder_args[MKLDNN_ARG_DST] = *out_mem.second;
MKLDNNStream::Get()->RegisterPrimArgs(mkldnn::reorder(*i_mem, *out_mem.second), reorder_args);
MKLDNNStream::Get()->Submit();
return;
}
const auto o_desc =
mkldnn::memory::desc(i_dims, get_mkldnn_type(outputs[0].dtype()),
static_cast<mkldnn::memory::format_tag>(GetDefaultFormat(i_ndim)));
const auto out_mem = CreateMKLDNNMem(outputs[0], o_desc, req[0]);
mkldnn_args_map_t reorder_args;
reorder_args[MKLDNN_ARG_SRC] = *i_mem;
reorder_args[MKLDNN_ARG_DST] = *out_mem.second;
MKLDNNStream::Get()->RegisterPrimArgs(mkldnn::reorder(*i_mem, *out_mem.second), reorder_args);
MKLDNNStream::Get()->Submit();
FallBackCompute(AMPCastCompute<cpu>, attrs, ctx, inputs, req, outputs);
}

inline static bool AMPCastStorageType(const nnvm::NodeAttrs& attrs, const int dev_mask,
Expand Down
3 changes: 1 addition & 2 deletions tests/python/unittest/test_operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4856,8 +4856,7 @@ def check_cast(op, input_np, expected_output):
fp32_val, model_fp16_val, np_fp16_val)

check_cast(mx.sym.Cast, input_np, expected_output)
if default_context().device_type == 'gpu':
check_cast(mx.sym.amp_cast, input_np, expected_output)
check_cast(mx.sym.amp_cast, input_np, expected_output)


@with_seed()
Expand Down

0 comments on commit 0a65920

Please sign in to comment.