[BYOC][ACL] Add maximum support for float32 (#6506)
* ACL integration: add maximum support for float32.

* Added the code generation flow in arm_compute_lib.py
* Added the runtime calls in acl_runtime.cc

Change-Id: I69c5522f05a46c1dd235da5d57fe499134de0425

* Add maximum to the list of supported functions

Change-Id: Ia49087756be4c3ac92a3dc76fe03fb00de468f8d
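Taken together, the two sides wire maximum through the BYOC flow: the Python codegen side annotates fp32 maximum calls for the arm_compute_lib target, and the C++ runtime side maps the resulting JSON node onto ACL's NEElementwiseMax. A minimal sketch of exercising the offload from Python (assuming a TVM build with ACL support enabled; partition_for_arm_compute_lib is the existing helper in arm_compute_lib.py):

import tvm
from tvm import relay
from tvm.relay.op.contrib.arm_compute_lib import partition_for_arm_compute_lib

# An fp32 elementwise maximum, the case this commit enables.
a = relay.var("a", shape=(10, 10), dtype="float32")
b = relay.var("b", shape=(10, 10), dtype="float32")
mod = tvm.IRModule.from_expr(relay.maximum(a, b))

# After partitioning, the maximum call should sit inside a function
# marked for the arm_compute_lib external codegen.
mod = partition_for_arm_compute_lib(mod)
print(mod)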
Giuseppe Rossini authored Sep 23, 2020
1 parent 56b18ec commit b6e6395
Showing 4 changed files with 132 additions and 1 deletion.
2 changes: 2 additions & 0 deletions docs/deploy/arm_compute_lib.rst
@@ -232,6 +232,8 @@ Operator support
+----------------------+-------------------------------------------------------------------------+
| reshape              | fp32, uint8                                                             |
+----------------------+-------------------------------------------------------------------------+
| maximum              | fp32                                                                    |
+----------------------+-------------------------------------------------------------------------+

.. note::
    A composite operator is a series of operators that map to a single Arm Compute Library operator. You can view this
8 changes: 8 additions & 0 deletions python/tvm/relay/op/contrib/arm_compute_lib.py
@@ -337,3 +337,11 @@ def global_avg_pool2d(attrs, args):
    if attrs.layout != "NHWC":
        return False
    return True


@tvm.ir.register_op_attr("maximum", "target.arm_compute_lib")
def maximum(attrs, args):
    """Check if the external ACL codegen for maximum should be used."""
    type_a = args[0].checked_type
    type_b = args[1].checked_type
    return (type_a.dtype == "float32") and (type_b.dtype == "float32")
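The predicate only sees meaningful dtypes after type inference has populated checked_type on the arguments. A standalone sketch of its behaviour (hypothetical direct call; in the real flow the annotation pass invokes it during partitioning):

import tvm
from tvm import relay

a = relay.var("a", shape=(1, 8), dtype="float32")
b = relay.var("b", shape=(1, 8), dtype="float32")
mod = relay.transform.InferType()(tvm.IRModule.from_expr(relay.maximum(a, b)))
call = mod["main"].body

# Both inputs are float32, so ACL is chosen; any other dtype returns False.
print(maximum(call.attrs, call.args))  # True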
19 changes: 18 additions & 1 deletion src/runtime/contrib/arm_compute_lib/acl_runtime.cc
@@ -31,6 +31,7 @@
#ifdef TVM_GRAPH_RUNTIME_ARM_COMPUTE_LIB
#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/NEON/functions/NEConvolutionLayer.h>
#include <arm_compute/runtime/NEON/functions/NEElementwiseOperations.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
#include <arm_compute/runtime/NEON/functions/NEPoolingLayer.h>
#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h>
@@ -139,12 +140,13 @@ class ACLRuntime : public JSONRuntimeBase {
          CreateGlobalPoolingLayer(&layer_, node);
        } else if ("reshape" == op_name) {
          CreateReshapeLayer(&layer_, node);
        } else if ("maximum" == op_name) {
          CreateMaximumLayer(&layer_, node);
        } else {
          LOG(FATAL) << "Unsupported op: " << op_name;
        }
      }
    }

    this->layer_.function->prepare();
    if (num_pools > 0) mm->populate(this->allocator_, num_pools);
  }
@@ -401,6 +403,21 @@ class ACLRuntime : public JSONRuntimeBase {
    layer->function = function;
  }

  /*!
   * \brief Create a maximum layer.
   *
   * \param layer The ACL layer to build, containing the inputs, outputs and the ACL function.
   * \param node The JSON representation of the operator.
   */
  void CreateMaximumLayer(CachedLayer* layer, const JSONGraphNode& node) {
    // An elementwise maximum has exactly two inputs and a single output.
    layer->inputs.push_back(MakeACLTensorFromJSONEntry(node.GetInputs()[0]));
    layer->inputs.push_back(MakeACLTensorFromJSONEntry(node.GetInputs()[1]));
    layer->outputs.push_back(MakeACLTensorFromJSONNode(node));
    auto function = std::make_shared<arm_compute::NEElementwiseMax>();
    function->configure(&layer->inputs[0], &layer->inputs[1], &layer->outputs[0]);
    layer->function = function;
  }

  /*! \brief Allow ACL functions to request auxiliary memory from TVM. */
  ACLAllocator allocator_;
  /*!
104 changes: 104 additions & 0 deletions tests/python/contrib/test_arm_compute_lib/test_maximum.py
@@ -0,0 +1,104 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Arm Compute Library integration reshape tests."""

import numpy as np

import tvm
from tvm import relay

from .infrastructure import (
    skip_runtime_test,
    skip_codegen_test,
    build_and_run,
    verify,
    verify_codegen,
)
from .infrastructure import Device


def _get_model(input_shape, dtype, var_names):
    """Return a model and any parameters it may have."""
    a = relay.var(next(var_names), shape=input_shape, dtype=dtype)
    b = relay.var(next(var_names), shape=input_shape, dtype=dtype)
    out = relay.maximum(a, b)
    return out


def _get_expected_codegen(shape, dtype):
    node = {
        "op": "kernel",
        "name": "maximum",
        "inputs": [[0, 0, 0], [1, 0, 0]],
        "attrs": {
            "num_inputs": "2",
            "num_outputs": "1",
            "shape": [[list(shape)]],
            "dtype": [[dtype]],
        },
    }

    inputs = [
        {"op": "input", "name": "", "attrs": {"shape": [[list(shape)]], "dtype": [[dtype]]}},
        {"op": "input", "name": "", "attrs": {"shape": [[list(shape)]], "dtype": [[dtype]]}},
    ]
    inputs.append(node)
    return inputs


def test_maximum():
    Device.load("test_config.json")

    if skip_runtime_test():
        return

    device = Device()
    np.random.seed(0)

    for dtype, low, high, atol, rtol in [
        ("float32", -127, 128, 0.001, 0.001),
        ("float32", -1, 1, 0.001, 0.001),
    ]:
        inputs = {
            "a": tvm.nd.array(np.random.uniform(low, high, (100, 100)).astype(dtype)),
            "b": tvm.nd.array(np.random.uniform(low, high, (100, 100)).astype(dtype)),
        }
        outputs = []
        func = _get_model(inputs["a"].shape, dtype, iter(inputs))

        for acl in [False, True]:
            outputs.append(build_and_run(func, inputs, 1, None, device, enable_acl=acl)[0])

        verify(outputs, atol=atol, rtol=rtol)


def test_codegen_maximum():
    if skip_codegen_test():
        return

    shape = (100, 100)
    inputs = {"a", "b"}
    for dtype in ["float32"]:
        args = (shape, dtype)
        func = _get_model(*args, iter(inputs))
        exp_codegen = _get_expected_codegen(*args)
        verify_codegen(func, exp_codegen, 1)


if __name__ == "__main__":
    test_maximum()
    test_codegen_maximum()
