Arm(R) Ethos(TM)-U NPU codegen integration

This commit integrates the codegen for Arm® Ethos™-U. * Adding Conv2D tests and a mobilenet_v1 conv2d offload test. Co-authored-by: Grant Watson <grant.watson@arm.com> Co-authored-by: Leandro Nunes <leandro.nunes@arm.com> Co-authored-by: Christopher Sidebottom <chris.sidebottom@arm.com> Change-Id: Iff3de833842be05ce3d3525efd8f9a301c8fbd27
apache · Sep 13, 2021 · 5349732 · 5349732
1 parent 1a1ee1f
commit 5349732
Show file tree

Hide file tree

Showing 17 changed files with 1,305 additions and 48 deletions.
diff --git a/python/tvm/relay/backend/contrib/ethosu/__init__.py b/python/tvm/relay/backend/contrib/ethosu/__init__.py
@@ -19,6 +19,7 @@
 from . import legalize
 from . import preprocess
 from . import errors
+from . import codegen
 from . import vela_api
 from . import tir_to_cs_translator
 from .util import partition_for_ethosu
diff --git a/python/tvm/relay/backend/contrib/ethosu/codegen.py b/python/tvm/relay/backend/contrib/ethosu/codegen.py
@@ -0,0 +1,83 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Codegen for Arm(R) Ethos(TM)-U"""
+import tvm
+from tvm import relay
+from tvm.relay.backend.contrib.ethosu.tir.compiler import lower_to_tir
+from tvm.relay.backend.contrib.ethosu.tir.scheduler import copy_constants
+from tvm.relay.backend.contrib.ethosu.legalize import LegalizeEthosU
+from tvm.relay.backend.contrib.ethosu import tir_to_cs_translator
+from tvm.relay.backend.contrib.ethosu import util
+
+
+@tvm._ffi.register_func("relay.ext.ethosu.constant_updater")
+def constant_updater(expr, symbol):  # pylint: disable=unused-argument
+    """Constant updater registered as "relay.ext.ethosu.constant_updater".
+
+    We don't want the build process to extract constants to be loaded in
+    the runtime, as we are embedding them inside the C runtime.Module.
+    Both arguments are ignored and an empty dict is always returned.
+    """
+    return {}
+
+
+@tvm._ffi.register_func("relay.ext.ethosu")
+def ethosu_compiler(ref):
+    """Compile a relay function of NPU compatible operators to a command stream.
+
+    The generated command stream is loaded into the runtime module
+    (created through the "runtime.module.ethosu.create" global function)
+    that interfaces with the NPU driver.
+
+    Parameters
+    ----------
+    ref : tvm.ir.function.BaseFunc
+        The function to compile. It must carry a "global_symbol"
+        attribute and have exactly one parameter.
+
+    Returns
+    -------
+    The value returned by "runtime.module.ethosu.create" when called with
+    the function name, command stream, encoded constants, scratch size,
+    input size and output size.
+    """
+    assert isinstance(ref, tvm.ir.function.BaseFunc)
+    symbol_name = ref.attrs["global_symbol"]
+    # There should only be a single input
+    assert len(ref.params) == 1
+    input_bytes = util.calculate_size_bytes(ref.params[0])
+    output_bytes = util.calculate_size_bytes(ref.body)
+    command_stream, constants, scratch_bytes = _compile(ref)
+    create_module = tvm._ffi.get_global_func("runtime.module.ethosu.create")
+    return create_module(
+        symbol_name, command_stream, constants, scratch_bytes, input_bytes, output_bytes
+    )
+
+
+def _compile(ext_func):
+    """Run all the passes that lower an external relay function to a command stream.
+
+    The function is placed in a fresh IRModule as "main", legalized with
+    LegalizeEthosU, type-inferred, lowered to TIR and finally translated
+    by tir_to_cs_translator.
+
+    Parameters
+    ----------
+    ext_func : tvm.relay.function.Function
+        The partitioned relay function
+
+    Returns
+    -------
+    cs : str
+        A hex string of the bytes of command stream
+    encoded_constants : str
+        A hex string of the bytes that includes concat'd
+        encoded weights, encoded biases and scales.
+    scratch_size : int
+        The size of the scratch buffer needed.
+    """
+    relay_mod = tvm.IRModule()
+    relay_mod["main"] = ext_func
+    legalized_mod = LegalizeEthosU()(relay_mod)
+    typed_mod = relay.transform.InferType()(legalized_mod)
+    # We are currently using the copy_constants scheduler. In the long run,
+    # this should be a single intelligent and composite scheduler
+    # that can perform scheduling based on user inputs such as
+    # scratch memory size.
+    tir_mod, params = lower_to_tir(typed_mod["main"], copy_constants())
+    # Unpack explicitly so the caller always receives a 3-tuple.
+    command_stream, constants, scratch_bytes = tir_to_cs_translator.translate(tir_mod, params)
+    return command_stream, constants, scratch_bytes
diff --git a/python/tvm/relay/backend/contrib/ethosu/legalize.py b/python/tvm/relay/backend/contrib/ethosu/legalize.py
@@ -221,3 +221,9 @@ def transform_module(
         mod = LegalizeSplit()(mod)
         mod = LegalizeEthosUConv2D()(mod)
         return mod
+
+    def __call__(self, *args, **kwargs):
+        # pylint is unable to figure out that the decorated
+        # class is callable, so this stub is added to
+        # suppress the warning.
+        pass
diff --git a/python/tvm/relay/backend/contrib/ethosu/util.py b/python/tvm/relay/backend/contrib/ethosu/util.py
@@ -197,3 +197,15 @@ def get_dim_value(layout: str, dim: int):
         if dim_char == dim:
             return idx
     return None
+
+
+def calculate_size_bytes(expr):
+    """Calculate the number of bytes required to hold the tensor/relay.expr.
+
+    Parameters
+    ----------
+    expr
+        An expression whose ``checked_type`` exposes ``dtype`` and ``shape``.
+
+    Returns
+    -------
+    The element size in bytes (bit width floor-divided by 8) multiplied
+    by the product of all the shape dimensions.
+    """
+    try:
+        bits_per_element = np.iinfo(expr.checked_type.dtype).bits
+    except ValueError:
+        # np.iinfo rejected the dtype; look it up as a floating-point type.
+        bits_per_element = np.finfo(expr.checked_type.dtype).bits
+    bytes_per_element = bits_per_element // 8
+    dims = list(expr.checked_type.shape)
+    return bytes_per_element * np.prod(dims)
diff --git a/src/relay/backend/aot_executor_codegen.cc b/src/relay/backend/aot_executor_codegen.cc
@@ -650,7 +650,6 @@ class AOTExecutorCodegen : public MixedModeVisitor {
     // Apply storage rewrite pass to the runner function to do memory planning
     auto storage_rewrite = tir::transform::StorageRewrite();
     mod_run = storage_rewrite(mod_run);
-
     // The workspace for main function should be calculated after performing storage_rewrite for
     // the top level TIR function.
     auto workspace_byte_alignment =