[ETHOSN] Update driver stack version to 22.11 (#13637)
- New process memory allocator is used to create buffers and networks (see the sketch below the change summary).
- Support for the 22.08 stack has been kept intact in the sources and tests until a new docker image is built and used.
- Tests were modified to meet limitations imposed on input zero point and kernel size by the NPU software.
- Removed defining ETHOSN_API_VERSION from the cmake infra.
ashutosh-arm authored Dec 20, 2022
1 parent 26a205c commit 5019dce
Showing 12 changed files with 204 additions and 29 deletions.
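For orientation before the per-file diffs, here is a minimal sketch of the buffer/network lifecycle under the new process memory allocator, assuming the 22.11 (API 3.2.0) signatures used in this patch. The function name `RunOnce` and the single input/output layout are hypothetical, and waiting/error handling are elided:

```cpp
// A minimal sketch, assuming the ProcMemAllocator calls used in this patch.
#include <algorithm>
#include <cstdint>
#include <memory>
#include <string>

#include "ethosn_driver_library/Inference.hpp"
#include "ethosn_driver_library/Network.hpp"
#include "ethosn_driver_library/ProcMemAllocator.hpp"

namespace dl = ::ethosn::driver_library;

void RunOnce(const std::string& compiled_stream, uint8_t* in_data,
             uint32_t in_size, uint8_t* out_data, uint32_t out_size) {
  // 22.11: networks and buffers are created through a per-process memory
  // allocator rather than constructed directly.
  dl::ProcMemAllocator proc_mem_alloc;
  dl::Network network =
      proc_mem_alloc.CreateNetwork(compiled_stream.c_str(), compiled_stream.size());

  // Input buffers wrap existing data; output buffers are created by size.
  dl::Buffer ifm = proc_mem_alloc.CreateBuffer(in_data, in_size, dl::DataFormat::NHWC);
  dl::Buffer ofm = proc_mem_alloc.CreateBuffer(out_size, dl::DataFormat::NHWC);

  dl::Buffer* ifm_raw[] = {&ifm};
  dl::Buffer* ofm_raw[] = {&ofm};
  std::unique_ptr<dl::Inference> inference(
      network.ScheduleInference(ifm_raw, 1, ofm_raw, 1));

  // ... wait for the inference to complete (see WaitForInference in
  // ethosn_device.cc), then map the output buffer and copy the result out.
  uint8_t* result = ofm.Map();
  std::copy(result, result + ofm.GetSize(), out_data);
  ofm.Unmap();
}
```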
16 changes: 1 addition & 15 deletions cmake/utils/FindEthosN.cmake
@@ -58,18 +58,6 @@ macro(find_ethosn use_ethosn)
PATHS ${__ethosn_stack}/lib)
find_library(ETHOSN_COMPILER_LIBRARY NAMES EthosNSupport)

list(GET ETHOSN_INCLUDE_DIRS 0 filename)
set(filename "${filename}/ethosn_support_library/Support.hpp")
file(READ ${filename} ETHOSN_SUPPORT_H)
string(REGEX MATCH "VERSION_MAJOR ([0-9]*)" _ ${ETHOSN_SUPPORT_H})
set(ver_major ${CMAKE_MATCH_1})
string(REGEX MATCH "VERSION_MINOR ([0-9]*)" _ ${ETHOSN_SUPPORT_H})
set(ver_minor ${CMAKE_MATCH_1})
string(REGEX MATCH "VERSION_PATCH ([0-9]*)" _ ${ETHOSN_SUPPORT_H})
set(ver_patch ${CMAKE_MATCH_1})
set(ETHOSN_PACKAGE_VERSION "${ver_major}.${ver_minor}.${ver_patch}")
set(ETHOSN_DEFINITIONS -DETHOSN_API_VERSION=${USE_ETHOSN_API_VERSION})

# Runtime hardware support. Driver library also needed for
# test support.
find_path(_DL_DIR NAMES Network.hpp
@@ -81,9 +69,7 @@ macro(find_ethosn use_ethosn)
PATHS ${__ethosn_stack}/lib)
find_library(ETHOSN_RUNTIME_LIBRARY NAMES EthosNDriver)
if(${USE_ETHOSN_HW} MATCHES ${IS_TRUE_PATTERN})
set(ETHOSN_DEFINITIONS -DETHOSN_HW -DETHOSN_API_VERSION=${USE_ETHOSN_API_VERSION})
else()
set(ETHOSN_DEFINITIONS -DETHOSN_API_VERSION=${USE_ETHOSN_API_VERSION})
set(ETHOSN_DEFINITIONS -DETHOSN_HW)
endif()

if(ETHOSN_COMPILER_LIBRARY)
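With the cmake-side version parsing gone, the sources detect the support library version at compile time instead. A short sketch of the gate, mirroring the check this patch adds to ethosn_runtime.h further down:

```cpp
// Sketch: derive the API gate from the support library's own version macros
// instead of a cmake-injected ETHOSN_API_VERSION definition.
#include "ethosn_support_library/Support.hpp"

#if ETHOSN_SUPPORT_LIBRARY_VERSION_MAJOR == 3 && \
    ETHOSN_SUPPORT_LIBRARY_VERSION_MINOR == 2 && \
    ETHOSN_SUPPORT_LIBRARY_VERSION_PATCH == 0
#define _ETHOSN_API_VERSION_3_2_0
#endif
```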
2 changes: 1 addition & 1 deletion docker/install/ubuntu_install_ethosn_driver_stack.sh
@@ -22,7 +22,7 @@ set -o pipefail

repo_url="https://github.com/Arm-software/ethos-n-driver-stack"
repo_dir="ethosn-driver"
repo_revision="22.08"
repo_revision="22.11"
install_path="/opt/arm/$repo_dir"

tmpdir=$(mktemp -d)
2 changes: 1 addition & 1 deletion python/tvm/relay/op/contrib/ethosn.py
@@ -117,7 +117,7 @@ def partition_for_ethosn(mod, params=None, **opts):
ret : annotated and partitioned module.
"""
api_version = ethosn_api_version()
supported_api_versions = ["3.1.0"]
supported_api_versions = ["3.2.0", "3.1.0"]
if all(api_version != LooseVersion(exp_ver) for exp_ver in supported_api_versions):
raise ValueError(
f"Driver stack version {api_version} is unsupported. "
89 changes: 85 additions & 4 deletions src/runtime/contrib/ethosn/ethosn_device.cc
@@ -42,6 +42,9 @@

#include "ethosn_driver_library/Inference.hpp"
#include "ethosn_driver_library/Network.hpp"
#ifdef _ETHOSN_API_VERSION_3_2_0
#include "ethosn_driver_library/ProcMemAllocator.hpp"
#endif

namespace tvm {
namespace runtime {
@@ -87,6 +90,81 @@ InferenceWaitStatus WaitForInference(dl::Inference* inference, int timeout) {
return InferenceWaitStatus(InferenceWaitErrorCode::kSuccess);
}

#ifdef _ETHOSN_API_VERSION_3_2_0
void CreateBuffers(dl::ProcMemAllocator* proc_mem_alloc,
std::vector<std::shared_ptr<dl::Buffer>>* fm,
const std::vector<DLTensor*>& tensors, const std::vector<uint32_t>& tensor_sizes,
bool input) {
for (size_t i = 0; i < tensors.size(); i++) {
auto* data = static_cast<uint8_t*>(tensors[i]->data);
if (input) {
(*fm)[i] = std::make_shared<dl::Buffer>(
proc_mem_alloc->CreateBuffer(data, tensor_sizes[i], dl::DataFormat::NHWC));
} else {
(*fm)[i] = std::make_shared<dl::Buffer>(
proc_mem_alloc->CreateBuffer(tensor_sizes[i], dl::DataFormat::NHWC));
}
}
}

bool Inference(tvm::runtime::TVMArgs args, dl::ProcMemAllocator* proc_mem_alloc, dl::Network* npu,
const std::vector<uint32_t>& input_order, const std::vector<uint32_t>& output_order,
const std::vector<uint32_t>& input_sizes,
const std::vector<uint32_t>& output_sizes) {
// Unpack parameters
size_t n_inputs = input_order.size();
size_t n_outputs = output_order.size();
std::vector<DLTensor*> inputs(n_inputs);
for (size_t i = 0; i < n_inputs; i++) {
inputs[i] = args[input_order[i]];
}
std::vector<DLTensor*> outputs(n_outputs);
size_t output_offset = n_inputs;
for (size_t i = 0; i < n_outputs; i++) {
outputs[i] = args[output_order[i] + output_offset];
}

// Set up input buffers
std::vector<std::shared_ptr<dl::Buffer>> ifm(n_inputs);
CreateBuffers(proc_mem_alloc, &ifm, inputs, input_sizes, true);

// Set up output buffers
std::vector<std::shared_ptr<dl::Buffer>> ofm(n_outputs);
CreateBuffers(proc_mem_alloc, &ofm, outputs, output_sizes, false);

// Raw pointers for the inference
dl::Buffer* ifm_raw[n_inputs];
for (size_t i = 0; i < n_inputs; i++) {
ifm_raw[i] = ifm[i].get();
}
dl::Buffer* ofm_raw[n_outputs];
for (size_t i = 0; i < n_outputs; i++) {
ofm_raw[i] = ofm[i].get();
}

// Execute the inference.
std::unique_ptr<dl::Inference> inference(
npu->ScheduleInference(ifm_raw, n_inputs, ofm_raw, n_outputs));
InferenceWaitStatus result = WaitForInference(inference.get(), 60);

if (result.GetErrorCode() != InferenceWaitErrorCode::kSuccess) {
LOG(FATAL) << "An error has occurred waiting for the inference of a sub-graph on the NPU: "
<< result.GetErrorDescription();
}

for (size_t i = 0; i < n_outputs; i++) {
DLTensor* tensor = outputs[i];
dl::Buffer* source_buffer = ofm_raw[i];
uint8_t* dest_buffer = static_cast<uint8_t*>(tensor->data);
size_t size = source_buffer->GetSize();
uint8_t* source_buffer_data = source_buffer->Map();
std::copy(source_buffer_data, source_buffer_data + size, dest_buffer);
source_buffer->Unmap();
}

return true;
}
#else
void CreateBuffers(std::vector<std::shared_ptr<dl::Buffer>>* fm,
const std::vector<DLTensor*>& tensors, const std::vector<uint32_t>& tensor_sizes,
bool input) {
@@ -157,7 +235,7 @@ bool Inference(tvm::runtime::TVMArgs args, dl::Network* npu,

return true;
}

#endif
} // namespace ethosn
} // namespace runtime
} // namespace tvm
@@ -192,9 +270,12 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.test.infra.inference_result")
});

// Allow the ethos-n support code to be tested without a device
bool Inference(tvm::runtime::TVMArgs args, dl::Network* /* npu */,
const std::vector<uint32_t>& input_order, const std::vector<uint32_t>& output_order,
const std::vector<uint32_t>& input_sizes,
bool Inference(tvm::runtime::TVMArgs args,
#ifdef _ETHOSN_API_VERSION_3_2_0
dl::ProcMemAllocator* proc_mem_alloc,
#endif
dl::Network* /* npu */, const std::vector<uint32_t>& input_order,
const std::vector<uint32_t>& output_order, const std::vector<uint32_t>& input_sizes,
const std::vector<uint32_t>& output_sizes) {
std::vector<DLTensor*> outputs;
for (int argc = input_order.size(); argc < args.size(); argc++) {
7 changes: 6 additions & 1 deletion src/runtime/contrib/ethosn/ethosn_device.h
@@ -38,10 +38,15 @@ namespace dl = ::ethosn::driver_library;

using tvm::runtime::TVMArgs;

#ifdef _ETHOSN_API_VERSION_3_2_0
bool Inference(tvm::runtime::TVMArgs args, dl::ProcMemAllocator* proc_mem_alloc, dl::Network* npu,
const std::vector<uint32_t>& input_order, const std::vector<uint32_t>& output_order,
const std::vector<uint32_t>& input_sizes, const std::vector<uint32_t>& output_sizes);
#else
bool Inference(tvm::runtime::TVMArgs args, dl::Network* npu,
const std::vector<uint32_t>& input_order, const std::vector<uint32_t>& output_order,
const std::vector<uint32_t>& input_sizes, const std::vector<uint32_t>& output_sizes);

#endif
} // namespace ethosn
} // namespace runtime
} // namespace tvm
20 changes: 20 additions & 0 deletions src/runtime/contrib/ethosn/ethosn_runtime.cc
@@ -53,6 +53,11 @@ EthosnModule::EthosnModule(std::vector<OrderedCompiledNetwork>* cmms) {
if (it.compiled_cmm != nullptr) {
network_map_[it.name].compiled_cmm = std::move(it.compiled_cmm);
}
#ifdef _ETHOSN_API_VERSION_3_2_0
if (it.proc_mem_alloc != nullptr) {
network_map_[it.name].proc_mem_alloc = std::move(it.proc_mem_alloc);
}
#endif
if (it.runtime_cmm != nullptr) {
network_map_[it.name].runtime_cmm = std::move(it.runtime_cmm);
}
@@ -67,9 +72,16 @@ PackedFunc EthosnModule::GetFunction(const std::string& name,
const ObjectPtr<Object>& sptr_to_self) {
if (network_map_.find(name) != network_map_.end()) {
return PackedFunc([sptr_to_self, this, name](TVMArgs args, TVMRetValue* rv) {
#ifdef _ETHOSN_API_VERSION_3_2_0
*rv = Inference(args, network_map_[name].proc_mem_alloc.get(),
network_map_[name].runtime_cmm.get(), network_map_[name].inputs,
network_map_[name].outputs, network_map_[name].input_sizes,
network_map_[name].output_sizes);
#else
*rv = Inference(args, network_map_[name].runtime_cmm.get(), network_map_[name].inputs,
network_map_[name].outputs, network_map_[name].input_sizes,
network_map_[name].output_sizes);
#endif
});
} else {
return PackedFunc();
@@ -102,6 +114,9 @@ Module EthosnModule::LoadFromBinary(void* strm) {
cmms.resize(func_count);
for (unsigned int i = 0; i < func_count; i++) {
OrderedCompiledNetwork& compiled = cmms[i];
#ifdef _ETHOSN_API_VERSION_3_2_0
compiled.proc_mem_alloc = std::make_unique<dl::ProcMemAllocator>();
#endif
std::string ext_symbol;
std::string cmm;
uint64_t input_size;
@@ -114,7 +129,12 @@
#if defined ETHOSN_HW
// If hardware is unavailable, use the mock inference functionality. If hardware is
// available, deserialize the compiled graph.
#ifdef _ETHOSN_API_VERSION_3_2_0
compiled.runtime_cmm = std::make_unique<dl::Network>(
compiled.proc_mem_alloc->CreateNetwork(cmm.c_str(), cmm.size()));
#else
compiled.runtime_cmm = std::make_unique<dl::Network>(cmm.c_str(), cmm.size());
#endif
#endif
// Read the number of inputs
stream->Read<uint64_t>(&input_size);
11 changes: 11 additions & 0 deletions src/runtime/contrib/ethosn/ethosn_runtime.h
@@ -36,6 +36,14 @@
#include "ethosn_driver_library/Network.hpp"
#include "ethosn_support_library/Support.hpp"

#if ETHOSN_SUPPORT_LIBRARY_VERSION_MAJOR == 3 && ETHOSN_SUPPORT_LIBRARY_VERSION_MINOR == 2 && \
ETHOSN_SUPPORT_LIBRARY_VERSION_PATCH == 0
#define _ETHOSN_API_VERSION_3_2_0
#endif
#ifdef _ETHOSN_API_VERSION_3_2_0
#include "ethosn_driver_library/ProcMemAllocator.hpp"
#endif

namespace tvm {
namespace runtime {
namespace ethosn {
Expand All @@ -46,6 +54,9 @@ namespace dl = ::ethosn::driver_library;
struct OrderedCompiledNetwork {
std::unique_ptr<sl::CompiledNetwork> compiled_cmm;
std::unique_ptr<dl::Network> runtime_cmm;
#ifdef _ETHOSN_API_VERSION_3_2_0
std::unique_ptr<dl::ProcMemAllocator> proc_mem_alloc;
#endif
std::string name;
std::vector<uint32_t> inputs;
std::vector<uint32_t> outputs;
1 change: 0 additions & 1 deletion tests/python/contrib/test_ethosn/infrastructure.py
@@ -168,7 +168,6 @@ def build(
if not additional_config_args:
additional_config_args = {}
npu_config = {**get_ethosn_device_options(), **additional_config_args}
print(npu_config)
with tvm.transform.PassContext(opt_level=3, config={"relay.ext.ethos-n.options": npu_config}):
with tvm.target.Target("llvm"):
if npu:
6 changes: 5 additions & 1 deletion tests/python/contrib/test_ethosn/test_conv2d.py
@@ -22,6 +22,7 @@

import tvm
from tvm import relay
from tvm.relay.op.contrib import ethosn_api_version
from tvm.testing import requires_ethosn

from . import infrastructure as tei
@@ -227,7 +228,10 @@ def test_conv2d_depthwise(
)
),
}
input_zp = np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max)
if ethosn_api_version() == "3.2.0":
input_zp = np.random.randint(0, np.iinfo(dtype).max)
else:
input_zp = np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max)
input_sc = np.random.random() * 2
if qnn_per_channel:
kernel_sc = tvm.nd.array(
68 changes: 67 additions & 1 deletion tests/python/contrib/test_ethosn/test_conv2d_transpose.py
@@ -22,6 +22,7 @@

import tvm
from tvm import relay
from tvm.relay.op.contrib import ethosn_api_version
from tvm.testing import requires_ethosn
from . import infrastructure as tei

@@ -115,7 +116,7 @@ def _get_model(
[
((1, 2, 2, 1), (2, 2), (1, 1), 1, False),
((1, 2, 2, 5), (2, 2), (3, 5), 4, False),
((1, 7, 7, 4), (2, 2), (7, 9), 8, True),
((1, 7, 7, 4), (2, 2), (7, 7), 8, True),
],
)
def test_conv2d_transpose(ifm_shape, strides, kernel_size, out_channels, dtype, bias):
@@ -169,6 +170,71 @@ def test_conv2d_transpose(ifm_shape, strides, kernel_size, out_channels, dtype,
tei.verify(outputs, dtype, 1)


@requires_ethosn
@pytest.mark.parametrize("dtype", ["uint8", "int8"])
@pytest.mark.parametrize(
"ifm_shape,strides,kernel_size,out_channels,bias",
[
((1, 10, 20, 3), (1, 1), (8, 5), 4, False),
((1, 10, 10, 2), (2, 2), (7, 9), 8, True),
],
)
def test_conv2d_transpose_kernel_size_gt_8(
ifm_shape, strides, kernel_size, out_channels, dtype, bias
):
"""Check transpose convolution for big kernel sizes."""
if ethosn_api_version() in ["3.2.0", "3.1.0"]:
pytest.skip("Skipping because NPU driver 22.11 fails to interpret the zero point used in the test.")

np.random.seed(0)

kernel_layout = "IOHW"
dilation = (1, 1)
groups = 1

iinfo = np.iinfo(dtype)
data_min = iinfo.min
data_max = iinfo.max

input_zp = np.random.randint(data_min, data_max)
input_sc = np.random.random() * 2
kernel_zp = np.random.randint(data_min, data_max)
kernel_sc = np.random.random() * 4
output_zp, output_sc = tei.get_conv2d_qnn_params(
dtype, input_zp, input_sc, kernel_zp, kernel_sc, ifm_shape[1], ifm_shape[2], ifm_shape[3]
)

model, params = _get_model(
shape=ifm_shape,
kernel_h=kernel_size[0],
kernel_w=kernel_size[1],
input_zp=input_zp,
input_sc=input_sc,
kernel_zp=kernel_zp,
kernel_sc=kernel_sc,
output_zp=output_zp,
output_sc=output_sc,
stride=strides,
dilation=dilation,
groups=groups,
kernel_layout=kernel_layout,
dtype=dtype,
out_channels=out_channels,
bias=bias,
)

outputs = []
inputs = {
"a": tvm.nd.array(np.random.randint(data_min, data_max + 1, size=ifm_shape, dtype=dtype))
}

for npu in [False, True]:
mod = tei.make_module(model, params)
outputs.append(tei.build_and_run(mod, inputs, 1, params, npu=npu))

tei.verify(outputs, dtype, 1)


@requires_ethosn
@pytest.mark.parametrize("dtype", ["uint8", "int8"])
@pytest.mark.parametrize(
8 changes: 6 additions & 2 deletions tests/python/contrib/test_ethosn/test_leaky_relu.py
@@ -22,6 +22,7 @@

import tvm
from tvm import relay
from tvm.relay.op.contrib import ethosn_api_version
from tvm.testing import requires_ethosn

from . import infrastructure as tei
@@ -55,9 +56,12 @@ def test_leaky_relu(dtype, shape, alpha):
iinfo = np.iinfo(dtype)
zp_min = iinfo.min
zp_max = iinfo.max
input_zp = zp_min + 120
if ethosn_api_version() == "3.2.0":
input_zp = zp_min + 128
else:
input_zp = zp_min + 120
input_sc = 0.0068132
output_zp = zp_min + 128
output_zp = zp_min + 126  # values offset by more than 126 can cause saturation
output_sc = 0.0078125

inputs = {"x": tvm.nd.array(np.random.randint(zp_min, high=zp_max, size=shape, dtype=dtype))}