From 1ec4b5a9c78e82bc6e6d7ad68319e9ce120479df Mon Sep 17 00:00:00 2001
From: Ashutosh Parkhi
Date: Fri, 16 Dec 2022 23:56:09 +0000
Subject: [PATCH] NPU driver updated to 22.11

- A new process memory allocator is used to create buffers and networks.
- Support for the 22.08 stack is kept intact in the sources and tests
  until the new docker image is built and used.
- Tests were modified to meet the limitations that the NPU software
  imposes on the input zero point and the kernel size.
- Removed the ETHOSN_API_VERSION define from the cmake infra.

Change-Id: I580a7e8e9e21665a6c2e69641b5b85a324bcd9b4
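
For reviewers, a minimal sketch of the allocation flow this patch adopts.
This is an illustration only, assuming the 22.11 driver-library calls used
in the diff below; RunFlow and its arguments are hypothetical, and error
handling plus the WaitForInference/Map/Unmap steps are elided:

    #include <cstddef>
    #include <cstdint>
    #include <memory>

    #include "ethosn_driver_library/Buffer.hpp"
    #include "ethosn_driver_library/Inference.hpp"
    #include "ethosn_driver_library/Network.hpp"
    #include "ethosn_driver_library/ProcMemAllocator.hpp"

    namespace dl = ::ethosn::driver_library;

    // 22.08 constructed networks directly, e.g. dl::Network(cmm, size);
    // 22.11 routes buffer and network creation through a per-process
    // memory allocator that manages the underlying allocations.
    void RunFlow(const char* cmm_data, size_t cmm_size, uint8_t* in_data,
                 uint32_t in_size, uint32_t out_size) {
      dl::ProcMemAllocator proc_mem_alloc;
      dl::Network network = proc_mem_alloc.CreateNetwork(cmm_data, cmm_size);
      // Input buffers wrap existing data; output buffers take a size only.
      dl::Buffer ifm = proc_mem_alloc.CreateBuffer(in_data, in_size, dl::DataFormat::NHWC);
      dl::Buffer ofm = proc_mem_alloc.CreateBuffer(out_size, dl::DataFormat::NHWC);
      dl::Buffer* ifm_raw[] = {&ifm};
      dl::Buffer* ofm_raw[] = {&ofm};
      // Scheduling the inference itself is unchanged from 22.08.
      std::unique_ptr<dl::Inference> inference(
          network.ScheduleInference(ifm_raw, 1, ofm_raw, 1));
    }

Note that the allocator must outlive the buffers and networks it creates,
which is why the runtime below keeps it alongside the network in
OrderedCompiledNetwork.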
""" api_version = ethosn_api_version() - supported_api_versions = ["3.1.0"] + supported_api_versions = ["3.2.0", "3.1.0"] if all(api_version != LooseVersion(exp_ver) for exp_ver in supported_api_versions): raise ValueError( f"Driver stack version {api_version} is unsupported. " diff --git a/src/runtime/contrib/ethosn/ethosn_device.cc b/src/runtime/contrib/ethosn/ethosn_device.cc index 0d79f69815fa..7e16f7e887f3 100644 --- a/src/runtime/contrib/ethosn/ethosn_device.cc +++ b/src/runtime/contrib/ethosn/ethosn_device.cc @@ -42,6 +42,9 @@ #include "ethosn_driver_library/Inference.hpp" #include "ethosn_driver_library/Network.hpp" +#ifdef _ETHOSN_API_VERSION_3_2_0 +#include "ethosn_driver_library/ProcMemAllocator.hpp" +#endif namespace tvm { namespace runtime { @@ -87,6 +90,81 @@ InferenceWaitStatus WaitForInference(dl::Inference* inference, int timeout) { return InferenceWaitStatus(InferenceWaitErrorCode::kSuccess); } +#ifdef _ETHOSN_API_VERSION_3_2_0 +void CreateBuffers(dl::ProcMemAllocator* proc_mem_alloc, + std::vector>* fm, + const std::vector& tensors, const std::vector& tensor_sizes, + bool input) { + for (size_t i = 0; i < tensors.size(); i++) { + auto* data = static_cast(tensors[i]->data); + if (input) { + (*fm)[i] = std::make_shared( + proc_mem_alloc->CreateBuffer(data, tensor_sizes[i], dl::DataFormat::NHWC)); + } else { + (*fm)[i] = std::make_shared( + proc_mem_alloc->CreateBuffer(tensor_sizes[i], dl::DataFormat::NHWC)); + } + } +} + +bool Inference(tvm::runtime::TVMArgs args, dl::ProcMemAllocator* proc_mem_alloc, dl::Network* npu, + const std::vector& input_order, const std::vector& output_order, + const std::vector& input_sizes, + const std::vector& output_sizes) { + // Unpack parameters + size_t n_inputs = input_order.size(); + size_t n_outputs = output_order.size(); + std::vector inputs(n_inputs); + for (size_t i = 0; i < n_inputs; i++) { + inputs[i] = args[input_order[i]]; + } + std::vector outputs(n_outputs); + size_t output_offset = n_inputs; + for (size_t i = 0; i < n_outputs; i++) { + outputs[i] = args[output_order[i] + output_offset]; + } + + // Set up input buffers + std::vector> ifm(n_inputs); + CreateBuffers(proc_mem_alloc, &ifm, inputs, input_sizes, true); + + // Set up output buffers + std::vector> ofm(n_outputs); + CreateBuffers(proc_mem_alloc, &ofm, outputs, output_sizes, false); + + // Raw pointers for the inference + dl::Buffer* ifm_raw[n_inputs]; + for (size_t i = 0; i < n_inputs; i++) { + ifm_raw[i] = ifm[i].get(); + } + dl::Buffer* ofm_raw[n_outputs]; + for (size_t i = 0; i < n_outputs; i++) { + ofm_raw[i] = ofm[i].get(); + } + + // Execute the inference. 
+  std::unique_ptr<dl::Inference> inference(
+      npu->ScheduleInference(ifm_raw, n_inputs, ofm_raw, n_outputs));
+  InferenceWaitStatus result = WaitForInference(inference.get(), 60);
+
+  if (result.GetErrorCode() != InferenceWaitErrorCode::kSuccess) {
+    LOG(FATAL) << "An error has occurred waiting for the inference of a sub-graph on the NPU: "
+               << result.GetErrorDescription();
+  }
+
+  for (size_t i = 0; i < n_outputs; i++) {
+    DLTensor* tensor = outputs[i];
+    dl::Buffer* source_buffer = ofm_raw[i];
+    uint8_t* dest_buffer = static_cast<uint8_t*>(tensor->data);
+    size_t size = source_buffer->GetSize();
+    uint8_t* source_buffer_data = source_buffer->Map();
+    std::copy(source_buffer_data, source_buffer_data + size, dest_buffer);
+    source_buffer->Unmap();
+  }
+
+  return true;
+}
+#else
 void CreateBuffers(std::vector<std::shared_ptr<dl::Buffer>>* fm,
                    const std::vector<DLTensor*>& tensors,
                    const std::vector<uint32_t>& tensor_sizes, bool input) {
@@ -157,7 +235,7 @@ bool Inference(tvm::runtime::TVMArgs args, dl::Network* npu,
 return true;
 }
-
+#endif
 } // namespace ethosn
 } // namespace runtime
 } // namespace tvm
@@ -192,9 +270,12 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.test.infra.inference_result")
 });

 // Allow the ethos-n support code to be tested without a device
-bool Inference(tvm::runtime::TVMArgs args, dl::Network* /* npu */,
-               const std::vector<uint32_t>& input_order, const std::vector<uint32_t>& output_order,
-               const std::vector<uint32_t>& input_sizes,
+bool Inference(tvm::runtime::TVMArgs args,
+#ifdef _ETHOSN_API_VERSION_3_2_0
+               dl::ProcMemAllocator* proc_mem_alloc,
+#endif
+               dl::Network* /* npu */, const std::vector<uint32_t>& input_order,
+               const std::vector<uint32_t>& output_order, const std::vector<uint32_t>& input_sizes,
                const std::vector<uint32_t>& output_sizes) {
 std::vector<DLTensor*> outputs;
 for (int argc = input_order.size(); argc < args.size(); argc++) {
diff --git a/src/runtime/contrib/ethosn/ethosn_device.h b/src/runtime/contrib/ethosn/ethosn_device.h
index acef104515e1..a5f3d18cf9fd 100644
--- a/src/runtime/contrib/ethosn/ethosn_device.h
+++ b/src/runtime/contrib/ethosn/ethosn_device.h
@@ -38,10 +38,15 @@ namespace dl = ::ethosn::driver_library;
 using tvm::runtime::TVMArgs;

+#ifdef _ETHOSN_API_VERSION_3_2_0
+bool Inference(tvm::runtime::TVMArgs args, dl::ProcMemAllocator* proc_mem_alloc, dl::Network* npu,
+               const std::vector<uint32_t>& input_order, const std::vector<uint32_t>& output_order,
+               const std::vector<uint32_t>& input_sizes, const std::vector<uint32_t>& output_sizes);
+#else
 bool Inference(tvm::runtime::TVMArgs args, dl::Network* npu, const std::vector<uint32_t>& input_order,
                const std::vector<uint32_t>& output_order, const std::vector<uint32_t>& input_sizes,
                const std::vector<uint32_t>& output_sizes);
-
+#endif
 } // namespace ethosn
 } // namespace runtime
 } // namespace tvm
diff --git a/src/runtime/contrib/ethosn/ethosn_runtime.cc b/src/runtime/contrib/ethosn/ethosn_runtime.cc
index dc2d4da853f8..11edc8c71a2c 100644
--- a/src/runtime/contrib/ethosn/ethosn_runtime.cc
+++ b/src/runtime/contrib/ethosn/ethosn_runtime.cc
@@ -53,6 +53,11 @@ EthosnModule::EthosnModule(std::vector<OrderedCompiledNetwork>* cmms) {
 if (it.compiled_cmm != nullptr) {
 network_map_[it.name].compiled_cmm = std::move(it.compiled_cmm);
 }
+#ifdef _ETHOSN_API_VERSION_3_2_0
+    if (it.proc_mem_alloc != nullptr) {
+      network_map_[it.name].proc_mem_alloc = std::move(it.proc_mem_alloc);
+    }
+#endif
 if (it.runtime_cmm != nullptr) {
 network_map_[it.name].runtime_cmm = std::move(it.runtime_cmm);
 }
@@ -67,9 +72,16 @@ PackedFunc EthosnModule::GetFunction(const std::string& name,
 const ObjectPtr<Object>& sptr_to_self) {
 if (network_map_.find(name) != network_map_.end()) {
 return PackedFunc([sptr_to_self, this, name](TVMArgs args, TVMRetValue* rv) {
+#ifdef _ETHOSN_API_VERSION_3_2_0
+      *rv = Inference(args, network_map_[name].proc_mem_alloc.get(),
+                      network_map_[name].runtime_cmm.get(), network_map_[name].inputs,
+                      network_map_[name].outputs, network_map_[name].input_sizes,
+                      network_map_[name].output_sizes);
+#else
 *rv = Inference(args, network_map_[name].runtime_cmm.get(), network_map_[name].inputs,
 network_map_[name].outputs, network_map_[name].input_sizes,
 network_map_[name].output_sizes);
+#endif
 });
 } else {
 return PackedFunc();
@@ -102,6 +114,9 @@ Module EthosnModule::LoadFromBinary(void* strm) {
 cmms.resize(func_count);
 for (unsigned int i = 0; i < func_count; i++) {
 OrderedCompiledNetwork& compiled = cmms[i];
+#ifdef _ETHOSN_API_VERSION_3_2_0
+    compiled.proc_mem_alloc = std::make_unique<dl::ProcMemAllocator>();
+#endif
 std::string ext_symbol;
 std::string cmm;
 uint64_t input_size;
@@ -114,7 +129,12 @@ Module EthosnModule::LoadFromBinary(void* strm) {
 #if defined ETHOSN_HW
 // If hardware is unavailable, use the mock inference functionality. If hardware is
 // available, deserialize the compiled graph.
+#ifdef _ETHOSN_API_VERSION_3_2_0
+    compiled.runtime_cmm = std::make_unique<dl::Network>(
+        compiled.proc_mem_alloc->CreateNetwork(cmm.c_str(), cmm.size()));
+#else
 compiled.runtime_cmm = std::make_unique<dl::Network>(cmm.c_str(), cmm.size());
+#endif
 #endif
 // Read the number of inputs
 stream->Read(&input_size);
diff --git a/src/runtime/contrib/ethosn/ethosn_runtime.h b/src/runtime/contrib/ethosn/ethosn_runtime.h
index b8942fef12d9..2f8e445d97a8 100644
--- a/src/runtime/contrib/ethosn/ethosn_runtime.h
+++ b/src/runtime/contrib/ethosn/ethosn_runtime.h
@@ -36,6 +36,14 @@
 #include "ethosn_driver_library/Network.hpp"
 #include "ethosn_support_library/Support.hpp"

+#if ETHOSN_SUPPORT_LIBRARY_VERSION_MAJOR == 3 && ETHOSN_SUPPORT_LIBRARY_VERSION_MINOR == 2 && \
+    ETHOSN_SUPPORT_LIBRARY_VERSION_PATCH == 0
+#define _ETHOSN_API_VERSION_3_2_0
+#endif
+#ifdef _ETHOSN_API_VERSION_3_2_0
+#include "ethosn_driver_library/ProcMemAllocator.hpp"
+#endif
+
 namespace tvm {
 namespace runtime {
 namespace ethosn {
@@ -46,6 +54,9 @@ namespace dl = ::ethosn::driver_library;
 struct OrderedCompiledNetwork {
 std::unique_ptr<sl::CompiledNetwork> compiled_cmm;
 std::unique_ptr<dl::Network> runtime_cmm;
+#ifdef _ETHOSN_API_VERSION_3_2_0
+  std::unique_ptr<dl::ProcMemAllocator> proc_mem_alloc;
+#endif
 std::string name;
 std::vector<uint32_t> inputs;
 std::vector<uint32_t> outputs;
diff --git a/tests/python/contrib/test_ethosn/infrastructure.py b/tests/python/contrib/test_ethosn/infrastructure.py
index 8a469403872f..334cd6d3b87c 100644
--- a/tests/python/contrib/test_ethosn/infrastructure.py
+++ b/tests/python/contrib/test_ethosn/infrastructure.py
@@ -168,7 +168,6 @@ def build(
 if not additional_config_args:
 additional_config_args = {}
 npu_config = {**get_ethosn_device_options(), **additional_config_args}
- print(npu_config)
 with tvm.transform.PassContext(opt_level=3, config={"relay.ext.ethos-n.options": npu_config}):
 with tvm.target.Target("llvm"):
 if npu:
diff --git a/tests/python/contrib/test_ethosn/test_conv2d.py b/tests/python/contrib/test_ethosn/test_conv2d.py
index a6ce73656bfc..851bd031b38e 100644
--- a/tests/python/contrib/test_ethosn/test_conv2d.py
+++ b/tests/python/contrib/test_ethosn/test_conv2d.py
@@ -22,6 +22,7 @@
 import tvm
 from tvm import relay
+from tvm.relay.op.contrib import ethosn_api_version
 from tvm.testing import requires_ethosn
 from . import infrastructure as tei
@@ -227,7 +228,10 @@ def test_conv2d_depthwise(
 )
 ),
 }
- input_zp = np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max)
+ if ethosn_api_version() == "3.2.0":
+     input_zp = np.random.randint(0, np.iinfo(dtype).max)
+ else:
+     input_zp = np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max)
 input_sc = np.random.random() * 2
 if qnn_per_channel:
 kernel_sc = tvm.nd.array(
diff --git a/tests/python/contrib/test_ethosn/test_conv2d_transpose.py b/tests/python/contrib/test_ethosn/test_conv2d_transpose.py
index 84aa7e969b30..4d99a310ac44 100644
--- a/tests/python/contrib/test_ethosn/test_conv2d_transpose.py
+++ b/tests/python/contrib/test_ethosn/test_conv2d_transpose.py
@@ -22,6 +22,7 @@
 import tvm
 from tvm import relay
+from tvm.relay.op.contrib import ethosn_api_version
 from tvm.testing import requires_ethosn
 from . import infrastructure as tei
@@ -115,7 +116,7 @@ def _get_model(
 [
 ((1, 2, 2, 1), (2, 2), (1, 1), 1, False),
 ((1, 2, 2, 5), (2, 2), (3, 5), 4, False),
- ((1, 7, 7, 4), (2, 2), (7, 9), 8, True),
+ ((1, 7, 7, 4), (2, 2), (7, 7), 8, True),
 ],
 )
 def test_conv2d_transpose(ifm_shape, strides, kernel_size, out_channels, dtype, bias):
@@ -169,6 +170,71 @@ def test_conv2d_transpose(ifm_shape, strides, kernel_size, out_channels, dtype,
 tei.verify(outputs, dtype, 1)


+@requires_ethosn
+@pytest.mark.parametrize("dtype", ["uint8", "int8"])
+@pytest.mark.parametrize(
+    "ifm_shape,strides,kernel_size,out_channels,bias",
+    [
+        ((1, 10, 20, 3), (1, 1), (8, 5), 4, False),
+        ((1, 10, 10, 2), (2, 2), (7, 9), 8, True),
+    ],
+)
+def test_conv2d_transpose_kernel_size_gt_8(
+    ifm_shape, strides, kernel_size, out_channels, dtype, bias
+):
+    """Check transpose convolution for large kernel sizes."""
+    if ethosn_api_version() in ["3.2.0", "3.1.0"]:
+        pytest.skip("Skipping because NPU driver 22.11 fails to interpret the zero point used in the test.")
+
+    np.random.seed(0)
+
+    kernel_layout = "IOHW"
+    dilation = (1, 1)
+    groups = 1
+
+    iinfo = np.iinfo(dtype)
+    data_min = iinfo.min
+    data_max = iinfo.max
+
+    input_zp = np.random.randint(data_min, data_max)
+    input_sc = np.random.random() * 2
+    kernel_zp = np.random.randint(data_min, data_max)
+    kernel_sc = np.random.random() * 4
+    output_zp, output_sc = tei.get_conv2d_qnn_params(
+        dtype, input_zp, input_sc, kernel_zp, kernel_sc, ifm_shape[1], ifm_shape[2], ifm_shape[3]
+    )
+
+    model, params = _get_model(
+        shape=ifm_shape,
+        kernel_h=kernel_size[0],
+        kernel_w=kernel_size[1],
+        input_zp=input_zp,
+        input_sc=input_sc,
+        kernel_zp=kernel_zp,
+        kernel_sc=kernel_sc,
+        output_zp=output_zp,
+        output_sc=output_sc,
+        stride=strides,
+        dilation=dilation,
+        groups=groups,
+        kernel_layout=kernel_layout,
+        dtype=dtype,
+        out_channels=out_channels,
+        bias=bias,
+    )
+
+    outputs = []
+    inputs = {
+        "a": tvm.nd.array(np.random.randint(data_min, data_max + 1, size=ifm_shape, dtype=dtype))
+    }
+
+    for npu in [False, True]:
+        mod = tei.make_module(model, params)
+        outputs.append(tei.build_and_run(mod, inputs, 1, params, npu=npu))
+
+    tei.verify(outputs, dtype, 1)
+
+
 @requires_ethosn
 @pytest.mark.parametrize("dtype", ["uint8", "int8"])
 @pytest.mark.parametrize(
diff --git a/tests/python/contrib/test_ethosn/test_leaky_relu.py b/tests/python/contrib/test_ethosn/test_leaky_relu.py
index ccf67151bf1e..ee5f2048dbbb 100644
--- a/tests/python/contrib/test_ethosn/test_leaky_relu.py
+++ b/tests/python/contrib/test_ethosn/test_leaky_relu.py
@@ -22,6 +22,7 @@
 import tvm
 from tvm import relay
+from tvm.relay.op.contrib import ethosn_api_version
 from tvm.testing import requires_ethosn
 from . import infrastructure as tei
@@ -55,9 +56,12 @@ def test_leaky_relu(dtype, shape, alpha):
 iinfo = np.iinfo(dtype)
 zp_min = iinfo.min
 zp_max = iinfo.max
- input_zp = zp_min + 120
+ if ethosn_api_version() == "3.2.0":
+     input_zp = zp_min + 128
+ else:
+     input_zp = zp_min + 120
 input_sc = 0.0068132
- output_zp = zp_min + 128
+ output_zp = zp_min + 126 # values offset by more than 126 can cause saturation
 output_sc = 0.0078125
 inputs = {"x": tvm.nd.array(np.random.randint(zp_min, high=zp_max, size=shape, dtype=dtype))}
diff --git a/tests/python/contrib/test_ethosn/test_tanh.py b/tests/python/contrib/test_ethosn/test_tanh.py
index 25f46e51eda9..77ed33980ea5 100644
--- a/tests/python/contrib/test_ethosn/test_tanh.py
+++ b/tests/python/contrib/test_ethosn/test_tanh.py
@@ -47,7 +47,6 @@ def _get_model(shape, input_zp, input_sc, output_zp, output_sc, dtype):
 @pytest.mark.parametrize("shape", [(1, 52, 52, 3)])
 def test_tanh(dtype, shape):
 """Compare Tanh output with TVM."""
-
 zp_min = np.iinfo(dtype).min
 zp_max = np.iinfo(dtype).max
@@ -57,7 +56,7 @@ def test_tanh(dtype, shape):
 }
 outputs = []
 for npu in [False, True]:
- model = _get_model(shape, zp_min + 120, 0.0250629, zp_min + 128, 0.0078125, dtype)
+ model = _get_model(shape, zp_min + 128, 1 / 256, zp_min + 128, 1 / 128, dtype)
 mod = tei.make_module(model, [])
 outputs.append(
 tei.build_and_run(