[ETHOSN] Update driver stack version to 22.11 (#13637)
- New process memory allocator is used to create buffers and networks (see the sketch below the change summary).
- Support for the 22.08 stack has been kept intact in the sources and tests until a new docker image is built and used.
- Tests were modified to meet limitations imposed on input zero point and kernel size by the NPU software.
- Removed defining ETHOSN_API_VERSION from the cmake infra.
ashutosh-arm authored Dec 20, 2022
1 parent 26a205c commit 5019dce
Showing 12 changed files with 204 additions and 29 deletions.
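For orientation before the per-file diffs, here is a minimal sketch of the buffer/network lifecycle under the new process memory allocator, assuming the 22.11 (API 3.2.0) signatures used in this patch. The function name `RunOnce` and the single input/output layout are hypothetical, and waiting/error handling are elided:

```cpp
// A minimal sketch, assuming the ProcMemAllocator calls used in this patch.
#include <algorithm>
#include <cstdint>
#include <memory>
#include <string>

#include "ethosn_driver_library/Inference.hpp"
#include "ethosn_driver_library/Network.hpp"
#include "ethosn_driver_library/ProcMemAllocator.hpp"

namespace dl = ::ethosn::driver_library;

void RunOnce(const std::string& compiled_stream, uint8_t* in_data,
             uint32_t in_size, uint8_t* out_data, uint32_t out_size) {
  // 22.11: networks and buffers are created through a per-process memory
  // allocator rather than constructed directly.
  dl::ProcMemAllocator proc_mem_alloc;
  dl::Network network =
      proc_mem_alloc.CreateNetwork(compiled_stream.c_str(), compiled_stream.size());

  // Input buffers wrap existing data; output buffers are created by size.
  dl::Buffer ifm = proc_mem_alloc.CreateBuffer(in_data, in_size, dl::DataFormat::NHWC);
  dl::Buffer ofm = proc_mem_alloc.CreateBuffer(out_size, dl::DataFormat::NHWC);

  dl::Buffer* ifm_raw[] = {&ifm};
  dl::Buffer* ofm_raw[] = {&ofm};
  std::unique_ptr<dl::Inference> inference(
      network.ScheduleInference(ifm_raw, 1, ofm_raw, 1));

  // ... wait for the inference to complete (see WaitForInference in
  // ethosn_device.cc), then map the output buffer and copy the result out.
  uint8_t* result = ofm.Map();
  std::copy(result, result + ofm.GetSize(), out_data);
  ofm.Unmap();
}
```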
16 changes: 1 addition & 15 deletions cmake/utils/FindEthosN.cmake
@@ -58,18 +58,6 @@ macro(find_ethosn use_ethosn)
PATHS ${__ethosn_stack}/lib)
find_library(ETHOSN_COMPILER_LIBRARY NAMES EthosNSupport)

list(GET ETHOSN_INCLUDE_DIRS 0 filename)
set(filename "${filename}/ethosn_support_library/Support.hpp")
file(READ ${filename} ETHOSN_SUPPORT_H)
string(REGEX MATCH "VERSION_MAJOR ([0-9]*)" _ ${ETHOSN_SUPPORT_H})
set(ver_major ${CMAKE_MATCH_1})
string(REGEX MATCH "VERSION_MINOR ([0-9]*)" _ ${ETHOSN_SUPPORT_H})
set(ver_minor ${CMAKE_MATCH_1})
string(REGEX MATCH "VERSION_PATCH ([0-9]*)" _ ${ETHOSN_SUPPORT_H})
set(ver_patch ${CMAKE_MATCH_1})
set(ETHOSN_PACKAGE_VERSION "${ver_major}.${ver_minor}.${ver_patch}")
set(ETHOSN_DEFINITIONS -DETHOSN_API_VERSION=${USE_ETHOSN_API_VERSION})

# Runtime hardware support. Driver library also needed for
# test support.
find_path(_DL_DIR NAMES Network.hpp
@@ -81,9 +69,7 @@ macro(find_ethosn use_ethosn)
PATHS ${__ethosn_stack}/lib)
find_library(ETHOSN_RUNTIME_LIBRARY NAMES EthosNDriver)
if(${USE_ETHOSN_HW} MATCHES ${IS_TRUE_PATTERN})
set(ETHOSN_DEFINITIONS -DETHOSN_HW -DETHOSN_API_VERSION=${USE_ETHOSN_API_VERSION})
else()
set(ETHOSN_DEFINITIONS -DETHOSN_API_VERSION=${USE_ETHOSN_API_VERSION})
set(ETHOSN_DEFINITIONS -DETHOSN_HW)
endif()

if(ETHOSN_COMPILER_LIBRARY)
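With the cmake-side version parsing gone, the sources detect the support library version at compile time instead. A short sketch of the gate, mirroring the check this patch adds to ethosn_runtime.h further down:

```cpp
// Sketch: derive the API gate from the support library's own version macros
// instead of a cmake-injected ETHOSN_API_VERSION definition.
#include "ethosn_support_library/Support.hpp"

#if ETHOSN_SUPPORT_LIBRARY_VERSION_MAJOR == 3 && \
    ETHOSN_SUPPORT_LIBRARY_VERSION_MINOR == 2 && \
    ETHOSN_SUPPORT_LIBRARY_VERSION_PATCH == 0
#define _ETHOSN_API_VERSION_3_2_0
#endif
```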
2 changes: 1 addition & 1 deletion docker/install/ubuntu_install_ethosn_driver_stack.sh
@@ -22,7 +22,7 @@ set -o pipefail

repo_url="https://github.com/Arm-software/ethos-n-driver-stack"
repo_dir="ethosn-driver"
repo_revision="22.08"
repo_revision="22.11"
install_path="/opt/arm/$repo_dir"

tmpdir=$(mktemp -d)
2 changes: 1 addition & 1 deletion python/tvm/relay/op/contrib/ethosn.py
@@ -117,7 +117,7 @@ def partition_for_ethosn(mod, params=None, **opts):
ret : annotated and partitioned module.
"""
api_version = ethosn_api_version()
supported_api_versions = ["3.1.0"]
supported_api_versions = ["3.2.0", "3.1.0"]
if all(api_version != LooseVersion(exp_ver) for exp_ver in supported_api_versions):
raise ValueError(
f"Driver stack version {api_version} is unsupported. "
89 changes: 85 additions & 4 deletions src/runtime/contrib/ethosn/ethosn_device.cc
@@ -42,6 +42,9 @@

#include "ethosn_driver_library/Inference.hpp"
#include "ethosn_driver_library/Network.hpp"
#ifdef _ETHOSN_API_VERSION_3_2_0
#include "ethosn_driver_library/ProcMemAllocator.hpp"
#endif

namespace tvm {
namespace runtime {
@@ -87,6 +90,81 @@ InferenceWaitStatus WaitForInference(dl::Inference* inference, int timeout) {
return InferenceWaitStatus(InferenceWaitErrorCode::kSuccess);
}

#ifdef _ETHOSN_API_VERSION_3_2_0
void CreateBuffers(dl::ProcMemAllocator* proc_mem_alloc,
std::vector<std::shared_ptr<dl::Buffer>>* fm,
const std::vector<DLTensor*>& tensors, const std::vector<uint32_t>& tensor_sizes,
bool input) {
for (size_t i = 0; i < tensors.size(); i++) {
auto* data = static_cast<uint8_t*>(tensors[i]->data);
if (input) {
(*fm)[i] = std::make_shared<dl::Buffer>(
proc_mem_alloc->CreateBuffer(data, tensor_sizes[i], dl::DataFormat::NHWC));
} else {
(*fm)[i] = std::make_shared<dl::Buffer>(
proc_mem_alloc->CreateBuffer(tensor_sizes[i], dl::DataFormat::NHWC));
}
}
}

bool Inference(tvm::runtime::TVMArgs args, dl::ProcMemAllocator* proc_mem_alloc, dl::Network* npu,
const std::vector<uint32_t>& input_order, const std::vector<uint32_t>& output_order,
const std::vector<uint32_t>& input_sizes,
const std::vector<uint32_t>& output_sizes) {
// Unpack parameters
size_t n_inputs = input_order.size();
size_t n_outputs = output_order.size();
std::vector<DLTensor*> inputs(n_inputs);
for (size_t i = 0; i < n_inputs; i++) {
inputs[i] = args[input_order[i]];
}
std::vector<DLTensor*> outputs(n_outputs);
size_t output_offset = n_inputs;
for (size_t i = 0; i < n_outputs; i++) {
outputs[i] = args[output_order[i] + output_offset];
}

// Set up input buffers
std::vector<std::shared_ptr<dl::Buffer>> ifm(n_inputs);
CreateBuffers(proc_mem_alloc, &ifm, inputs, input_sizes, true);

// Set up output buffers
std::vector<std::shared_ptr<dl::Buffer>> ofm(n_outputs);
CreateBuffers(proc_mem_alloc, &ofm, outputs, output_sizes, false);

// Raw pointers for the inference
dl::Buffer* ifm_raw[n_inputs];
for (size_t i = 0; i < n_inputs; i++) {
ifm_raw[i] = ifm[i].get();
}
dl::Buffer* ofm_raw[n_outputs];
for (size_t i = 0; i < n_outputs; i++) {
ofm_raw[i] = ofm[i].get();
}

// Execute the inference.
std::unique_ptr<dl::Inference> inference(
npu->ScheduleInference(ifm_raw, n_inputs, ofm_raw, n_outputs));
InferenceWaitStatus result = WaitForInference(inference.get(), 60);

if (result.GetErrorCode() != InferenceWaitErrorCode::kSuccess) {
LOG(FATAL) << "An error has occurred waiting for the inference of a sub-graph on the NPU: "
<< result.GetErrorDescription();
}

for (size_t i = 0; i < n_outputs; i++) {
DLTensor* tensor = outputs[i];
dl::Buffer* source_buffer = ofm_raw[i];
uint8_t* dest_buffer = static_cast<uint8_t*>(tensor->data);
size_t size = source_buffer->GetSize();
uint8_t* source_buffer_data = source_buffer->Map();
std::copy(source_buffer_data, source_buffer_data + size, dest_buffer);
source_buffer->Unmap();
}

return true;
}
#else
void CreateBuffers(std::vector<std::shared_ptr<dl::Buffer>>* fm,
const std::vector<DLTensor*>& tensors, const std::vector<uint32_t>& tensor_sizes,
bool input) {
@@ -157,7 +235,7 @@ bool Inference(tvm::runtime::TVMArgs args, dl::Network* npu,

return true;
}

#endif
} // namespace ethosn
} // namespace runtime
} // namespace tvm
@@ -192,9 +270,12 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.test.infra.inference_result")
});

// Allow the ethos-n support code to be tested without a device
bool Inference(tvm::runtime::TVMArgs args, dl::Network* /* npu */,
const std::vector<uint32_t>& input_order, const std::vector<uint32_t>& output_order,
const std::vector<uint32_t>& input_sizes,
bool Inference(tvm::runtime::TVMArgs args,
#ifdef _ETHOSN_API_VERSION_3_2_0
dl::ProcMemAllocator* proc_mem_alloc,
#endif
dl::Network* /* npu */, const std::vector<uint32_t>& input_order,
const std::vector<uint32_t>& output_order, const std::vector<uint32_t>& input_sizes,
const std::vector<uint32_t>& output_sizes) {
std::vector<DLTensor*> outputs;
for (int argc = input_order.size(); argc < args.size(); argc++) {
7 changes: 6 additions & 1 deletion src/runtime/contrib/ethosn/ethosn_device.h
@@ -38,10 +38,15 @@ namespace dl = ::ethosn::driver_library;

using tvm::runtime::TVMArgs;

#ifdef _ETHOSN_API_VERSION_3_2_0
bool Inference(tvm::runtime::TVMArgs args, dl::ProcMemAllocator* proc_mem_alloc, dl::Network* npu,
const std::vector<uint32_t>& input_order, const std::vector<uint32_t>& output_order,
const std::vector<uint32_t>& input_sizes, const std::vector<uint32_t>& output_sizes);
#else
bool Inference(tvm::runtime::TVMArgs args, dl::Network* npu,
const std::vector<uint32_t>& input_order, const std::vector<uint32_t>& output_order,
const std::vector<uint32_t>& input_sizes, const std::vector<uint32_t>& output_sizes);

#endif
} // namespace ethosn
} // namespace runtime
} // namespace tvm
20 changes: 20 additions & 0 deletions src/runtime/contrib/ethosn/ethosn_runtime.cc
@@ -53,6 +53,11 @@ EthosnModule::EthosnModule(std::vector<OrderedCompiledNetwork>* cmms) {
if (it.compiled_cmm != nullptr) {
network_map_[it.name].compiled_cmm = std::move(it.compiled_cmm);
}
#ifdef _ETHOSN_API_VERSION_3_2_0
if (it.proc_mem_alloc != nullptr) {
network_map_[it.name].proc_mem_alloc = std::move(it.proc_mem_alloc);
}
#endif
if (it.runtime_cmm != nullptr) {
network_map_[it.name].runtime_cmm = std::move(it.runtime_cmm);
}
@@ -67,9 +72,16 @@ PackedFunc EthosnModule::GetFunction(const std::string& name,
const ObjectPtr<Object>& sptr_to_self) {
if (network_map_.find(name) != network_map_.end()) {
return PackedFunc([sptr_to_self, this, name](TVMArgs args, TVMRetValue* rv) {
#ifdef _ETHOSN_API_VERSION_3_2_0
*rv = Inference(args, network_map_[name].proc_mem_alloc.get(),
network_map_[name].runtime_cmm.get(), network_map_[name].inputs,
network_map_[name].outputs, network_map_[name].input_sizes,
network_map_[name].output_sizes);
#else
*rv = Inference(args, network_map_[name].runtime_cmm.get(), network_map_[name].inputs,
network_map_[name].outputs, network_map_[name].input_sizes,
network_map_[name].output_sizes);
#endif
});
} else {
return PackedFunc();
@@ -102,6 +114,9 @@ Module EthosnModule::LoadFromBinary(void* strm) {
cmms.resize(func_count);
for (unsigned int i = 0; i < func_count; i++) {
OrderedCompiledNetwork& compiled = cmms[i];
#ifdef _ETHOSN_API_VERSION_3_2_0
compiled.proc_mem_alloc = std::make_unique<dl::ProcMemAllocator>();
#endif
std::string ext_symbol;
std::string cmm;
uint64_t input_size;
@@ -114,7 +129,12 @@
#if defined ETHOSN_HW
// If hardware is unavailable, use the mock inference functionality. If hardware is
// available, deserialize the compiled graph.
#ifdef _ETHOSN_API_VERSION_3_2_0
compiled.runtime_cmm = std::make_unique<dl::Network>(
compiled.proc_mem_alloc->CreateNetwork(cmm.c_str(), cmm.size()));
#else
compiled.runtime_cmm = std::make_unique<dl::Network>(cmm.c_str(), cmm.size());
#endif
#endif
// Read the number of inputs
stream->Read<uint64_t>(&input_size);
11 changes: 11 additions & 0 deletions src/runtime/contrib/ethosn/ethosn_runtime.h
@@ -36,6 +36,14 @@
#include "ethosn_driver_library/Network.hpp"
#include "ethosn_support_library/Support.hpp"

#if ETHOSN_SUPPORT_LIBRARY_VERSION_MAJOR == 3 && ETHOSN_SUPPORT_LIBRARY_VERSION_MINOR == 2 && \
ETHOSN_SUPPORT_LIBRARY_VERSION_PATCH == 0
#define _ETHOSN_API_VERSION_3_2_0
#endif
#ifdef _ETHOSN_API_VERSION_3_2_0
#include "ethosn_driver_library/ProcMemAllocator.hpp"
#endif

namespace tvm {
namespace runtime {
namespace ethosn {
Expand All @@ -46,6 +54,9 @@ namespace dl = ::ethosn::driver_library;
struct OrderedCompiledNetwork {
std::unique_ptr<sl::CompiledNetwork> compiled_cmm;
std::unique_ptr<dl::Network> runtime_cmm;
#ifdef _ETHOSN_API_VERSION_3_2_0
std::unique_ptr<dl::ProcMemAllocator> proc_mem_alloc;
#endif
std::string name;
std::vector<uint32_t> inputs;
std::vector<uint32_t> outputs;
1 change: 0 additions & 1 deletion tests/python/contrib/test_ethosn/infrastructure.py
@@ -168,7 +168,6 @@ def build(
if not additional_config_args:
additional_config_args = {}
npu_config = {**get_ethosn_device_options(), **additional_config_args}
print(npu_config)
with tvm.transform.PassContext(opt_level=3, config={"relay.ext.ethos-n.options": npu_config}):
with tvm.target.Target("llvm"):
if npu:
6 changes: 5 additions & 1 deletion tests/python/contrib/test_ethosn/test_conv2d.py
@@ -22,6 +22,7 @@

import tvm
from tvm import relay
from tvm.relay.op.contrib import ethosn_api_version
from tvm.testing import requires_ethosn

from . import infrastructure as tei
@@ -227,7 +228,10 @@ def test_conv2d_depthwise(
)
),
}
input_zp = np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max)
if ethosn_api_version() == "3.2.0":
input_zp = np.random.randint(0, np.iinfo(dtype).max)
else:
input_zp = np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max)
input_sc = np.random.random() * 2
if qnn_per_channel:
kernel_sc = tvm.nd.array(
68 changes: 67 additions & 1 deletion tests/python/contrib/test_ethosn/test_conv2d_transpose.py
@@ -22,6 +22,7 @@

import tvm
from tvm import relay
from tvm.relay.op.contrib import ethosn_api_version
from tvm.testing import requires_ethosn
from . import infrastructure as tei

@@ -115,7 +116,7 @@ def _get_model(
[
((1, 2, 2, 1), (2, 2), (1, 1), 1, False),
((1, 2, 2, 5), (2, 2), (3, 5), 4, False),
((1, 7, 7, 4), (2, 2), (7, 9), 8, True),
((1, 7, 7, 4), (2, 2), (7, 7), 8, True),
],
)
def test_conv2d_transpose(ifm_shape, strides, kernel_size, out_channels, dtype, bias):
@@ -169,6 +170,71 @@ def test_conv2d_transpose(ifm_shape, strides, kernel_size, out_channels, dtype,
tei.verify(outputs, dtype, 1)


@requires_ethosn
@pytest.mark.parametrize("dtype", ["uint8", "int8"])
@pytest.mark.parametrize(
"ifm_shape,strides,kernel_size,out_channels,bias",
[
((1, 10, 20, 3), (1, 1), (8, 5), 4, False),
((1, 10, 10, 2), (2, 2), (7, 9), 8, True),
],
)
def test_conv2d_transpose_kernel_size_gt_8(
ifm_shape, strides, kernel_size, out_channels, dtype, bias
):
"""Check transpose convolution for big kernel sizes."""
if ethosn_api_version() in ["3.2.0", "3.1.0"]:
pytest.skip("Skipping because NPU driver 22.11 fails to interpret the zero point used in the test.")

np.random.seed(0)

kernel_layout = "IOHW"
dilation = (1, 1)
groups = 1

iinfo = np.iinfo(dtype)
data_min = iinfo.min
data_max = iinfo.max

input_zp = np.random.randint(data_min, data_max)
input_sc = np.random.random() * 2
kernel_zp = np.random.randint(data_min, data_max)
kernel_sc = np.random.random() * 4
output_zp, output_sc = tei.get_conv2d_qnn_params(
dtype, input_zp, input_sc, kernel_zp, kernel_sc, ifm_shape[1], ifm_shape[2], ifm_shape[3]
)

model, params = _get_model(
shape=ifm_shape,
kernel_h=kernel_size[0],
kernel_w=kernel_size[1],
input_zp=input_zp,
input_sc=input_sc,
kernel_zp=kernel_zp,
kernel_sc=kernel_sc,
output_zp=output_zp,
output_sc=output_sc,
stride=strides,
dilation=dilation,
groups=groups,
kernel_layout=kernel_layout,
dtype=dtype,
out_channels=out_channels,
bias=bias,
)

outputs = []
inputs = {
"a": tvm.nd.array(np.random.randint(data_min, data_max + 1, size=ifm_shape, dtype=dtype))
}

for npu in [False, True]:
mod = tei.make_module(model, params)
outputs.append(tei.build_and_run(mod, inputs, 1, params, npu=npu))

tei.verify(outputs, dtype, 1)


@requires_ethosn
@pytest.mark.parametrize("dtype", ["uint8", "int8"])
@pytest.mark.parametrize(
8 changes: 6 additions & 2 deletions tests/python/contrib/test_ethosn/test_leaky_relu.py
@@ -22,6 +22,7 @@

import tvm
from tvm import relay
from tvm.relay.op.contrib import ethosn_api_version
from tvm.testing import requires_ethosn

from . import infrastructure as tei
@@ -55,9 +56,12 @@ def test_leaky_relu(dtype, shape, alpha):
iinfo = np.iinfo(dtype)
zp_min = iinfo.min
zp_max = iinfo.max
input_zp = zp_min + 120
if ethosn_api_version() == "3.2.0":
input_zp = zp_min + 128
else:
input_zp = zp_min + 120
input_sc = 0.0068132
output_zp = zp_min + 128
output_zp = zp_min + 126  # values offset by more than 126 can cause saturation
output_sc = 0.0078125

inputs = {"x": tvm.nd.array(np.random.randint(zp_min, high=zp_max, size=shape, dtype=dtype))}