diff --git a/CMakeLists.txt b/CMakeLists.txt index e0eab17dd1d6a..bd4e6b9bea023 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,7 @@ include(cmake/util/FindOpenCL.cmake) include(cmake/util/FindVulkan.cmake) include(cmake/util/FindLLVM.cmake) include(cmake/util/FindROCM.cmake) +include(cmake/util/FindEthosN.cmake) if(EXISTS ${CMAKE_CURRENT_BINARY_DIR}/config.cmake) include(${CMAKE_CURRENT_BINARY_DIR}/config.cmake) @@ -45,6 +46,7 @@ tvm_option(HIDE_PRIVATE_SYMBOLS "Compile with -fvisibility=hidden." OFF) tvm_option(USE_TF_COMPILE_FLAGS "Build with TensorFlow's compile flags." OFF) tvm_option(USE_TF_TVMDSOOP "Build with TensorFlow TVMDSOOp" OFF) tvm_option(USE_FALLBACK_STL_MAP "Use TVM's POD compatible Map" OFF) +tvm_option(USE_ETHOSN "Build with Arm Ethos-N" OFF) # 3rdparty libraries tvm_option(DLPACK_PATH "Path to DLPACK" "3rdparty/dlpack/include") @@ -322,6 +324,7 @@ include(cmake/modules/Metal.cmake) include(cmake/modules/ROCM.cmake) include(cmake/modules/LLVM.cmake) include(cmake/modules/Micro.cmake) +include(cmake/modules/contrib/EthosN.cmake) include(cmake/modules/contrib/BLAS.cmake) include(cmake/modules/contrib/CODEGENC.cmake) include(cmake/modules/contrib/DNNL.cmake) diff --git a/cmake/config.cmake b/cmake/config.cmake index c41ed95bccbc7..5cc67e1aa43f5 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -214,6 +214,16 @@ set(USE_DNNL_CODEGEN OFF) set(USE_ARM_COMPUTE_LIB OFF) set(USE_ARM_COMPUTE_LIB_GRAPH_RUNTIME OFF) +# Whether to build with Arm Ethos-N support +# Possible values: +# - OFF: disable Arm Ethos-N support +# - path/to/arm-ethos-N-stack: use a specific version of the +# Ethos-N driver stack +set(USE_ETHOSN OFF) +# If USE_ETHOSN is enabled, use ETHOSN_HW (ON) if Ethos-N hardware is available on this machine +# otherwise use ETHOSN_HW (OFF) to use the software test infrastructure +set(USE_ETHOSN_HW OFF) + # Build ANTLR parser for Relay text format # Possible values: # - ON: enable ANTLR by searching default locations (cmake 
find_program for antlr4 and /usr/local for jar) diff --git a/cmake/modules/contrib/EthosN.cmake b/cmake/modules/contrib/EthosN.cmake new file mode 100644 index 0000000000000..ca1f7daa8c8a1 --- /dev/null +++ b/cmake/modules/contrib/EthosN.cmake @@ -0,0 +1,57 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# Arm Ethos-N rules + +if(NOT USE_ETHOSN STREQUAL "OFF") + find_ethosn(${USE_ETHOSN}) + + if(NOT ETHOSN_FOUND) + message(FATAL_ERROR "Cannot find Ethos-N, USE_ETHOSN=" ${USE_ETHOSN}) + + else() + include_directories(${ETHOSN_INCLUDE_DIRS}) + add_definitions(${ETHOSN_DEFINITIONS}) + + message(STATUS "Build with Ethos-N ${ETHOSN_PACKAGE_VERSION}") + + file(GLOB ETHOSN_RUNTIME_CONTRIB_SRC + CONFIGURE_DEPENDS src/runtime/contrib/ethosn/ethosn_runtime.cc + src/runtime/contrib/ethosn/ethosn_device.cc) + list(APPEND RUNTIME_SRCS ${ETHOSN_RUNTIME_CONTRIB_SRC}) + + file(GLOB COMPILER_ETHOSN_SRCS + CONFIGURE_DEPENDS src/relay/backend/contrib/ethosn/*) + list(APPEND COMPILER_SRCS ${COMPILER_ETHOSN_SRCS}) + + list(APPEND TVM_LINKER_LIBS ${ETHOSN_COMPILER_LIBRARY} + ${ETHOSN_RUNTIME_LIBRARY}) + list(APPEND TVM_RUNTIME_LINKER_LIBS ${ETHOSN_COMPILER_LIBRARY} + ${ETHOSN_RUNTIME_LIBRARY}) + + if(NOT MSVC) + set_source_files_properties(${COMPILER_ETHOSN_SRCS} + PROPERTIES COMPILE_DEFINITIONS "DMLC_ENABLE_RTTI=0") + set_source_files_properties(${COMPILER_ETHOSN_SRCS} + PROPERTIES COMPILE_FLAGS "-fno-rtti") + endif() + endif(NOT ETHOSN_FOUND) +else() + if(USE_ETHOSN_HW) + message(FATAL_ERROR "Cannot enable Ethos-N HW if USE_ETHOSN=OFF") + endif() +endif(NOT USE_ETHOSN STREQUAL "OFF") diff --git a/cmake/util/FindEthosN.cmake b/cmake/util/FindEthosN.cmake new file mode 100644 index 0000000000000..7f0fb64996911 --- /dev/null +++ b/cmake/util/FindEthosN.cmake @@ -0,0 +1,94 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +####################################################### +# Find Arm Ethos-N libraries +# +# Usage: +# find_ethosn(${USE_ETHOSN}) +# +# - When USE_ETHOSN=/path/to/ethos-sdk-path, use the path from USE_ETHOSN +# - Else, when environment variable ETHOSN_STACK is set, use that path +# - When USE_ETHOSN=ON, use auto search +# +# Provide variables: +# +# - ETHOSN_FOUND +# - ETHOSN_PACKAGE_VERSION +# - ETHOSN_DEFINITIONS +# - ETHOSN_INCLUDE_DIRS +# - ETHOSN_COMPILER_LIBRARY +# - ETHOSN_RUNTIME_LIBRARY + +macro(find_ethosn use_ethosn) + set(__use_ethosn ${use_ethosn}) + if(IS_DIRECTORY ${__use_ethosn}) + set(__ethosn_stack ${__use_ethosn}) + message(STATUS "Arm Ethos-N driver stack PATH=" ${__use_ethosn}) + elseif(IS_DIRECTORY $ENV{ETHOSN_STACK}) + set(__ethosn_stack $ENV{ETHOSN_STACK}) + message(STATUS "Arm Ethos-N driver stack from env=" ${__ethosn_stack}) + else() + set(__ethosn_stack "") + endif() + + if(__ethosn_stack) + set(ETHOSN_INCLUDE_DIRS "") + # Compile-time support + find_path(_SL_DIR NAMES Support.hpp + PATHS ${__ethosn_stack}/include/ethosn_support_library) + string(REGEX REPLACE "/ethosn_support_library" "" _SL_DIR2 ${_SL_DIR}) + list(APPEND ETHOSN_INCLUDE_DIRS "${_SL_DIR2}") + + find_library(ETHOSN_COMPILER_LIBRARY NAMES EthosNSupport + PATHS ${__ethosn_stack}/lib) + find_library(ETHOSN_COMPILER_LIBRARY NAMES EthosNSupport) + + set(ETHOSN_PACKAGE_VERSION "0.1.1") + + if(USE_ETHOSN_HW STREQUAL "ON") + # Runtime hardware support + find_path(_DL_DIR NAMES Network.hpp + PATHS ${__ethosn_stack}/include/ethosn_driver_library) + string(REGEX REPLACE 
"/ethosn_driver_library" "" _DL_DIR2 ${_DL_DIR}) + list(APPEND ETHOSN_INCLUDE_DIRS "${_DL_DIR2}") + + find_library(ETHOSN_RUNTIME_LIBRARY NAMES EthosNDriver + PATHS ${__ethosn_stack}/lib) + find_library(ETHOSN_RUNTIME_LIBRARY NAMES EthosNDriver) + set(ETHOSN_DEFINITIONS -DETHOSN_HW) + endif () + + if(ETHOSN_COMPILER_LIBRARY) + set(ETHOSN_FOUND TRUE) + endif() + endif(__ethosn_stack) + + if(NOT ETHOSN_FOUND) + if(__use_ethosn STREQUAL "ON") + message(WARNING "No cmake find_package available for Arm Ethos-N") + endif() + + # additional libraries + else() + message(STATUS "Found ETHOSN_DEFINITIONS=${ETHOSN_DEFINITIONS}") + message(STATUS "Found ETHOSN_INCLUDE_DIRS=${ETHOSN_INCLUDE_DIRS}") + message(STATUS "Found ETHOSN_COMPILER_LIBRARY=${ETHOSN_COMPILER_LIBRARY}") + message(STATUS "Found ETHOSN_RUNTIME_LIBRARY=${ETHOSN_RUNTIME_LIBRARY}") + endif(NOT ETHOSN_FOUND) + +endmacro(find_ethosn) diff --git a/python/tvm/relay/op/contrib/__init__.py b/python/tvm/relay/op/contrib/__init__.py index 26ca78c1190b0..dbcd8055d30bc 100644 --- a/python/tvm/relay/op/contrib/__init__.py +++ b/python/tvm/relay/op/contrib/__init__.py @@ -21,3 +21,4 @@ from .arm_compute_lib import * from .dnnl import * from .coreml import * +from .ethosn import * diff --git a/python/tvm/relay/op/contrib/_ethosn.py b/python/tvm/relay/op/contrib/_ethosn.py new file mode 100644 index 0000000000000..ea2915675ec61 --- /dev/null +++ b/python/tvm/relay/op/contrib/_ethosn.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Expose 'is supported' functions to Python.""" + +import tvm._ffi + +tvm._ffi._init_api("relay.ethos-n.support", __name__) diff --git a/python/tvm/relay/op/contrib/ethosn.py b/python/tvm/relay/op/contrib/ethosn.py new file mode 100644 index 0000000000000..de70297a78898 --- /dev/null +++ b/python/tvm/relay/op/contrib/ethosn.py @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name, unused-argument +"""Arm(R) Ethos(TM) -N NPU supported operators.""" +from enum import Enum +import tvm.ir +from ... import qnn as _qnn +from . 
import _ethosn as support + + +class Available(Enum): + UNAVAILABLE = 0 + SW_ONLY = 1 + SW_AND_HW = 2 + + def __bool__(self): + return self != Available.UNAVAILABLE + + +def ethosn_available(): + """Return whether Ethos-N software and hardware support is available""" + if not tvm.get_global_func("relay.ethos-n.query", True): + print("skip because Ethos-N module is not available") + return Available.UNAVAILABLE + hw = tvm.get_global_func("relay.ethos-n.query")() + return Available.SW_AND_HW if hw else Available.SW_ONLY + + +@tvm.ir.register_op_attr("qnn.concatenate", "target.ethos-n") +def qnn_concatenate(attrs, args): + """Check if a concatenate is supported by Ethos-N.""" + if not ethosn_available(): + return False + + conc = _qnn.op.concatenate(*args, **attrs) + if not support.concatenate(conc): + return False + + # Support library has some unenforced restrictions on qnn params + min_range = 1e9 + max_range = -1e9 + qnn_params = [] + for i in range(len(args[1].fields)): + scale = args[1].fields[i].data.asnumpy() + zero_point = args[2].fields[i].data.asnumpy() + min_range = min(-1 * zero_point * scale, min_range) + max_range = max((255 - zero_point) * scale, max_range) + qnn_params.append((scale, zero_point)) + + scale = (max_range - min_range) / 255 + zero_point = int(-min_range/scale) + if (scale, zero_point) in qnn_params: + return True + + return False + + +@tvm.ir.register_op_attr("split", "target.ethos-n") +def split(attrs, args): + """Check if a split is supported by Ethos-N.""" + if not ethosn_available(): + return False + + if isinstance(attrs["indices_or_sections"], tvm.tir.IntImm): + sp = tvm.relay.split(*args, + indices_or_sections=attrs["indices_or_sections"].value, + axis=attrs["axis"]) + else: + sp = tvm.relay.split(*args, + indices_or_sections=attrs["indices_or_sections"], + axis=attrs["axis"]) + if not support.split(sp.astuple()): + return False + + return True diff --git a/src/relay/backend/contrib/ethosn/capabilities.h 
b/src/relay/backend/contrib/ethosn/capabilities.h new file mode 100644 index 0000000000000..409d440cf8e1c --- /dev/null +++ b/src/relay/backend/contrib/ethosn/capabilities.h @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/backend/contrib/ethosn/capabilities.h + * \brief The Ethos-N processor series has four variants, the Ethos-N37, Ethos-N57, Ethos-N77 + * and the Ethos-N78. This release of the integration supports the first three variants. + * Configuration information for each variant is stored as a blob in this file. These blobs + * are passed into the Ethos-N support library, which in turn uses them to optimize the + * generated command-stream appropriately for the specified variant. 
+ */ + +#ifndef TVM_RELAY_BACKEND_CONTRIB_ETHOSN_CAPABILITIES_H_ +#define TVM_RELAY_BACKEND_CONTRIB_ETHOSN_CAPABILITIES_H_ + +#include + +namespace tvm { +namespace relay { +namespace contrib { +namespace ethosn { + +/* Ethos-N variants (N77, N57 and N37) + * variant[0] - N77 + * variant[1] - N57 + * variant[2] - N37 + */ +static std::vector variants[3] = { + { + 0x02, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + { + 0x02, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + { + 0x02, 0x00, 0x00, 0x00, 0x74, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x10, 0x00, + 0x00, 
0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }}; + +} // namespace ethosn +} // namespace contrib +} // namespace relay +} // namespace tvm + +#endif // TVM_RELAY_BACKEND_CONTRIB_ETHOSN_CAPABILITIES_H_ diff --git a/src/relay/backend/contrib/ethosn/codegen.cc b/src/relay/backend/contrib/ethosn/codegen.cc new file mode 100644 index 0000000000000..f66eb94cb20fd --- /dev/null +++ b/src/relay/backend/contrib/ethosn/codegen.cc @@ -0,0 +1,361 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/backend/contrib/ethosn/codegen.cc + * \brief The Relay -> Ethos-N command stream compiler. 
+ */ +#include +#include + +#include "capabilities.h" +#include "codegen_ethosn.h" +#include "ethosn_api.h" + +namespace tvm { +namespace relay { +namespace contrib { +namespace ethosn { + +sl::TensorInfo GetTensorInfo(std::map> tensor_table, + const Call& call) { + if (tensor_table.find(call) != tensor_table.end()) return tensor_table[call][0]; + + return sl::TensorInfo(); +} + +bool IsEthosnOp(const Call& call, const std::string& op_name) { + if (call->op->IsInstance()) { + Op op = Downcast(call->op); + CHECK(op.defined()); + return op == Op::Get(op_name); + } else { + return false; + } +} + +std::map> InferTensorsVisitor::Infer(const Expr& expr) { + tensor_table_.clear(); + CHECK(expr->checked_type().defined()); + size_t output_size = 1; + if (auto tuple = expr->checked_type().as()) { + output_size = tuple->fields.size(); + } + for (size_t i = 0; i < output_size; i++) { + tensor_table_[expr].push_back(sl::TensorInfo({1, 1, 1, 1}, sl::DataType::UINT8_QUANTIZED, + sl::DataFormat::NHWC, sl::QuantizationInfo())); + } + VisitInferred(expr); + return tensor_table_; +} + +void InferTensorsVisitor::InferCall(const CallNode* cn) { + EthosnError err; + Call call = GetRef(cn); + // Determine call -> NPU mapping + if (IsEthosnOp(call, "qnn.concatenate")) { + ConcatenateParams params; + err = EthosnAPI::Concatenate(call, ¶ms); + tensor_table_[cn->args[0]] = params.input_infos; + } else if (IsEthosnOp(call, "split")) { + SplitParams params; + params.input_info = GetTensorInfo(tensor_table_, call); + err = EthosnAPI::Split(call, ¶ms); + tensor_table_[cn->args[0]] = {params.input_info}; + } else { + err = EthosnError("unknown operator"); + } + if (err) { + ReportFatalError(call, err); + } +} + +// This will only visit an expression if the expression's tensor info +// has already been entirely inferred. +// An example where this is important is a tuple node where each +// get item node will only infer one field of the tuple's expression info. 
+// We don't want to traverse the tuple until all of its fields have been inferred. +void InferTensorsVisitor::VisitInferred(const Expr& expr) { + if (tensor_table_.find(expr) != tensor_table_.end()) { + for (const auto& tensor_info : tensor_table_[expr]) { + if (tensor_info == sl::TensorInfo()) return; + } + VisitExpr(expr); + } +} + +void InferTensorsVisitor::VisitExpr_(const CallNode* cn) { + InferCall(cn); + // Pre-order visitor + for (const auto& arg : cn->args) { + VisitInferred(arg); + } +} + +void InferTensorsVisitor::VisitExpr_(const TupleNode* tn) { + auto tuple = GetRef(tn); + CHECK(tensor_table_.find(tuple) != tensor_table_.end()); + for (size_t i = 0; i < tn->fields.size(); i++) { + tensor_table_[tn->fields[i]] = {tensor_table_[tuple][i]}; + } + // Pre-order visitor + for (const auto& field : tn->fields) { + VisitExpr(field); + } +} + +void InferTensorsVisitor::VisitExpr_(const TupleGetItemNode* tgn) { + // Don't assume it must be targeting a TupleNode + // Vars and calls can still have TupleType + auto tg = GetRef(tgn); + CHECK(tensor_table_.find(tg) != tensor_table_.end()); + auto tuple = tg->tuple; + auto type = tuple->checked_type().as(); + int index = tg->index; + // Resize the tensor infos to the tuple size if not already done + if (tensor_table_.find(tuple) == tensor_table_.end()) { + tensor_table_[tuple].resize(type->fields.size()); + } + tensor_table_[tuple][index] = tensor_table_[tg][0]; + // Pre-order visitor + VisitInferred(tuple); +} + +sl::TensorsAndId MakeOps(const sl::TensorAndId& op) { + sl::TensorsAndId ops; + ops.tensors = {op.tensor}; + ops.operationId = op.operationId; + return ops; +} + +NetworkWithIDs ConstructNetworkVisitor::Construct(const Function& func) { + // Initialise everything + NetworkWithIDs network_with_ids; + network_ = sl::CreateNetwork(); + network_with_ids.network = network_; + operand_table_.clear(); + + // Infer tensor information + tensor_table_ = InferTensors(this->mod_, this->var_, func->body); + // Add the 
inputs in the order they appear in the parameters + unsigned int idx = 0; + for (const auto& param : func->params) { + for (const auto& tensor_info : tensor_table_[param]) { + auto tensor_and_id = AddInput(network_, tensor_info); + operand_table_[param].push_back(tensor_and_id.tensor); + id_table_[param].push_back(std::make_pair(tensor_and_id.operationId, 0)); + network_with_ids.input_ids[tensor_and_id.operationId] = idx++; + } + } + // Add the function body + VisitExpr(func->body); + // Add the outputs + idx = 0; + for (const auto& layer : operand_table_[func->body]) { + AddOutput(network_, *layer); + network_with_ids.output_ids[id_table_[func->body][idx]] = idx; + idx++; + } + return network_with_ids; +} + +sl::TensorsAndId ConstructNetworkVisitor::HandleCall(const CallNode* cn) { + EthosnError err; + Call call = GetRef(cn); + sl::TensorAndId tensor; + sl::TensorsAndId tensors; + // Determine call -> NPU mapping + if (IsEthosnOp(call, "qnn.concatenate")) { + if ((err = MakeConcatenateLayer(call, &tensor))) ReportFatalError(call, err); + return MakeOps(tensor); + } else if (IsEthosnOp(call, "split")) { + if ((err = MakeSplitLayer(call, &tensors))) ReportFatalError(call, err); + return tensors; + } else { + ReportFatalError(call, EthosnError("unknown operator")); + return {}; + } +} + +void ConstructNetworkVisitor::VisitExpr_(const CallNode* cn) { + auto operand = HandleCall(cn); + operand_table_[GetRef(cn)] = operand.tensors; + for (size_t i = 0; i < operand.tensors.size(); i++) { + id_table_[GetRef(cn)].push_back(std::make_pair(operand.operationId, i)); + } +} + +void ConstructNetworkVisitor::VisitExpr_(const TupleNode* op) { + Tuple tuple = GetRef(op); + for (const auto& arg : tuple->fields) { + // The fields in a tuple should not themselves be tuples + // Nested tuples are not supported + if (operand_table_[arg].size() == 1) { + operand_table_[tuple].push_back(operand_table_[arg][0]); + id_table_[tuple].push_back(id_table_[arg][0]); + } else { + 
operand_table_[tuple].push_back(nullptr); + id_table_[tuple].push_back(std::make_pair(0, 0)); + } + } +} + +void ConstructNetworkVisitor::VisitExpr_(const TupleGetItemNode* tg) { + Expr tuple = tg->tuple; + operand_table_[GetRef(tg)] = {operand_table_[tuple][tg->index]}; + id_table_[GetRef(tg)] = {id_table_[tuple][tg->index]}; +} + +void ConstructNetworkVisitor::VisitLeaf(const Expr& expr) { + // Don't traverse into functions, they're not supported + if (!expr->IsInstance()) MixedModeVisitor::VisitLeaf(expr); +} + +EthosnError ConstructNetworkVisitor::MakeConcatenateLayer(const Call& call, + sl::TensorAndId* out) { + ConcatenateParams params; + if (auto err = EthosnAPI::Concatenate(call, ¶ms)) { + return err; + } + + std::vector layers; + auto ops = operand_table_[call->args[0]]; + + for (const auto& op : ops) { + layers.emplace_back(op.get()); + } + try { + *out = AddConcatenation(network_, layers, params.concat_info); + } catch (const sl::NotSupportedException& e) { + return EthosnError(e.what()); + } + return EthosnError(); +} + +EthosnError ConstructNetworkVisitor::MakeSplitLayer(const Call& call, sl::TensorsAndId* outs) { + SplitParams params; + params.input_info = GetTensorInfo(tensor_table_, call); + if (auto err = EthosnAPI::Split(call, ¶ms)) { + return err; + } + + auto input = operand_table_[call->args[0]][0]; + + try { + *outs = AddSplit(network_, *input, params.split_info); + } catch (const sl::NotSupportedException& e) { + return EthosnError(e.what()); + } + return EthosnError(); +} + +runtime::Module EthosnCompiler::CreateRuntimeModule(const ObjectRef& ref) { + std::vector cmms; + if (ref->IsInstance()) { + IRModule mod; + Function func = Downcast(ref); + auto name_node = func->GetAttr(tvm::attr::kGlobalSymbol); + CHECK(name_node.defined()) << "Failed to retrieved external symbol."; + GlobalVar gvar = GlobalVar(name_node.value()); + mod->Add(gvar, func); + Function mod_func = Downcast(mod->functions.at(gvar)); + 
cmms.emplace_back(CompileEthosnFunc(mod, gvar, mod_func)); + } else { + LOG(FATAL) << "The input ref is expected to be a Relay function"; + } + auto n = make_object(&cmms); + return runtime::Module(n); +} + +runtime::ethosn::OrderedCompiledNetwork EthosnCompiler::CompileEthosnFunc(const IRModule& mod, + const GlobalVar& gvar, + const Function& func) { + // Construct the network + auto network_with_ids = ConstructNetwork(mod, gvar, func); + // Now set the required build flags + sl::CompilationOptions options = CreateOptions(); + // Finally compile the network + std::vector> compiled_networks = + sl::Compile(*network_with_ids.network, options); + CHECK_GE(compiled_networks.size(), 1) << "Ethos-N compiler failed to compile network"; + auto compiled_network = std::move(compiled_networks[0]); + // Determine the order that the inputs/outputs are in and how that corresponds to the + // order that the TVM runtime will expect them in + auto input_output_order = GetInputOutputOrder(network_with_ids, compiled_network); + // Use the order information to create an 'ordered' network with includes how to map + // the inputs/outputs from the TVM runtime to the inputs/outputs of the compiled network + runtime::ethosn::OrderedCompiledNetwork ordered_network; + ordered_network.name = gvar->name_hint; + ordered_network.cmm = std::move(compiled_network); + ordered_network.inputs = input_output_order.first; + ordered_network.outputs = input_output_order.second; + return ordered_network; +} + +sl::CompilationOptions EthosnCompiler::CreateOptions() { + auto ctx = transform::PassContext::Current(); + auto cfg = ctx->GetConfig("relay.ext.ethos-n.options"); + if (!cfg.defined()) { + cfg = AttrsWithDefaultValues(); + } + + sl::CompilationOptions options(variants[cfg.value()->variant]); + options.m_Strategy0 = cfg.value()->strategy0; + options.m_Strategy1 = cfg.value()->strategy1; + options.m_Strategy3 = cfg.value()->strategy3; + options.m_Strategy4 = cfg.value()->strategy4; + 
options.m_Strategy6 = cfg.value()->strategy6; + options.m_Strategy7 = cfg.value()->strategy7; + options.m_DebugInfo.m_DumpRam = cfg.value()->dump_ram; + options.m_DebugInfo.m_InitialSramDump = cfg.value()->initial_sram_dump; + options.m_BlockConfig16x16 = cfg.value()->block_config_16x16; + options.m_BlockConfig32x8 = cfg.value()->block_config_32x8; + options.m_BlockConfig8x32 = cfg.value()->block_config_8x32; + options.m_BlockConfig8x8 = cfg.value()->block_config_8x8; + options.m_EnableIntermediateCompression = cfg.value()->enable_intermediate_compression; + options.m_DisableWinograd = cfg.value()->disable_winograd; + options.m_DebugInfo.m_DumpDebugFiles = cfg.value()->dump_debug_files; + options.m_DebugInfo.m_DebugDir = cfg.value()->debug_dir; + options.m_EnableCascading = cfg.value()->enable_cascading; + return options; +} + +std::pair, std::vector> EthosnCompiler::GetInputOutputOrder( + NetworkWithIDs network, const std::unique_ptr& compiled_network) { + std::vector input_infos = compiled_network->GetInputBufferInfos(); + std::vector output_infos = compiled_network->GetOutputBufferInfos(); + std::vector input_order; + std::vector output_order; + // Find the order of the inputs in the compiled network + for (const auto& input_info : input_infos) { + input_order.push_back(network.input_ids[input_info.m_SourceOperationId]); + } + // Find the order of the outputs in the compiled network + for (const auto& output_info : output_infos) { + auto output_id = + std::make_pair(output_info.m_SourceOperationId, output_info.m_SourceOperationOutputIndex); + output_order.push_back(network.output_ids[output_id]); + } + return std::make_pair(input_order, output_order); +} + +} // namespace ethosn +} // namespace contrib +} // namespace relay +} // namespace tvm diff --git a/src/relay/backend/contrib/ethosn/codegen_ethosn.h b/src/relay/backend/contrib/ethosn/codegen_ethosn.h new file mode 100644 index 0000000000000..714a22d220276 --- /dev/null +++ 
b/src/relay/backend/contrib/ethosn/codegen_ethosn.h @@ -0,0 +1,328 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/backend/contrib/ethosn/codegen_ethosn.h + * \brief The Relay -> Ethos-N command stream compiler. + */ + +#ifndef TVM_RELAY_BACKEND_CONTRIB_ETHOSN_CODEGEN_ETHOSN_H_ +#define TVM_RELAY_BACKEND_CONTRIB_ETHOSN_CODEGEN_ETHOSN_H_ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../../../runtime/contrib/ethosn/ethosn_runtime.h" +#include "../codegen_c/codegen_c.h" +#include "ethosn_api.h" +#include "ethosn_support_library/Support.hpp" +#include "ethosn_support_library/SupportQueries.hpp" + +namespace tvm { +namespace relay { +namespace contrib { +namespace ethosn { + +namespace sl = ::ethosn::support_library; + +/*! + * \brief A struct to hold an uncompiled support library network alongside + * the desired order of input and output operation ids. 
+ */ +struct NetworkWithIDs { + struct hash_pair { + template + size_t operator()(const std::pair& p) const { + return std::hash{}(p.first) ^ std::hash{}(p.second); + } + }; + std::shared_ptr network; + std::unordered_map input_ids; + std::unordered_map, unsigned int, hash_pair> output_ids; +}; + +/*! + * \brief A base class for error handling using ErrorReporter. + */ +class ErrorReportingPass { + public: + ErrorReportingPass(const IRModule& mod, const GlobalVar& var) : mod_(mod), var_(var) {} + + /*! + * \brief Report fatal errors for an expression. + * \param expr The expression to report errors at. + * \param err The errors to report. + */ + void ReportFatalError(const ObjectRef& expr, const EthosnError& err) { + for (const auto& msg : err.msgs) { + error_reporter_.ReportAt(this->var_, expr, ErrorBuilder() << msg); + } + error_reporter_.RenderErrors(this->mod_); + } + + protected: + /*! \brief An ErrorReporter object to render the errors.*/ + ErrorReporter error_reporter_; + /*! \brief The module to report errors for. */ + IRModule mod_; + /*! \brief The GlobalVar to report errors for. */ + GlobalVar var_; +}; + +/*! + * \brief A custom pass to infer the support library tensor information + * for a Relay expression. + * + * Support Library requires that tensors are explicitly declared with + * information on their size, data type, format (eg. NHWC) and quantisation + * parameters. In Relay, size and data type are already determined when the + * type_infer pass is run. However, format and quantisation parameters are + * properties of the operators that consume the tensors. + * + * This pass works by having each node initialise the information of its + * parents, essentially propagating the inferred information all the way up + * to the inputs of the expression. 
+ * + * Because the children initialise the information of the parents, it is + * necessary to traverse the graph in such a way so as to ensure all the + * children of a node are visited before the parent is. As Relay does not + * keep a reference to child nodes, this pass goes in preorder but will + * skip visiting a parent if all the children haven't yet been visited (see + * VisitInferred for the logic that implements this). + * + * Inference only works for supported callnodes, for tuplenodes, tuplegetitem + * nodes and free var nodes. Other nodes should not be off-loaded to Ethos-N. + */ +class InferTensorsVisitor : private ErrorReportingPass, private ExprVisitor { + public: + InferTensorsVisitor(const IRModule& mod, const GlobalVar& var) : ErrorReportingPass(mod, var) {} + + /*! + * \brief Infer the support library tensor information for all the nodes + * in an expression. + * \param expr The expression for which to infer tensor information. + * \return A map of expressions to tensor information. + * \note This algorithm does not traverse into functions, so call it on + * the body of the function you're interested in. + */ + std::map> Infer(const Expr& expr); + + private: + // Infer a callnode if it's a supported operator/composite function + void InferCall(const CallNode* cn); + void VisitInferred(const Expr& expr); + + void VisitExpr_(const CallNode* cn) final; + void VisitExpr_(const TupleNode* tn) final; + void VisitExpr_(const TupleGetItemNode* tg) final; + // Don't traverse into functions, the Ethos-N codegen isn't meant to support them. + void VisitExpr_(const FunctionNode* fn) final {} + + /*! \brief A look-up table from Expr to tensor infos. */ + std::map> tensor_table_; +}; + +std::map> InferTensors(const IRModule& mod, const GlobalVar& var, + const Expr& expr) { + return InferTensorsVisitor(mod, var).Infer(expr); +} + +/*! + * \brief A pass to generate a support library network from a Relay function. 
+ * + * This pass constructs an equivalent support library network from a Relay + * function in two visits. One to infer the tensor information of all the nodes + * and another in postorder to add the nodes as support library operands. + * (Supported) Callnodes, tuplenodes, tuplegetitemnodes and (free) + * varnodes are handled by this pass. + * + * As part of the pass, nodes in the function body are associated with both + * type information in the 'tensor_table', and support library operands in the + * 'operand_table'. Both of these are maps of vectors as a Relay node can have + * tuple type and accordingly be associated with multiple tensors. For nodes + * which are not tuple type, vectors of size 1 are used. + */ +class ConstructNetworkVisitor : public MixedModeVisitor, private ErrorReportingPass { + public: + explicit ConstructNetworkVisitor(const IRModule& mod, const GlobalVar& var) + : ErrorReportingPass(mod, var) {} + + /*! + * \brief Construct a support library network from a given Relay function. The + * function should contain only nodes supported by Ethos-N. + * \param func The Relay function for which to construct a support library network. + * \return A support library network that performs the same operation as the Relay + * function. + */ + NetworkWithIDs Construct(const Function& func); + + private: + // Translate from a callnode to the appropriate 'Make' method + sl::TensorsAndId HandleCall(const CallNode*); + + void VisitExpr_(const CallNode* cn) final; + void VisitExpr_(const TupleNode* op) final; + void VisitExpr_(const TupleGetItemNode* tg) final; + void VisitLeaf(const Expr& expr) final; + + // Make a support library operand from a Call + EthosnError MakeConcatenateLayer(const Call& call, sl::TensorAndId* out); + EthosnError MakeSplitLayer(const Call& call, sl::TensorsAndId* outs); + + /*! \brief A look-up table from Expr to layers. */ + std::map>> operand_table_; + /*! \brief A look-up table from Expr to SL operation IDs. 
*/ + std::map>> id_table_; + /*! \brief A look-up table from Expr to tensor infos. */ + std::map> tensor_table_; + /*! \brief The support library network to compile. */ + std::shared_ptr network_; +}; + +NetworkWithIDs ConstructNetwork(const IRModule& mod, const GlobalVar& var, const Function& func) { + return ConstructNetworkVisitor(mod, var).Construct(func); +} + +/*! \brief Attributes to store the compiler options for Ethos-N */ +struct EthosnCompilerConfigNode : public tvm::AttrsNode { + int variant; + bool strategy0; + bool strategy1; + bool strategy3; + bool strategy4; + bool strategy6; + bool strategy7; + bool dump_ram; + bool initial_sram_dump; + bool block_config_16x16; + bool block_config_32x8; + bool block_config_8x32; + bool block_config_8x8; + bool enable_intermediate_compression; + bool disable_winograd; + bool dump_debug_files; + String debug_dir; + bool enable_cascading; + + TVM_DECLARE_ATTRS(EthosnCompilerConfigNode, "ext.attrs.EthosnCompilerConfigNode") { + TVM_ATTR_FIELD(variant) + .describe("0 for Ethos-N77, 1 for Ethos-N57, 2 for Ethos-N37. 
See Ethos-N documentation.") + .set_default(0); + TVM_ATTR_FIELD(strategy0).set_default(true); + TVM_ATTR_FIELD(strategy1).set_default(true); + TVM_ATTR_FIELD(strategy3).set_default(true); + TVM_ATTR_FIELD(strategy4).set_default(true); + TVM_ATTR_FIELD(strategy6).set_default(true); + TVM_ATTR_FIELD(strategy7).set_default(true); + TVM_ATTR_FIELD(dump_ram).set_default(false); + TVM_ATTR_FIELD(initial_sram_dump).set_default(false); + TVM_ATTR_FIELD(block_config_16x16).set_default(true); + TVM_ATTR_FIELD(block_config_32x8).set_default(true); + TVM_ATTR_FIELD(block_config_8x32).set_default(true); + TVM_ATTR_FIELD(block_config_8x8).set_default(true); + TVM_ATTR_FIELD(enable_intermediate_compression).set_default(true); + TVM_ATTR_FIELD(disable_winograd).set_default(false); + TVM_ATTR_FIELD(dump_debug_files).set_default(false); + TVM_ATTR_FIELD(debug_dir).set_default("."); + TVM_ATTR_FIELD(enable_cascading).set_default(false); + } +}; + +class EthosnCompilerConfig : public Attrs { + public: + TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS(EthosnCompilerConfig, Attrs, EthosnCompilerConfigNode); +}; + +TVM_REGISTER_NODE_TYPE(EthosnCompilerConfigNode); +TVM_REGISTER_PASS_CONFIG_OPTION("relay.ext.ethos-n.options", EthosnCompilerConfig); + +/*! \brief The compiler for Ethos-N functions */ +class EthosnCompiler { + public: + /*! + * \brief Create an Ethos-N runtime module from a Relay Ethos-N function + * \param ref An ObjectRef pointing to a Relay Ethos-N function + * \return runtime_module An Ethos-N runtime module + */ + static runtime::Module CreateRuntimeModule(const ObjectRef& ref); + + private: + /*! + * \brief Compile a single Relay Ethos-N function into an ordered compiled network. + * Compilation options will be taken from the PassContext. 
+ * \param mod The module the function is stored in (for error reporting purposes) + * \param gvar The global var corresponding to the function + * \param func The function to be compiled + * \return ordered_compiled_network A compiled network with additional information + * to handle difference in input/output ordering between the TVM runtime and the + * Ethos-N compiled network. + */ + static runtime::ethosn::OrderedCompiledNetwork CompileEthosnFunc(const IRModule& mod, + const GlobalVar& gvar, + const Function& func); + + /*! + * \brief Get the Support Library compilation options from the PassContext + * \return options The compilation options + */ + static sl::CompilationOptions CreateOptions(); + + /*! + * \brief Determine the order in which inputs should be provided/outputs should be + * read from a compiled network. This is required because when you compile a network + * for Ethos-N, you don't have control over the order in which the inputs/outputs + * are given. You can, however, query what order the compiler decided to give them in. + * We therefore keep track of our desired order and the actual order and create a + * small translation table between the two for use in the runtime. + * \param network A network additionally with the desired input/output order + * \param compiled_network The compiled network with an as yet undetermined input/output order + * \return input_output_order The order in which to permute the inputs/outputs given + * by the TVM runtime such that they map correctly to the compiled network. 
+ */ + static std::pair, std::vector> GetInputOutputOrder( + NetworkWithIDs network, const std::unique_ptr& compiled_network); +}; + +runtime::Module CompileEthosn(const ObjectRef& ref) { + return EthosnCompiler::CreateRuntimeModule(ref); +} + +TVM_REGISTER_GLOBAL("relay.ext.ethos-n").set_body_typed(CompileEthosn); + +} // namespace ethosn +} // namespace contrib +} // namespace relay +} // namespace tvm + +#endif // TVM_RELAY_BACKEND_CONTRIB_ETHOSN_CODEGEN_ETHOSN_H_ diff --git a/src/relay/backend/contrib/ethosn/ethosn_api.cc b/src/relay/backend/contrib/ethosn/ethosn_api.cc new file mode 100644 index 0000000000000..d92e35afeea09 --- /dev/null +++ b/src/relay/backend/contrib/ethosn/ethosn_api.cc @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "ethosn_api.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "ethosn_support_library/Support.hpp" +#include "ethosn_support_library/SupportQueries.hpp" + +namespace tvm { +namespace relay { +namespace contrib { +namespace ethosn { + +EthosnError EthosnAPI::Concatenate(const Expr& expr, ConcatenateParams* params) { + Call call = Downcast(expr); + const auto& attrs = call->attrs.as(); + params->concat_info.m_Axis = attrs->axis; + + float output_s; + int output_zp; + EthosnError err = AsConstant(call->args[3], &output_s); + err += AsConstant(call->args[4], &output_zp); + params->concat_info.m_OutputQuantizationInfo = sl::QuantizationInfo(output_zp, output_s); + + auto input_scales = call->args[1].as()->fields; + auto input_zero_points = call->args[2].as()->fields; + auto input_tensors = call->args[0]->checked_type().as()->fields; + + int index = 0; + for (auto input_scale : input_scales) { + auto input_dtype = input_tensors[index].as(); + auto input_zero_point = input_zero_points[index]; + float scale; + int zp; + err += AsConstant(input_scale, &scale); + err += AsConstant(input_zero_point, &zp); + sl::TensorShape input_tensor_shape = {1, 1, 1, 1}; + sl::DataType input_data_type; + err += Tvm2Npu(input_dtype->shape, &input_tensor_shape); + err += Tvm2Npu(input_dtype->dtype, &input_data_type); + params->input_infos.emplace_back(sl::TensorInfo(input_tensor_shape, input_data_type, + sl::DataFormat::NHWC, + sl::QuantizationInfo(zp, scale))); + index++; + } + return err; +} + +EthosnError EthosnAPI::Split(const Expr& expr, SplitParams* params) { + Call call = Downcast(expr); + const auto* input_tensor_type = call->args[0]->checked_type().as(); + const auto& attrs = call->attrs.as(); + + sl::TensorShape input_tensor_shape = {1, 1, 1, 1}; + sl::DataType input_data_type; + EthosnError err = Tvm2Npu(input_tensor_type->shape, &input_tensor_shape); + err += 
Tvm2Npu(input_tensor_type->dtype, &input_data_type); + params->input_info = + sl::TensorInfo(input_tensor_shape, input_data_type, params->input_info.m_DataFormat, + params->input_info.m_QuantizationInfo); + params->split_info.m_Axis = attrs->axis; + if (attrs->indices_or_sections->IsInstance()) { + auto sections = Downcast(attrs->indices_or_sections)->value; + int size = input_tensor_shape[attrs->axis] / sections; + for (int i = 0; i < sections; i++) { + params->split_info.m_Sizes.push_back(size); + } + } else { + auto indices = Downcast>(attrs->indices_or_sections); + int last_index = 0; + for (const auto& i : indices) { + params->split_info.m_Sizes.push_back(i->value - last_index); + last_index = i->value; + } + int axis_size = input_tensor_shape[attrs->axis]; + params->split_info.m_Sizes.push_back(axis_size - last_index); + } + return err; +} + +EthosnError EthosnAPI::Tvm2Npu(const Array& shape, sl::TensorShape* npu_shape) { + EthosnError err = AsArray(shape, npu_shape); + if (npu_shape->front() != 1) { + err += EthosnError(ErrStrm() << "batch size=" << npu_shape->front() << ", batch size must = 1"); + } + return err; +} + +EthosnError EthosnAPI::Tvm2Npu(const tvm::DataType& dtype, sl::DataType* data_type) { + if (dtype.is_scalar() == 1) { + if (dtype.is_uint() && dtype.bits() == 8) { + *data_type = sl::DataType::UINT8_QUANTIZED; + return EthosnError(); + } else if (dtype.is_int() && dtype.bits() == 32) { + *data_type = sl::DataType::INT32_QUANTIZED; + return EthosnError(); + } + } + return EthosnError(ErrStrm() << "dtype=\'" << dtype << "\', dtype must be either uint8 or int32"); +} + +// Convert an array of IntImmNodes into ValueT +// IndexT type of Array indexing variable +// ValueT type of resulting value +template +EthosnError EthosnAPI::AsArray(const Array& arr, std::array* v) { + if (arr.size() > 4) + return EthosnError(ErrStrm() << "dimensions=" << arr.size() << ", dimensions must be <= 4"); + for (size_t i = 0; i < std::min(arr.size(), 4ul); i++) { + 
const PrimExpr& a = arr[i]; + const auto* intImm = a.as(); + if (intImm->value > std::numeric_limits::max()) { + return EthosnError(ErrStrm() << "axis size=" << intImm->value << ", axis size must be <= " + << std::numeric_limits::max()); + } + (*v)[i] = static_cast(intImm->value); + } + return EthosnError(); +} + +// Get a T from a constant represented by a NDArray. +template +EthosnError EthosnAPI::AsConstant(const Expr& expr, T* out) { + if (!expr->IsInstance()) { + return EthosnError("expected constant data"); + } + runtime::NDArray data = Downcast(expr)->data; + *out = *static_cast(data->data); + return EthosnError(); +} + +TVM_REGISTER_GLOBAL("relay.ethos-n.support.concatenate") + .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { + Call call = args[0]; + ConcatenateParams params; + auto err = EthosnAPI::Concatenate(call, ¶ms); + *rv = !err && sl::IsConcatenationSupported(params.input_infos, params.concat_info); + }); + +TVM_REGISTER_GLOBAL("relay.ethos-n.support.split") + .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { + Call call = args[0]; + SplitParams params; + auto err = EthosnAPI::Split(call, ¶ms); + *rv = !err && sl::IsSplitSupported(params.input_info, params.split_info); + }); + +TVM_REGISTER_GLOBAL("relay.ethos-n.query").set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { +#if defined ETHOSN_HW + *rv = true; +#else + *rv = false; +#endif +}); + +} // namespace ethosn +} // namespace contrib +} // namespace relay +} // namespace tvm diff --git a/src/relay/backend/contrib/ethosn/ethosn_api.h b/src/relay/backend/contrib/ethosn/ethosn_api.h new file mode 100644 index 0000000000000..34af7ce0b1d8b --- /dev/null +++ b/src/relay/backend/contrib/ethosn/ethosn_api.h @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef TVM_RELAY_BACKEND_CONTRIB_ETHOSN_ETHOSN_API_H_ +#define TVM_RELAY_BACKEND_CONTRIB_ETHOSN_ETHOSN_API_H_ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "ethosn_support_library/Support.hpp" +#include "ethosn_support_library/SupportQueries.hpp" + +namespace tvm { +namespace relay { +namespace contrib { +namespace ethosn { + +namespace sl = ::ethosn::support_library; + +struct ConcatenateParams { + sl::QuantizationInfo qInfo; + sl::ConcatenationInfo concat_info = sl::ConcatenationInfo(1, qInfo); + std::vector input_infos; +}; + +struct SplitParams { + sl::SplitInfo split_info = sl::SplitInfo(0, {}); + sl::TensorInfo input_info; +}; + +/*! + * \brief A wrapper around std::stringstream to build an EthosnError. + */ +class ErrStrm { + public: + template + ErrStrm& operator<<(const T& val) { // NOLINT(*) + stream_ << val; + return *this; + } + + private: + std::stringstream stream_; + friend class EthosnError; +}; + +/*! + * \brief Custom error class for storing error messages produced + * during compilation for Ethos-N. + */ +class EthosnError { + public: + /*! \brief Default constructor */ + EthosnError() {} + /*! + * \brief Construct error from an Array of Strings + * \param msgs The messages + */ + explicit EthosnError(const Array& msgs) : msgs(msgs) {} + /*! 
+ * \brief Construct error from a String + * \param msg The message + */ + explicit EthosnError(const String& msg) { msgs.push_back(msg); } + /*! + * \brief Construct error from an ErrStrm + * \param err The ErrStrm + */ + explicit EthosnError(const ErrStrm& err) : EthosnError(err.stream_.str()) {} + + /*! \return Whether there are any error messages */ + explicit operator bool() const { return !msgs.empty(); } + + /*! \brief Add together two errors to give a single error with all the msgs */ + EthosnError& operator+=(const EthosnError& other) { + msgs.insert(msgs.end(), other.msgs.begin(), other.msgs.end()); + return *this; + } + + /*! \brief The error messages */ + Array msgs; +}; + +/*! + * \brief Functions to interact with Support Library's API including the + * translation of Relay ops/composite functions into Support Library + * equivalents. + */ +class EthosnAPI { + public: + /*! \brief Extract the Support Library concatenate params from a Relay qnn.concatenate call */ + static EthosnError Concatenate(const Expr& expr, ConcatenateParams* params); + /*! \brief Extract the Support Library split params from a Relay split call */ + static EthosnError Split(const Expr& expr, SplitParams* params); + + private: + /*! \brief Convert a TVM tensor shape to a SL tensor shape */ + static EthosnError Tvm2Npu(const Array& shape, sl::TensorShape* npu_shape); + /*! \brief Convert a TVM data type to a SL data type */ + static EthosnError Tvm2Npu(const tvm::DataType& dtype, sl::DataType* data_type); + + // Convert an array of IntImmNodes into ValueT + // IndexT type of Array indexing variable + // ValueT type of resulting value + template + static EthosnError AsArray(const Array& arr, std::array* v); + + // Get a T from a constant represented by a NDArray. 
+ template + static EthosnError AsConstant(const Expr& expr, T* out); +}; + +} // namespace ethosn +} // namespace contrib +} // namespace relay +} // namespace tvm + +#endif // TVM_RELAY_BACKEND_CONTRIB_ETHOSN_ETHOSN_API_H_ diff --git a/src/runtime/contrib/ethosn/ethosn_device.cc b/src/runtime/contrib/ethosn/ethosn_device.cc new file mode 100644 index 0000000000000..7e0d43fcf4cb3 --- /dev/null +++ b/src/runtime/contrib/ethosn/ethosn_device.cc @@ -0,0 +1,223 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file ethosn_device.cc + * \brief Ethos-N NPU device integration. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include + +#include "ethosn_driver_library/Buffer.hpp" +#include "ethosn_support_library/Support.hpp" + +#if defined ETHOSN_HW + +#include "ethosn_driver_library/Inference.hpp" +#include "ethosn_driver_library/Network.hpp" + +namespace tvm { +namespace runtime { +namespace ethosn { + +namespace sl = ::ethosn::support_library; +namespace dl = ::ethosn::driver_library; + +bool WaitForInference(dl::Inference* inference, int timeout) { + // Wait for inference to complete + int fd = inference->GetFileDescriptor(); + struct pollfd fds; + memset(&fds, 0, sizeof(fds)); + fds.fd = fd; + fds.events = POLLIN; // Wait for any available input. + + const int ms_per_seconds = 1000; + int poll_result = poll(&fds, 1, timeout * ms_per_seconds); + if (poll_result > 0) { + dl::InferenceResult result; + if (read(fd, &result, sizeof(result)) != sizeof(result)) { + return false; + } + if (result != dl::InferenceResult::Completed) { + return false; + } + } else if (poll_result == 0) { + return false; + } else { + return false; + } + return true; +} + +template +void CopyOutput(dl::Buffer* source_buffers[], std::vector* outputs) { + for (DLTensor* tensor : *outputs) { + dl::Buffer* source_buffer = source_buffers[0]; + uint8_t* source_buffer_data = source_buffer->GetMappedBuffer(); + size_t size = source_buffer->GetSize(); + T* dest_pointer = static_cast(tensor->data); + std::copy_backward(source_buffer_data, source_buffer_data + size, dest_pointer + size); + source_buffers++; + } +} + +void CreateBuffers(std::vector >* fm, + const std::vector& tensors) { + int index = 0; + for (auto buffer : tensors) { + auto* data = static_cast(buffer->data); + // The NPU only needs the size of the tensor * uint8_t. 
+ auto data_size = static_cast(GetDataSize(*buffer)); + (*fm)[index++] = std::make_shared(data, data_size, dl::DataFormat::NHWC); + } +} + +bool Inference(tvm::runtime::TVMArgs args, sl::CompiledNetwork* network, + const std::vector& input_order, + const std::vector& output_order) { + // Unpack parameters + uint8_t argc = 0; + std::vector inputs(input_order.size()); + for (uint8_t i = 0; i < network->GetInputBufferInfos().size(); i++) { + inputs[input_order[i]] = args[argc++]; + } + auto out_infos = network->GetOutputBufferInfos(); + std::vector outputs(output_order.size()); + for (uint8_t i = 0; i < network->GetOutputBufferInfos().size(); i++) { + outputs[output_order[i]] = args[argc++]; + } + + // Set up input buffers + std::vector > ifm(inputs.size()); + CreateBuffers(&ifm, inputs); + + // Set up output buffers + std::vector > ofm(outputs.size()); + CreateBuffers(&ofm, outputs); + + // Raw pointers for the inference + dl::Buffer* ifm_raw[inputs.size()]; + for (size_t i = 0; i < inputs.size(); i++) { + ifm_raw[i] = ifm[i].get(); + } + dl::Buffer* ofm_raw[outputs.size()]; + for (size_t i = 0; i < outputs.size(); i++) { + ofm_raw[i] = ofm[i].get(); + } + + auto npu = std::make_unique(*network); + + // Execute the inference. 
+ std::unique_ptr result( + npu->ScheduleInference(ifm_raw, sizeof(ifm_raw) / sizeof(ifm_raw[0]), ofm_raw, + sizeof(ofm_raw) / sizeof(ofm_raw[0]))); + bool inferenceCompleted = WaitForInference(result.get(), 60); + if (inferenceCompleted) { + switch ((outputs)[0]->dtype.bits) { + case 8: { + dl::Buffer** ofms = &ofm_raw[0]; + for (DLTensor* tensor : outputs) { + uint8_t* source_buffer_data = (*ofms++)->GetMappedBuffer(); + uint8_t* dest_pointer = static_cast(tensor->data); + if (source_buffer_data != dest_pointer) { + CopyOutput(ofm_raw, &outputs); + break; + } + } + break; + } + case 16: + CopyOutput(ofm_raw, &outputs); + break; + case 32: + CopyOutput(ofm_raw, &outputs); + break; + default: + break; + } + } + + return inferenceCompleted; +} + +} // namespace ethosn +} // namespace runtime +} // namespace tvm + +#else +/* If USE_ETHOSN_HW=OFF, we mock the inference call with a known-good output. + * That output can be set by using relay.ethos-n.test.infra.inference_result + * which will set the values the mocked inference will return the next time + * it's called. 
+ */ + +#include +#include + +namespace tvm { +namespace runtime { +namespace ethosn { + +namespace sl = ::ethosn::support_library; + +std::vector test_outputs; + +TVM_REGISTER_GLOBAL("relay.ethos-n.test.infra.inference_result") + .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { + test_outputs.clear(); + for (int argc = 1; argc < args.size(); argc++) { + const DLTensor* tensor = args[argc]; + auto shape = std::vector(tensor->shape, tensor->shape + tensor->ndim); + test_outputs.emplace_back(tvm::runtime::NDArray::Empty(shape, tensor->dtype, tensor->ctx)); + test_outputs[test_outputs.size() - 1].CopyFrom(tensor); + } + }); + +// Allow the ethos-n support code to be tested without a device +bool Inference(tvm::runtime::TVMArgs args, sl::CompiledNetwork* network, + const std::vector& input_order, + const std::vector& output_order) { + std::vector outputs; + for (int argc = network->GetInputBufferInfos().size(); argc < args.size(); argc++) { + outputs.push_back(args[argc]); + } + bool rc = false; + if (test_outputs.size() == outputs.size()) { + for (auto i = 0u; i < outputs.size(); i++) { + test_outputs[i].CopyTo(outputs[i]); + } + rc = true; + } + // Clear after first usage; on-exit destructor of NDArray fails + test_outputs.clear(); + return rc; +} + +} // namespace ethosn +} // namespace runtime +} // namespace tvm + +#endif diff --git a/src/runtime/contrib/ethosn/ethosn_device.h b/src/runtime/contrib/ethosn/ethosn_device.h new file mode 100644 index 0000000000000..d631d242e9931 --- /dev/null +++ b/src/runtime/contrib/ethosn/ethosn_device.h @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file ethosn_device.h + * \brief Ethos-N NPU device integration. + */ +#ifndef TVM_RUNTIME_CONTRIB_ETHOSN_ETHOSN_DEVICE_H_ +#define TVM_RUNTIME_CONTRIB_ETHOSN_ETHOSN_DEVICE_H_ + +#include + +#include "ethosn_support_library/Support.hpp" + +namespace tvm { +namespace runtime { +namespace ethosn { + +namespace sl = ::ethosn::support_library; + +bool Inference(tvm::runtime::TVMArgs args, sl::CompiledNetwork* network, + const std::vector& input_order, const std::vector& output_order); + +} // namespace ethosn +} // namespace runtime +} // namespace tvm + +#endif // TVM_RUNTIME_CONTRIB_ETHOSN_ETHOSN_DEVICE_H_ diff --git a/src/runtime/contrib/ethosn/ethosn_runtime.cc b/src/runtime/contrib/ethosn/ethosn_runtime.cc new file mode 100644 index 0000000000000..0fbebcf16139d --- /dev/null +++ b/src/runtime/contrib/ethosn/ethosn_runtime.cc @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file ethosn_runtime.cc + * \brief Execution handling of Ethos-N command streams. + */ + +#include "ethosn_runtime.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "../../file_util.h" +#include "ethosn_device.h" +#include "ethosn_driver_library/Inference.hpp" +#include "ethosn_driver_library/Network.hpp" +#include "ethosn_support_library/Support.hpp" + +namespace tvm { +namespace runtime { +namespace ethosn { + +namespace sl = ::ethosn::support_library; +namespace dl = ::ethosn::driver_library; + +EthosnModule::EthosnModule(std::vector* cmms) { + for (auto& it : *cmms) { + network_map_[it.name].name = it.name; + network_map_[it.name].cmm = std::move(it.cmm); + network_map_[it.name].inputs = it.inputs; + network_map_[it.name].outputs = it.outputs; + } +} + +PackedFunc EthosnModule::GetFunction(const std::string& name, + const ObjectPtr& sptr_to_self) { + if (network_map_.find(name) != network_map_.end()) { + return PackedFunc([sptr_to_self, this, name](TVMArgs args, TVMRetValue* rv) { + *rv = Inference(args, network_map_[name].cmm.get(), network_map_[name].inputs, + network_map_[name].outputs); + }); + } else { + return PackedFunc(); + } +} + +void EthosnModule::SaveToBinary(dmlc::Stream* stream) { + stream->Write(network_map_.size()); + for (const auto& it : network_map_) { + stream->Write(it.first); + std::stringstream ss; + it.second.cmm->Serialize(ss); + stream->Write(ss.str()); + stream->Write(it.second.inputs.size()); + stream->Write(&it.second.inputs[0], 
sizeof(uint32_t) * it.second.inputs.size()); + stream->Write(it.second.outputs.size()); + stream->Write(&it.second.outputs[0], sizeof(uint32_t) * it.second.outputs.size()); + } +} + +Module EthosnModule::LoadFromBinary(void* strm) { + auto stream = static_cast(strm); + size_t func_count; + // Read the number of functions + stream->Read(&func_count); + std::vector cmms; + cmms.resize(func_count); + for (unsigned int i = 0; i < func_count; i++) { + OrderedCompiledNetwork& compiled = cmms[i]; + std::string ext_symbol; + std::string cmm; + uint64_t input_size; + uint64_t output_size; + // Read the symbol name + stream->Read(&compiled.name); + // Read the serialized command stream + stream->Read(&cmm); + std::istringstream cmm_strm(cmm); + compiled.cmm = sl::DeserializeCompiledNetwork(cmm_strm); + // Read the number of inputs + stream->Read(&input_size); + auto size = static_cast(input_size); + compiled.inputs.resize(size); + // Read the order of inputs + stream->Read(&compiled.inputs[0], sizeof(uint32_t) * size); + // Read the number of outputs + stream->Read(&output_size); + size = static_cast(output_size); + compiled.outputs.resize(size); + // Read the order of outputs + stream->Read(&compiled.outputs[0], sizeof(uint32_t) * size); + } + auto n = make_object(&cmms); + return Module(n); +} + +TVM_REGISTER_GLOBAL("runtime.module.loadbinary_ethos-n") + .set_body([](TVMArgs args, TVMRetValue* rv) { *rv = EthosnModule::LoadFromBinary(args[0]); }); +} // namespace ethosn +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/contrib/ethosn/ethosn_runtime.h b/src/runtime/contrib/ethosn/ethosn_runtime.h new file mode 100644 index 0000000000000..730739cbd2302 --- /dev/null +++ b/src/runtime/contrib/ethosn/ethosn_runtime.h @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file ethosn_runtime.h + * \brief Execution handling of Ethos-N command streams. + */ +#ifndef TVM_RUNTIME_CONTRIB_ETHOSN_ETHOSN_RUNTIME_H_ +#define TVM_RUNTIME_CONTRIB_ETHOSN_ETHOSN_RUNTIME_H_ + +#include <tvm/runtime/packed_func.h> + +#include <map> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "ethosn_support_library/Support.hpp" + +namespace tvm { +namespace runtime { +namespace ethosn { + +namespace sl = ::ethosn::support_library; + +struct OrderedCompiledNetwork { + std::unique_ptr<sl::CompiledNetwork> cmm; + std::string name; + std::vector<uint32_t> inputs; + std::vector<uint32_t> outputs; +}; + +class EthosnModule : public ModuleNode { + public: + /*! + * \brief The Ethos-N runtime module. + * \param cmms A vector of compiled networks with input/output orders. + */ + explicit EthosnModule(std::vector<OrderedCompiledNetwork>* cmms); + + /*! + * \brief Get a PackedFunc from the Ethos-N module. + * \param name The name of the function. + * \param sptr_to_self The ObjectPtr that points to this module node. + * \return The function pointer when it is found, otherwise, PackedFunc(nullptr). + */ + PackedFunc GetFunction(const std::string& name, const ObjectPtr<Object>& sptr_to_self) final; + /*! + * \brief Save a compiled network to a binary stream, which can then be + * serialized to disk. + * \param stream The stream to save the binary. + * \note See EthosnModule::LoadFromBinary for the serialization format.
+ */ + void SaveToBinary(dmlc::Stream* stream) final; + /*! + * \brief Load a compiled network from stream. + * \param strm The binary stream to load. + * \return The created Ethos-N module. + * \note The serialization format is: + * + * size_t : number of functions + * [ + * std::string : name of function (symbol) + * std::string : serialized command stream + * size_t : number of inputs + * std::vector<uint32_t> : order of inputs + * size_t : number of outputs + * std::vector<uint32_t> : order of outputs + * ] * number of functions + */ + static Module LoadFromBinary(void* strm); + + const char* type_key() const override { return "ethos-n"; } + + private: + /*! \brief A map between ext_symbols (function names) and ordered compiled networks. */ + std::map<std::string, OrderedCompiledNetwork> network_map_; +}; + +} // namespace ethosn +} // namespace runtime +} // namespace tvm +#endif  // TVM_RUNTIME_CONTRIB_ETHOSN_ETHOSN_RUNTIME_H_ diff --git a/tests/python/contrib/test_ethosn/__init__.py b/tests/python/contrib/test_ethosn/__init__.py new file mode 100644 index 0000000000000..deba5e5eb494d --- /dev/null +++ b/tests/python/contrib/test_ethosn/__init__.py @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+"""Infrastructure and tests for Ethos-N""" + diff --git a/tests/python/contrib/test_ethosn/_infrastructure.py b/tests/python/contrib/test_ethosn/_infrastructure.py new file mode 100644 index 0000000000000..a71ab3dbc663a --- /dev/null +++ b/tests/python/contrib/test_ethosn/_infrastructure.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Expose Ethos test functions to the Python front end""" + +from __future__ import absolute_import, print_function +import tvm +from tvm import relay +from tvm.contrib import util, graph_runtime, download +from tvm.relay.testing import run_opt_pass +from enum import Enum +from hashlib import md5 +from itertools import zip_longest, combinations +import numpy as np +from PIL import Image +import os + +from . import _infrastructure +from tvm.relay.op.contrib import get_pattern_table + + +def make_module(func, params): + func = relay.Function(relay.analysis.free_vars(func), func) + if params: + func = relay.build_module.bind_params_by_name(func, params) + return tvm.IRModule.from_expr(func) + + +def make_ethosn_composite(ethosn_expr, name): + vars = relay.analysis.free_vars(ethosn_expr) + func = relay.Function([relay.Var("a")], ethosn_expr) + func = func.with_attr("Composite", name) + call = relay.Call(func, vars) + return call + + +def make_ethosn_partition(ethosn_expr): + # Create an Ethos-N global function + mod = tvm.IRModule({}) + vars = relay.analysis.free_vars(ethosn_expr) + func = relay.Function(vars, ethosn_expr) + func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1)) + func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1)) + func = func.with_attr("Compiler", "ethos-n") + func = func.with_attr("global_symbol", "ethos-n_0") + g1 = relay.GlobalVar("ethos-n_0") + mod[g1] = func + + # These are the vars to call the Ethos-N partition with + more_vars = relay.analysis.free_vars(ethosn_expr) + # Call the Ethos-N partition in main + call_fn1 =
g1(*more_vars) + mod["main"] = relay.Function(more_vars, call_fn1) + return mod + + +def get_host_op_count(mod): + class Counter(tvm.relay.ExprVisitor): + def __init__(self): + super().__init__() + self.count = 0 + + def visit_call(self, call): + if isinstance(call.op, tvm.ir.Op): + self.count += 1 + super().visit_call(call) + + c = Counter() + c.visit(mod["main"]) + return c.count + + +def build(mod, params, npu=True, expected_host_ops=0, npu_partitions=1): + relay.backend.compile_engine.get().clear() + with tvm.transform.PassContext(opt_level=3, config={ + "relay.ext.ethos-n.options": {"variant": 0} + }): + with tvm.target.create("llvm"): + if npu: + f = relay.build_module.bind_params_by_name(mod["main"], params) + mod = tvm.IRModule() + mod["main"] = f + mod = relay.transform.AnnotateTarget("ethos-n")(mod) + mod = relay.transform.MergeCompilerRegions()(mod) + mod = relay.transform.PartitionGraph()(mod) + host_op_count = get_host_op_count(mod) + assert host_op_count == expected_host_ops, \ + "Got {} host operators, expected {}".format(host_op_count, expected_host_ops) + partition_count = 0 + for global_var in mod.get_global_vars(): + if "ethos-n" in global_var.name_hint: + partition_count += 1 + + assert npu_partitions == partition_count, \ + "Got {} ethos-n partitions, expected {}".format(partition_count, npu_partitions) + + return relay.build(mod, params=params) + + +def run(graph, lib, params, inputs, outputs, npu=True): + # Export and load lib to confirm this works + lib_name = "mod.so" + temp = util.tempdir() + lib_path = temp.relpath(lib_name) + lib.export_library(lib_path) + lib = tvm.runtime.load_module(lib_path) + module = graph_runtime.create(graph, lib, tvm.cpu()) + module.set_input(**inputs) + module.set_input(**params) + module.run() + out = [module.get_output(i) for i in range(outputs)] + if not npu: + inference_result(0, out) + return out + + +def build_and_run(mod, inputs, outputs, params, ctx=tvm.cpu(), npu=True, expected_host_ops=0, 
npu_partitions=1): + graph, lib, params = build(mod, params, npu, expected_host_ops, npu_partitions) + return run(graph, lib, params, inputs, outputs, npu) + + +def verify(answers, atol, rtol=1e-07, verify_saturation=True): + """Compare the array of answers. Each entry is a list of outputs""" + if len(answers) < 2: + print("No results to compare: expected at least two, found ", + len(answers)) + for answer in zip_longest(*answers): + for outs in combinations(answer, 2): + if verify_saturation: + assert np.count_nonzero(outs[0].asnumpy() == 255) < 0.25 * outs[0].asnumpy().size, \ + "Output is saturated: {}".format(outs[0]) + assert np.count_nonzero(outs[0].asnumpy() == 0) < 0.25 * outs[0].asnumpy().size, \ + "Output is saturated: {}".format(outs[0]) + tvm.testing.assert_allclose( + outs[0].asnumpy(), outs[1].asnumpy(), rtol=rtol, atol=atol + ) + + +def inference_result(checksum, outputs): + """Set the expected results of an Ethos inference, if the testing + infrastructure is available. This assumes that the entire graph + was offloaded to the neural processor.""" + if tvm.get_global_func( + "relay.ethos-n.test.infra.inference_result", True): + return _infrastructure.inference_result(checksum, *outputs) + return False + + +def test_error(mod, params, err_msg): + caught = None + with tvm.transform.PassContext(opt_level=3): + with tvm.target.create("llvm"): + try: + relay.build(mod, params) + except tvm.error.TVMError as e: + caught = e.args[0] + finally: + relay.backend.compile_engine.get().clear() + + assert caught is not None + assert err_msg in caught, caught diff --git a/tests/python/contrib/test_ethosn/test_concatenate.py b/tests/python/contrib/test_ethosn/test_concatenate.py new file mode 100644 index 0000000000000..cca61d1db6779 --- /dev/null +++ b/tests/python/contrib/test_ethosn/test_concatenate.py @@ -0,0 +1,91 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Concatenate tests for Ethos-N""" + +import numpy as np +import tvm +from tvm import relay +from tvm.relay.op.contrib.ethosn import ethosn_available +from . import infrastructure as tei + + +def _get_inputs(shapes): + inputs = {} + for i, shape in enumerate(shapes): + inputs["in" + str(i)] = tvm.nd.array( + np.random.randint(0, high=256, size=shape, dtype="uint8") + ) + + return inputs + + +def _get_model(shapes, dtype, axis): + tup = [] + for i, shape in enumerate(shapes): + a = relay.var("in" + str(i), shape=shape, dtype=dtype) + tup.append(a) + + zeroi = relay.const(1, "int32") + zerof = relay.const(0.5, "float32") + con = relay.qnn.op.concatenate(tup, + input_scales=[zerof]*len(shapes), + input_zero_points=[zeroi]*len(shapes), + output_scale=zerof, + output_zero_point=zeroi, + axis=axis) + return con + + +def test_concatenate(): + if not ethosn_available(): + return + + trials = [ + ([(1, 4), (1, 6)], 1), + ([(1, 16, 4), (1, 16, 4)], 1), + ([(1, 25, 4, 16)]*3, 3), + ([(1, 25, 4, 16), (1, 25, 5, 16), (1, 25, 6, 16)], 2), + ] + + for shapes, axis in trials: + outputs = [] + inputs = _get_inputs(shapes) + for npu in [False, True]: + model = _get_model(shapes, "uint8", axis) + mod = tei.make_module(model, {}) + outputs.append(tei.build_and_run(mod, inputs, 1, {}, npu=npu)) + + 
tei.verify(outputs, 0) + + +def test_concatenate_failure(): + if not ethosn_available(): + return + + trials = [ + ([(1, 4, 4, 4, 4), (1, 4, 4, 4, 4)], "uint8", 1, "dimensions=5, dimensions must be <= 4;"), + ([(1, 4, 4, 4), (1, 4, 4, 4)], "uint8", 3, "Concatenation along the channels dimension (axis 3) requires input tensors with a multiple of 16 channels;"), + ([(1, 4, 4, 4), (1, 4, 4, 4)], "int8", 2, "dtype='int8', dtype must be either uint8 or int32; dtype='int8', dtype must be either uint8 or int32;"), + ([(2, 4, 4, 4), (2, 4, 4, 4)], "uint8", 2, "batch size=2, batch size must = 1; batch size=2, batch size must = 1;"), + ([(1, 4, 4, 4), (1, 4, 4, 4)], "uint8", 0, "Concatenation cannot be performed along batch axis (axis 0);"), + ] + + for shapes, dtype, axis, err_msg in trials: + model = _get_model(shapes, dtype, axis) + mod = tei.make_ethosn_partition(model) + tei.test_error(mod, {}, err_msg) diff --git a/tests/python/contrib/test_ethosn/test_split.py b/tests/python/contrib/test_ethosn/test_split.py new file mode 100644 index 0000000000000..d5ff9bf0831f8 --- /dev/null +++ b/tests/python/contrib/test_ethosn/test_split.py @@ -0,0 +1,71 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +"""Split tests for Ethos-N""" + +import numpy as np +import tvm +from tvm import relay +from tvm.relay.op.contrib.ethosn import ethosn_available +from . import infrastructure as tei + + +def _get_model(shape, dtype, splits, axis): + a = relay.var("a", shape=shape, dtype=dtype) + split = relay.op.split(a, indices_or_sections=splits, axis=axis) + return split.astuple() + + +def test_split(): + if not ethosn_available(): + return + + trials = [ + ((1, 16, 16, 32), (2, 7, 10), 2), + ((1, 12, 8, 16), 3, 1), + ((1, 33), 11, 1), + ] + + np.random.seed(0) + for shape, splits, axis in trials: + outputs = [] + inputs = {"a": tvm.nd.array(np.random.randint(0, high=256, size=shape, dtype="uint8"))} + for npu in [False, True]: + model = _get_model(shape, "uint8", splits, axis) + mod = tei.make_module(model, {}) + output_count = splits if type(splits) == int else len(splits) + 1 + outputs.append(tei.build_and_run(mod, inputs, output_count, {}, npu=npu)) + + tei.verify(outputs, 0) + + +def test_split_failure(): + if not ethosn_available(): + return + + trials = [ + ((1, 4, 4, 4, 4), "uint8", 4, 2, "dimensions=5, dimensions must be <= 4;"), + ((1, 4, 4, 4), "int8", 4, 2, "dtype='int8', dtype must be either uint8 or int32;"), + ((2, 4, 4, 4), "uint8", 4, 2, "batch size=2, batch size must = 1;"), + ((1, 4, 4, 4), "uint8", 1, 0, "Split cannot be performed along batch axis (axis 0);"), + ((1, 4, 4, 4), "uint8", 4, 3, "Split along the channels dimension (axis 3) requires all output sizes (specified in splitInfo.m_Sizes) to be multiples of 16;"), + ] + + for shape, dtype, splits, axis, err_msg in trials: + model = _get_model(shape, dtype, splits, axis) + mod = tei.make_ethosn_partition(model) + tei.test_error(mod, {}, err_msg) diff --git a/tests/python/contrib/test_ethosn/test_topologies.py b/tests/python/contrib/test_ethosn/test_topologies.py new file mode 100644 index 0000000000000..942186d57e60a --- /dev/null +++ b/tests/python/contrib/test_ethosn/test_topologies.py @@ -0,0 +1,123 
@@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Ethos-N tests for complex network topologies.""" + +import numpy as np +import tvm +from tvm import relay +from tvm.relay.op.contrib.ethosn import ethosn_available +from . import infrastructure as tei + + +def test_split_with_asym_concats(): + if not ethosn_available(): + return + + def get_model(shape, splits, axis): + a = relay.var("a", shape=shape, dtype="uint8") + split = relay.op.split(a, indices_or_sections=splits, axis=axis) + zeroi = relay.const(1, "int32") + zerof = relay.const(0.5, "float32") + con1 = relay.qnn.op.concatenate([split[0], split[1]], + input_scales=[zerof]*2, + input_zero_points=[zeroi]*2, + output_scale=zerof, + output_zero_point=zeroi, + axis=axis) + con2 = relay.qnn.op.concatenate([split[2], split[3]], + input_scales=[zerof]*2, + input_zero_points=[zeroi]*2, + output_scale=zerof, + output_zero_point=zeroi, + axis=axis) + return relay.Tuple((con2, con1)) + + trials = [ + ((1, 16, 16, 32), (2, 7, 10), 2), + ] + + np.random.seed(0) + for shape, splits, axis in trials: + outputs = [] + inputs = {"a": tvm.nd.array(np.random.randint(0, high=256, size=shape, dtype="uint8"))} + for npu in [False, True]: + model = get_model(shape, splits, axis) + mod = 
tei.make_module(model, {}) + outputs.append(tei.build_and_run(mod, inputs, 2, {}, npu=npu)) + + tei.verify(outputs, 0) + + +def test_output_tuple_propagation(): + """This tests the case where the output tuple must be inferred + as having dummy tensor information.""" + if not ethosn_available(): + return + + def get_model(): + a = relay.var("a", shape=(1, 4, 4, 16), dtype="uint8") + split = relay.op.split(a, indices_or_sections=4, axis=2) + return relay.Tuple((split[0], split[1], split[2], split[3])) + + np.random.seed(0) + outputs = [] + inputs = {"a": tvm.nd.array(np.random.randint(0, high=256, size=(1, 4, 4, 16), dtype="uint8"))} + for npu in [False, True]: + model = get_model() + mod = tei.make_module(model, {}) + outputs.append(tei.build_and_run(mod, inputs, 4, {}, npu=npu)) + + tei.verify(outputs, 0) + + +def test_input_tuples(): + if not ethosn_available(): + return + + def get_model(shapes, axis): + tup = [] + for i, shape in enumerate(shapes): + a = relay.var("in" + str(i), shape=shape, dtype="uint8") + tup.append(a) + + zeroi = relay.const(1, "int32") + zerof = relay.const(0.5, "float32") + con = relay.qnn.op.concatenate(tup, + input_scales=[zerof]*len(shapes), + input_zero_points=[zeroi]*len(shapes), + output_scale=zerof, + output_zero_point=zeroi, + axis=axis) + + return con + + np.random.seed(0) + inputs = { + "in0": tvm.nd.array(np.random.randint(0, high=256, size=(1, 4), dtype="uint8")), + "in1": tvm.nd.array(np.random.randint(0, high=256, size=(1, 6), dtype="uint8")), + } + outputs = [] + for npu in [False, True]: + model = get_model([(1, 4), (1, 6)], 1) + if not npu: + mod = tei.make_module(model, {}) + else: + mod = tei.make_ethosn_partition(model) + graph, lib, params = tei.build(mod, {}, npu=False) + outputs.append(tei.run(graph, lib, {}, inputs, 1, npu=npu)) + + tei.verify(outputs, 0) diff --git a/tests/scripts/task_config_build_cpu.sh b/tests/scripts/task_config_build_cpu.sh index f36c1d974b7ef..77b28e66fbb77 100755 --- 
a/tests/scripts/task_config_build_cpu.sh +++ b/tests/scripts/task_config_build_cpu.sh @@ -43,3 +43,5 @@ echo set\(USE_VTA_FSIM ON\) >> config.cmake echo set\(USE_TFLITE ON\) >> config.cmake echo set\(USE_TENSORFLOW_PATH \"/tensorflow\"\) >> config.cmake echo set\(USE_FLATBUFFERS_PATH \"/flatbuffers\"\) >> config.cmake +echo set\(USE_ETHOSN /opt/arm/ethosn-driver\) >> config.cmake +echo set\(USE_ETHOSN_HW OFF\) >> config.cmake \ No newline at end of file diff --git a/tests/scripts/task_python_ethosn_tests.sh b/tests/scripts/task_python_ethosn_tests.sh new file mode 100755 index 0000000000000..36a3d09196506 --- /dev/null +++ b/tests/scripts/task_python_ethosn_tests.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e +set -u +source tests/scripts/setup-pytest-env.sh + + +# Rebuild cython + +find . -type f -path "*.pyc" | xargs rm -f +make cython3 + +TVM_FFI=ctypes python3 -m pytest tests/python/contrib/test_ethosn +