diff --git a/.ci/scripts/setup-samsung-linux-deps.sh b/.ci/scripts/setup-samsung-linux-deps.sh new file mode 100644 index 00000000000..7e6024c584e --- /dev/null +++ b/.ci/scripts/setup-samsung-linux-deps.sh @@ -0,0 +1,77 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +set -ex + + +download_ai_lite_core() { + API_BASE="https://soc-developer.semiconductor.samsung.com/api/v1/resource/ai-litecore/download" + API_KEY="kn10SoSY3hkC-9Qny5TqD2mnqVrlupv3krnjLeBt5cY" + + VERSION="0.5" + OS_NAME="Ubuntu 22.04" + OUT_FILE="/tmp/exynos-ai-litecore-v${VERSION}.tar.gz" + TARGET_PATH="/tmp/exynos_ai_lite_core" + + mkdir -p ${TARGET_PATH} + # Presigned issue URL + JSON_RESP=$(curl -sS -G \ + --location --fail --retry 3 \ + -H "apikey: ${API_KEY}" \ + --data-urlencode "version=${VERSION}" \ + --data-urlencode "os=${OS_NAME}" \ + "${API_BASE}") + + DOWNLOAD_URL=$(echo "$JSON_RESP" | sed -n 's/.*"data":[[:space:]]*"\([^"]*\)".*/\1/p') + + if [[ -z "$DOWNLOAD_URL" ]]; then + echo "Failed to extract download URL" + echo "$JSON_RESP" + exit 1 + fi + + # Download LiteCore + curl -sS -L --fail --retry 3 \ + --output "$OUT_FILE" \ + "$DOWNLOAD_URL" + + echo "Download done: $OUT_FILE" + + + tar -C "${TARGET_PATH}" --strip-components=1 -xzvf "${OUT_FILE}" + + export EXYNOS_AI_LITECORE_ROOT=${TARGET_PATH} + export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${EXYNOS_AI_LITECORE_ROOT}/lib/x86_64-linux +} + +install_enn_backend() { + NDK_INSTALLATION_DIR=/opt/ndk + rm -rf "${NDK_INSTALLATION_DIR}" && sudo mkdir -p "${NDK_INSTALLATION_DIR}" + ANDROID_NDK_VERSION=r27b + + pushd . + cd /tmp + curl -Os --retry 3 "https://ossci-android.s3.amazonaws.com/android-ndk-${ANDROID_NDK_VERSION}-linux.zip" + unzip -qo "android-ndk-${ANDROID_NDK_VERSION}-linux.zip" + + # Print the content for manual verification + ls -lah "android-ndk-${ANDROID_NDK_VERSION}" + sudo mv "android-ndk-${ANDROID_NDK_VERSION}"/* "${NDK_INSTALLATION_DIR}" + popd + # build Exynos backend + export ANDROID_NDK_ROOT=${ANDROID_NDK_ROOT:-/opt/ndk} + bash backends/samsung/build.sh --build all + # set env variable + export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)" + export PYTHONPATH=${PYTHONPATH:-}:${EXECUTORCH_ROOT}/.. 
+} + +AI_LITE_CORE_VERSION=0.5.0 + +download_ai_lite_core ${AI_LITE_CORE_VERSION} +install_enn_backend diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 37c6623ca97..fd5ded7e89f 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -866,6 +866,41 @@ jobs: PYTHON_EXECUTABLE=python bash examples/nxp/run_aot_example.sh mobilenetv2 + test-samsung-models-linux: + name: test-samsung-models-linux + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + permissions: + id-token: write + contents: read + with: + runner: linux.2xlarge + docker-image: ci-image:executorch-ubuntu-22.04-gcc9 + submodules: 'recursive' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 90 + script: | + set -ex + + # The generic Linux job chooses to use base env, not the one setup by the image + CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + conda activate "${CONDA_ENV}" + + # Setup python + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake" + + # Setup Samsung SDK (AI Lite Core) and install enn backend + source .ci/scripts/setup-samsung-linux-deps.sh + + # Test models serially + models="mv2 ic3 resnet18 resnet50" + for model in $models; do + python -m executorch.examples.samsung.aot_compiler --model_name=$model -c E9955 + done + + # Test ops + python -m unittest discover -s backends/samsung/test/ops -p "test_*.py" + + test-vulkan-models-linux: name: test-vulkan-models-linux uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main diff --git a/CMakeLists.txt b/CMakeLists.txt index 92aa1e3b194..45c41640e89 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -571,6 +571,11 @@ if(EXECUTORCH_BUILD_QNN) list(APPEND _executorch_backends qnn_executorch_backend) endif() +if(EXECUTORCH_BUILD_ENN) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/samsung) + list(APPEND _executorch_backends enn_backend) +endif() + if(EXECUTORCH_BUILD_XNNPACK) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/xnnpack) list(APPEND _executorch_backends xnnpack_backend) @@ -817,6 +822,10 @@ if(EXECUTORCH_BUILD_PYBIND) list(APPEND _dep_libs qnn_executorch_backend) endif() + if(EXECUTORCH_BUILD_ENN) + list(APPEND _dep_libs enn_backend) + endif() + if(EXECUTORCH_BUILD_XNNPACK) # need to explicitly specify XNNPACK and xnnpack-microkernels-prod here # otherwise uses XNNPACK and microkernel-prod symbols from libtorch_cpu diff --git a/LICENSE b/LICENSE index fdd029058a6..f20b198d808 100644 --- a/LICENSE +++ b/LICENSE @@ -8,6 +8,7 @@ Copyright (c) Qualcomm Innovation Center, Inc. Copyright (c) 2023 Apple Inc. Copyright (c) 2024 MediaTek Inc. Copyright 2023 NXP +Copyright (c) 2025 Samsung Electronics Co. LTD Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/backends/samsung/CMakeLists.txt b/backends/samsung/CMakeLists.txt new file mode 100644 index 00000000000..fff3ece5239 --- /dev/null +++ b/backends/samsung/CMakeLists.txt @@ -0,0 +1,168 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +cmake_minimum_required(VERSION 3.15) +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +get_filename_component( + EXECUTORCH_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/../.." 
ABSOLUTE
)
+
+if(NOT DEFINED EXYNOS_AI_LITECORE_ROOT)
+  message(
+    FATAL_ERROR
+      "Please define EXYNOS_AI_LITECORE_ROOT by adding cmake parameter -DEXYNOS_AI_LITECORE_ROOT=<...>"
+  )
+endif()
+if(CMAKE_TOOLCHAIN_FILE MATCHES ".*(iOS|ios\.toolchain)\.cmake$")
+  message(FATAL_ERROR "iOS is not supported by the Exynos backend.")
+endif()
+
+if(NOT FLATC_EXECUTABLE)
+  set(FLATC_EXECUTABLE flatc)
+endif()
+
+add_compile_options(-Wall -Werror -fPIC)
+if(CMAKE_BUILD_TYPE STREQUAL "Release")
+  # strip symbols
+  add_link_options("-s")
+  # hide dynamic symbols
+  set(CMAKE_C_VISIBILITY_PRESET hidden)
+  set(CMAKE_CXX_VISIBILITY_PRESET hidden)
+  add_definitions(-DNDEBUG)
+endif()
+
+include_directories(
+  ${EXECUTORCH_SOURCE_DIR}/..
+  ${EXECUTORCH_SOURCE_DIR}/runtime/core/portable_type/c10
+  ${EXYNOS_AI_LITECORE_ROOT}
+)
+add_compile_definitions(C10_USING_CUSTOM_GENERATED_MACROS)
+
+if(${ANDROID})
+  find_library(android_log log)
+endif()
+
+# add logging library
+add_library(enn_logging STATIC)
+
+if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64")
+  add_subdirectory(
+    ${EXECUTORCH_SOURCE_DIR}/third-party/pybind11
+    ${CMAKE_CURRENT_BINARY_DIR}/pybind11
+  )
+  add_library(PyEnnWrapperAdaptor MODULE)
+
+  find_library(
+    GG_API_LIB
+    NAMES graphgen_api
+    HINTS ${EXYNOS_AI_LITECORE_ROOT}/lib/x86_64-linux
+  )
+  add_library(graphgen_api SHARED IMPORTED GLOBAL)
+  set_target_properties(
+    graphgen_api
+    PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
+               "${EXYNOS_AI_LITECORE_ROOT}/include" IMPORTED_LOCATION
+               "${GG_API_LIB}"
+  )
+
+  set(_enn_compile_options_schema
+      ${CMAKE_CURRENT_SOURCE_DIR}/serialization/compile_options_def.fbs
+  )
+
+  set(_enn_schema_generate_dir
+      "${CMAKE_BINARY_DIR}/schema/include/executorch/backends/samsung"
+  )
+  # Path to the header generated from the .fbs file.
+  get_filename_component(_enn_schema_name "${_enn_compile_options_schema}" NAME_WE)
+  set(_enn_schema_output "${_enn_schema_generate_dir}/${_enn_schema_name}_generated.h")
+
+  # Generate the headers from the .fbs files.
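+  # (Equivalent to running, from the ExecuTorch source root, something like:
+  #    flatc --cpp --cpp-std c++11 --scoped-enums \
+  #        -o <schema_generate_dir> backends/samsung/serialization/compile_options_def.fbs
+  #  which emits compile_options_def_generated.h into the generate directory.)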
+  add_custom_command(
+    OUTPUT ${_enn_schema_output}
+    COMMAND ${FLATC_EXECUTABLE} --cpp --cpp-std c++11 --scoped-enums -o
+            "${_enn_schema_generate_dir}" ${_enn_compile_options_schema}
+    DEPENDS ${_enn_compile_options_schema}
+    WORKING_DIRECTORY ${EXECUTORCH_SOURCE_DIR}
+    COMMENT "Generating enn compile options headers"
+    VERBATIM
+  )
+  add_custom_target(
+    enn_compile_options_output ALL DEPENDS ${_enn_schema_output}
+  )
+
+  set_target_properties(
+    PyEnnWrapperAdaptor PROPERTIES CXX_VISIBILITY_PRESET hidden
+  )
+  target_link_libraries(
+    PyEnnWrapperAdaptor PRIVATE pybind11::module pybind11::lto graphgen_api
+                                enn_logging
+  )
+  target_include_directories(
+    PyEnnWrapperAdaptor BEFORE
+    PRIVATE ${CMAKE_BINARY_DIR}/schema/include
+            ${EXECUTORCH_SOURCE_DIR}/third-party/flatbuffers/include
+  )
+  add_dependencies(PyEnnWrapperAdaptor enn_compile_options_output)
+  pybind11_extension(PyEnnWrapperAdaptor)
+
+  # PyGraphWrapperAdaptor
+  add_library(PyGraphWrapperAdaptor MODULE)
+  find_library(
+    GRAPH_WRAPPER_LIB
+    NAMES graph_wrapper
+    HINTS ${EXYNOS_AI_LITECORE_ROOT}/lib/x86_64-linux
+  )
+  add_library(graph_wrapper SHARED IMPORTED GLOBAL)
+  set_target_properties(
+    graph_wrapper
+    PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
+               "${EXYNOS_AI_LITECORE_ROOT}/include" IMPORTED_LOCATION
+               "${GRAPH_WRAPPER_LIB}"
+  )
+  set_target_properties(
+    PyGraphWrapperAdaptor PROPERTIES CXX_VISIBILITY_PRESET hidden
+  )
+  target_link_libraries(
+    PyGraphWrapperAdaptor PRIVATE pybind11::module pybind11::lto graph_wrapper
+                                  enn_logging
+  )
+  pybind11_extension(PyGraphWrapperAdaptor)
+
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/aot)
+endif()
+
+if(${ANDROID})
+  target_link_libraries(enn_logging PRIVATE ${android_log})
+  add_library(enn_backend STATIC)
+  target_link_libraries(enn_backend PRIVATE enn_logging)
+  executorch_target_link_options_shared_lib(enn_backend)
+  target_compile_options(enn_backend PRIVATE -Wno-deprecated-declarations)
+
+  set(__enn_executor_runner_srcs
+      ${EXECUTORCH_SOURCE_DIR}/examples/samsung/executor_runner/enn_executor_runner.cpp
+  )
+  add_executable(enn_executor_runner ${__enn_executor_runner_srcs})
+  add_dependencies(enn_executor_runner enn_backend)
+  target_link_libraries(
+    enn_executor_runner PRIVATE enn_logging enn_backend gflags executorch
+                                extension_data_loader portable_ops_lib
+  )
+  set_target_properties(
+    enn_executor_runner PROPERTIES CXX_VISIBILITY_PRESET hidden
+  )
+  install(
+    TARGETS enn_backend enn_logging
+    EXPORT ExecuTorchTargets
+    DESTINATION lib
+  )
+endif()
+
+add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/runtime)
diff --git a/backends/samsung/README.md b/backends/samsung/README.md
new file mode 100644
index 00000000000..fcb7ef84b75
--- /dev/null
+++ b/backends/samsung/README.md
@@ -0,0 +1,72 @@
+# ExecuTorch Samsung Exynos Delegate
+
+This subtree contains the Exynos delegate implementation for ExecuTorch. The delegate targets deploying torch models to run on the Exynos NPU/DSP.
+
+This backend is implemented on top of [EXYNOS_LITECORE](https://soc-developer.semiconductor.samsung.com/global/development/ai-litecore).
+Please prepare the SDK before you start; it is required for both compilation and runtime.
+
+## Delegate Options
+
+### Supported Chipset
+- Exynos 2500 (E9955)
+
+### Supported Inference Type
+- Quantized (i8/u8/i16/u16)
+- FP16
+
+## Directory Structure
+
+```
+backends/samsung
+├── aot            # Code for generating the binary buffer for the ENN runtime.
+├── builders       # Code for lowering each operator.
+├── partition      # ENN Partitioner.
+├── passes         # Various passes helping lower models to the ENN backend.
+├── python         # Place for the pybind artifacts used to access Samsung libraries.
+├── runtime        # ENN runtime for executing lowered models.
+├── scripts        # Misc supporting scripts, not related to core functionality.
+└── serialization  # Code for building the Exynos graph IR and serializing it.
+
+examples
+└── samsung        # Examples that run the ENN backend.
+```
+
+## How to build
+Please download Exynos AI LiteCore and set `EXYNOS_AI_LITECORE_ROOT` to the root of the SDK directory.
+Please navigate to [Android NDK](https://developer.android.com/ndk) and download a version of the NDK.
+`ANDROID_NDK` refers to the root path of the NDK directory.
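+For example, assuming the SDK was unpacked to `/opt/exynos-ai-litecore` and the NDK to `/opt/android-ndk` (illustrative paths):
+```bash
+export EXYNOS_AI_LITECORE_ROOT=/opt/exynos-ai-litecore
+export ANDROID_NDK=/opt/android-ndk
+```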
+
+### Set up environment variables
+```bash
+export LD_LIBRARY_PATH=${EXYNOS_AI_LITECORE_ROOT}/lib/x86_64-linux/
+```
+
+### Build AOT Targets
+This generates the Python artifacts that let users call the `Compile` interface from a Python script to lower a model to the Exynos backend.
+```bash
+./backends/samsung/build.sh -b x86_64
+```
+
+### Build ENN Executor Runner
+```bash
+./backends/samsung/build.sh -b android --ndk ${ANDROID_NDK}
+```
+`ANDROID_ABI=arm64-v8a` is the default; the required runtime executables are generated in the `build_samsung_android` directory.
+
+### Build Android Extension
+This is later exposed to the Java app. Please turn on the CMake option `EXECUTORCH_BUILD_ENN`, and the ENN runtime will be added.
+```bash
+cmake extension/android \
+  -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
+  -DANDROID_ABI="${ANDROID_ABI}" \
+  -DCMAKE_INSTALL_PREFIX=cmake-android-out \
+  -Bcmake-android-out/extension/android
+
+cmake --build cmake-android-out/extension/android -j8
+```
+
+## Examples
+```bash
+python -m executorch.examples.samsung.aot_compiler --chipset e9955 -m ic3
+```
+
+Please see the examples [README.md](../../examples/samsung/README.md).
diff --git a/backends/samsung/aot/CMakeLists.txt b/backends/samsung/aot/CMakeLists.txt
new file mode 100644
index 00000000000..1a4e6286f97
--- /dev/null
+++ b/backends/samsung/aot/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+target_sources(
+  PyEnnWrapperAdaptor PUBLIC PyEnnWrapperAdaptor.cpp PyEnnWrapperAdaptor.h
+)
+target_sources(
+  PyGraphWrapperAdaptor
+  PUBLIC PyGraphWrapperAdaptor.cpp PyGraphWrapperAdaptor.h
+         wrappers/op_param_wrapper.h wrappers/op_wrapper.h
+         wrappers/tensor_wrapper.h
+)
diff --git a/backends/samsung/aot/PyEnnWrapperAdaptor.cpp b/backends/samsung/aot/PyEnnWrapperAdaptor.cpp
new file mode 100644
index 00000000000..a44e47ac40b
--- /dev/null
+++ b/backends/samsung/aot/PyEnnWrapperAdaptor.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co. LTD
+ * All rights reserved
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ */
+
+#include <executorch/backends/samsung/aot/PyEnnWrapperAdaptor.h>
+#include <pybind11/pybind11.h>
+
+namespace torch {
+namespace executor {
+namespace enn {
+PYBIND11_MODULE(PyEnnWrapperAdaptor, m) {
+  pybind11::class_<PyEnnWrapper, std::shared_ptr<PyEnnWrapper>>(m, "EnnWrapper")
+      .def(pybind11::init<>())
+      .def("Init", &PyEnnWrapper::Init)
+      .def("IsNodeSupportedByBackend", &PyEnnWrapper::IsNodeSupportedByBackend)
+      .def(
+          "Compile",
+          &PyEnnWrapper::Compile,
+          "Ahead of time compilation for serialized graph.")
+      .def("Destroy", &PyEnnWrapper::Destroy, "Release resources.");
+}
+} // namespace enn
+} // namespace executor
+} // namespace torch
diff --git a/backends/samsung/aot/PyEnnWrapperAdaptor.h b/backends/samsung/aot/PyEnnWrapperAdaptor.h
new file mode 100644
index 00000000000..953ec050174
--- /dev/null
+++ b/backends/samsung/aot/PyEnnWrapperAdaptor.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co. LTD
+ * All rights reserved
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ * + */ +#pragma once + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +namespace py = pybind11; + +namespace torch { +namespace executor { +namespace enn { + +class PyEnnWrapper { + public: + PyEnnWrapper() {} + + void Init(const py::bytes& compile_opts) { + graphgen_instance_ = graphgen_create(); + option_buf_ = enn_option::GetEnnExecuTorchOptions( + compile_opts.cast().data()); + } + + bool IsNodeSupportedByBackend() { + return false; + } + + py::array_t Compile(const py::array_t& model_buffer) { + if (graphgen_instance_ == nullptr) { + ENN_LOG_ERROR("Please call `Init()` first before compile."); + return py::array_t(); + } + auto soc_name = option_buf_->chipset(); + if (graphgen_initialize_context(graphgen_instance_, soc_name) != + GraphGenResult::SUCCESS) { + ENN_LOG_ERROR( + "Unsupported Soc (%d), please check your chipset version.", soc_name); + return py::array_t(); + } + + auto m_buf_info = model_buffer.request(); + auto* model_buf_ptr = reinterpret_cast(m_buf_info.ptr); + NNCBuffer* nnc_buffer = nullptr; + if (graphgen_generate( + graphgen_instance_, model_buf_ptr, m_buf_info.size, &nnc_buffer) != + GraphGenResult::SUCCESS) { + ENN_LOG_ERROR("Compile model failed."); + return py::array_t(); + } + + auto result = py::array_t({nnc_buffer->size}, {sizeof(char)}); + auto result_buf = result.request(); + memcpy(result_buf.ptr, nnc_buffer->addr, nnc_buffer->size); + + graphgen_release_buffer(graphgen_instance_, nnc_buffer); + + return result; + } + + void Destroy() { + graphgen_release(graphgen_instance_); + graphgen_instance_ = nullptr; + } + + ~PyEnnWrapper() { + Destroy(); + } + + private: + // pointer to enn software entry + void* graphgen_instance_ = nullptr; + // enn compilation option buf + const enn_option::EnnExecuTorchOptions* option_buf_ = nullptr; +}; +} // namespace enn +} // namespace executor +} // namespace torch diff --git a/backends/samsung/aot/PyGraphWrapperAdaptor.cpp b/backends/samsung/aot/PyGraphWrapperAdaptor.cpp new file mode 100644 index 00000000000..bfde8cfe9bf --- /dev/null +++ b/backends/samsung/aot/PyGraphWrapperAdaptor.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co. LTD + * All rights reserved + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ * + */ + +#include "PyGraphWrapperAdaptor.h" + +namespace torch { +namespace executor { +namespace enn { + +PYBIND11_MODULE(PyGraphWrapperAdaptor, m) { + pybind11::class_>( + m, "OpParamWrapper") + .def(pybind11::init()) + .def("SetStringValue", &OpParamWrapper::SetStringValue) + .def("SetScalarValue", &OpParamWrapper::SetScalarValue) + .def("SetScalarValue", &OpParamWrapper::SetScalarValue) + .def("SetScalarValue", &OpParamWrapper::SetScalarValue) + .def("SetScalarValue", &OpParamWrapper::SetScalarValue) + .def("SetScalarValue", &OpParamWrapper::SetScalarValue) + .def("SetScalarValue", &OpParamWrapper::SetScalarValue) + .def("SetScalarValue", &OpParamWrapper::SetScalarValue) + .def("SetVectorValue", &OpParamWrapper::SetVectorValue) + .def("SetVectorValue", &OpParamWrapper::SetVectorValue) + .def("SetVectorValue", &OpParamWrapper::SetVectorValue) + .def("SetVectorValue", &OpParamWrapper::SetVectorValue) + .def("SetVectorValue", &OpParamWrapper::SetVectorValue) + .def("SetVectorValue", &OpParamWrapper::SetVectorValue); + + pybind11::class_>( + m, "PyEnnTensorWrapper") + .def(pybind11::init< + std::string, + const std::vector&, + std::string, + std::string>()) + .def( + "AddQuantizeParam", + &EnnTensorWrapper::AddQuantizeParam, + "Add quantize parameter.") + .def( + "AddData", + &EnnTensorWrapper::AddData, + "Add data for constant tensor."); + + pybind11::class_>( + m, "PyEnnOpWrapper") + .def(pybind11::init< + std::string, + std::string, + const std::vector&, + const std::vector&>()) + .def( + "AddOpParam", + &EnnOpWrapper::AddOpParam, + "Add parameter for current op."); + + pybind11::class_>( + m, "PyEnnGraphWrapper") + .def(pybind11::init()) + .def("Init", &PyEnnGraphWrapper::Init, "Initialize Graph Wrapper.") + .def( + "DefineTensor", + &PyEnnGraphWrapper::DefineTensor, + "Define a tensor in graph.") + .def( + "DefineOpNode", + &PyEnnGraphWrapper::DefineOpNode, + "Define a op node in graph.") + .def( + "SetGraphInputTensors", + &PyEnnGraphWrapper::SetGraphInputTensors, + "Set inputs for Graph") + .def( + "SetGraphOutputTensors", + &PyEnnGraphWrapper::SetGraphOutputTensors, + "Set outputs for Graph") + .def( + "FinishBuild", + &PyEnnGraphWrapper::FinishBuild, + "Finish to build the graph.") + .def("Serialize", &PyEnnGraphWrapper::Serialize, "Serialize the graph."); +} + +} // namespace enn +} // namespace executor +} // namespace torch diff --git a/backends/samsung/aot/PyGraphWrapperAdaptor.h b/backends/samsung/aot/PyGraphWrapperAdaptor.h new file mode 100644 index 00000000000..baa082f4d69 --- /dev/null +++ b/backends/samsung/aot/PyGraphWrapperAdaptor.h @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co. LTD + * All rights reserved + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ * + */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "wrappers/op_param_wrapper.h" +#include "wrappers/op_wrapper.h" +#include "wrappers/tensor_wrapper.h" + +namespace py = pybind11; + +namespace torch { +namespace executor { +namespace enn { + +class PyEnnGraphWrapper { + public: + PyEnnGraphWrapper() {} + + void Init() { + graph_wrapper_ = create_graph(""); + } + + TENSOR_ID_T DefineTensor(std::shared_ptr tensor) const { + TENSOR_ID_T tensor_id; + auto result = define_tensor( + graph_wrapper_, + &tensor_id, + tensor->GetName().c_str(), + tensor->GetShape().data(), + tensor->GetShape().size(), + tensor->GetDataType().c_str(), + tensor->GetLayout().c_str()); + if (result != GraphWrapperReturn::SUCCESS) { + throw std::runtime_error("fail in define tensor"); + } + + if (tensor->HasConstantData()) { + auto set_data_result = set_data_for_constant_tensor( + graph_wrapper_, + tensor_id, + tensor->GetDataRawPtr(), + tensor->GetDataBytes()); + if (set_data_result != GraphWrapperReturn::SUCCESS) { + throw std::runtime_error("fail in define tensor"); + } + } + + auto* quantize_param = tensor->GetQuantizeParam(); + if (quantize_param != nullptr) { + auto set_qparam_result = set_quantize_param_for_tensor( + graph_wrapper_, + tensor_id, + quantize_param->GetQuantizeType().c_str(), + quantize_param->GetScales(), + quantize_param->GetZeroPoints()); + if (set_qparam_result != GraphWrapperReturn::SUCCESS) { + throw std::runtime_error("fail in define tensor"); + } + } + + return tensor_id; + } + + NODE_ID_T DefineOpNode(std::shared_ptr op) const { + NODE_ID_T op_id; + + auto result = define_op_node( + graph_wrapper_, + &op_id, + op->GetName().c_str(), + op->GetType().c_str(), + op->GetInputs().data(), + op->GetInputs().size(), + op->GetOutputs().data(), + op->GetOutputs().size()); + if (result != GraphWrapperReturn::SUCCESS) { + throw std::runtime_error("fail in define op"); + } + + for (const auto& param : op->GetParams()) { + add_op_parameter( + graph_wrapper_, op_id, param->getKeyName().c_str(), param->Dump()); + } + + return op_id; + } + + void SetGraphInputTensors(const std::vector& tensors) const { + auto result = + set_graph_input_tensors(graph_wrapper_, tensors.data(), tensors.size()); + if (result != GraphWrapperReturn::SUCCESS) { + throw std::runtime_error("fail in set graph inputs"); + } + } + + void SetGraphOutputTensors(const std::vector& tensors) const { + auto result = set_graph_output_tensors( + graph_wrapper_, tensors.data(), tensors.size()); + if (result != GraphWrapperReturn::SUCCESS) { + throw std::runtime_error("fail in set graph outputs"); + } + } + + void FinishBuild() const { + auto result = finish_build_graph(graph_wrapper_); + + if (result != GraphWrapperReturn::SUCCESS) { + throw std::runtime_error("fail to build graph"); + } + } + + py::array_t Serialize() { + uint64_t nbytes = 0; + uint8_t* addr = nullptr; + auto result = serialize(graph_wrapper_, &addr, &nbytes); + + if (result != GraphWrapperReturn::SUCCESS || addr == nullptr || + nbytes == 0) { + throw std::runtime_error("fail to serialize"); + } + + auto serial_buf = py::array_t(nbytes); + auto serial_buf_block = serial_buf.request(); + char* serial_buf_ptr = (char*)serial_buf_block.ptr; + std::memcpy(serial_buf_ptr, addr, nbytes); + + return serial_buf; + } + + ~PyEnnGraphWrapper() { + release_graph(graph_wrapper_); + } + + private: + GraphHandler graph_wrapper_; +}; + +} // namespace enn +} // namespace executor +} // namespace torch diff --git 
a/backends/samsung/aot/wrappers/op_param_wrapper.h b/backends/samsung/aot/wrappers/op_param_wrapper.h new file mode 100644 index 00000000000..143bb80e478 --- /dev/null +++ b/backends/samsung/aot/wrappers/op_param_wrapper.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co. LTD + * All rights reserved + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + * + */ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +namespace torch { +namespace executor { +namespace enn { + +template +struct ScalarTypeCast { + constexpr static ScalarType value = ScalarType::UNKNOWN; +}; + +template <> +struct ScalarTypeCast { + constexpr static ScalarType value = ScalarType::UINT64; +}; + +template <> +struct ScalarTypeCast { + constexpr static ScalarType value = ScalarType::INT64; +}; + +template <> +struct ScalarTypeCast { + constexpr static ScalarType value = ScalarType::UINT32; +}; + +template <> +struct ScalarTypeCast { + constexpr static ScalarType value = ScalarType::INT32; +}; + +template <> +struct ScalarTypeCast { + constexpr static ScalarType value = ScalarType::FLOAT32; +}; + +template <> +struct ScalarTypeCast { + constexpr static ScalarType value = ScalarType::FLOAT64; +}; + +template <> +struct ScalarTypeCast { + constexpr static ScalarType value = ScalarType::BOOL; +}; + +class OpParamWrapper { + public: + OpParamWrapper(std::string key) : key_name_(std::move(key)) {} + + ~OpParamWrapper() = default; + + std::string getKeyName() const { + return key_name_; + } + + template + void SetScalarValue(T value) { + auto bytes = sizeof(T); + storage_ = std::unique_ptr(new uint8_t[bytes]); + memcpy(storage_.get(), &value, bytes); + size_ = 1; + is_scalar_ = true; + scalar_type_ = ScalarTypeCast::value; + } + + template + void SetVectorValue(const std::vector& value) { + auto bytes = sizeof(T) * value.size(); + storage_ = std::unique_ptr(new uint8_t[bytes]); + memcpy(storage_.get(), value.data(), bytes); + size_ = value.size(); + is_scalar_ = false; + scalar_type_ = ScalarTypeCast::value; + } + + void SetStringValue(const std::string& value) { + auto bytes = sizeof(std::string::value_type) * value.size(); + storage_ = std::unique_ptr(new uint8_t[bytes]); + memcpy(storage_.get(), value.data(), bytes); + size_ = value.size(); + is_scalar_ = false; + scalar_type_ = ScalarType::CHAR; + } + + ParamWrapper Dump() const { + ParamWrapper param; + param.data = storage_.get(); + param.size = size_; + param.is_scalar = is_scalar_; + param.type = scalar_type_; + + return param; + } + + private: + std::string key_name_; + std::unique_ptr storage_ = nullptr; + uint32_t size_ = 0; + bool is_scalar_ = false; + ScalarType scalar_type_ = ScalarType::UNKNOWN; +}; + +} // namespace enn +} // namespace executor +} // namespace torch diff --git a/backends/samsung/aot/wrappers/op_wrapper.h b/backends/samsung/aot/wrappers/op_wrapper.h new file mode 100644 index 00000000000..d5c1956affe --- /dev/null +++ b/backends/samsung/aot/wrappers/op_wrapper.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co. LTD + * All rights reserved + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ * + */ +#pragma once + +#include +#include +#include +#include + +#include +namespace torch { +namespace executor { +namespace enn { + +class EnnOpWrapper { + public: + EnnOpWrapper( + std::string op_name, + std::string op_type, + const std::vector& input_tensors_id, + const std::vector& output_tensors_id) + : name_(std::move(op_name)), + op_type_(std::move(op_type)), + input_tensors_(input_tensors_id), + output_tensors_(output_tensors_id) {} + + void AddOpParam(std::shared_ptr param) { + params_.emplace_back(std::move(param)); + } + + const std::string& GetName() const { + return name_; + } + + const std::string GetType() const { + return op_type_; + } + + const std::vector& GetInputs() const { + return input_tensors_; + } + + const std::vector& GetOutputs() const { + return output_tensors_; + } + + const std::vector>& GetParams() const { + return params_; + } + + private: + std::string name_; + std::string op_type_; + std::vector input_tensors_; + std::vector output_tensors_; + std::vector> params_; +}; + +} // namespace enn +} // namespace executor +} // namespace torch diff --git a/backends/samsung/aot/wrappers/quantize_param_wrapper.h b/backends/samsung/aot/wrappers/quantize_param_wrapper.h new file mode 100644 index 00000000000..7b9ae473b28 --- /dev/null +++ b/backends/samsung/aot/wrappers/quantize_param_wrapper.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co. LTD + * All rights reserved + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + * + */ +#pragma once + +#include +#include +#include +#include + +#include +namespace torch { +namespace executor { +namespace enn { + +class EnnQuantizeParamWrapper { + public: + EnnQuantizeParamWrapper( + std::string quantize_dtype, + const std::vector& scales, + const std::vector& zero_points) + : quantize_type_(std::move(quantize_dtype)), + scales_(scales), + zero_points_(zero_points) {} + + const std::string& GetQuantizeType() const { + return quantize_type_; + } + + ParamWrapper GetScales() const { + ParamWrapper param; + param.data = const_cast(scales_.data()); + param.size = scales_.size(); + param.is_scalar = false; + param.type = ScalarType::FLOAT32; + + return param; + } + + ParamWrapper GetZeroPoints() const { + ParamWrapper param; + param.data = const_cast(zero_points_.data()); + param.size = zero_points_.size(); + param.is_scalar = false; + param.type = ScalarType::INT32; + + return param; + } + + private: + std::string quantize_type_; + std::vector scales_; + std::vector zero_points_; +}; + +} // namespace enn +} // namespace executor +} // namespace torch diff --git a/backends/samsung/aot/wrappers/tensor_wrapper.h b/backends/samsung/aot/wrappers/tensor_wrapper.h new file mode 100644 index 00000000000..0b50c839e04 --- /dev/null +++ b/backends/samsung/aot/wrappers/tensor_wrapper.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co. LTD + * All rights reserved + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ * + */ +#pragma once + +#include +#include + +#include "quantize_param_wrapper.h" + +namespace py = pybind11; + +namespace torch { +namespace executor { +namespace enn { + +class EnnTensorWrapper { + public: + EnnTensorWrapper( + std::string tensor_name, + const std::vector& dims, + std::string data_type, + std::string layout) + : name_(std::move(tensor_name)), + shape_(dims), + data_type_(data_type), + layout_(std::move(layout)) {} + + void AddQuantizeParam( + std::string quantize_dtype, + const std::vector& scales, + const std::vector& zero_points) { + quantize_param_ = std::make_unique( + quantize_dtype, scales, zero_points); + } + + void AddData(py::array& data) { + data_bytes_ = data.nbytes(); + + if (data.data() == nullptr || data_bytes_ == 0) { + return; + } + data_ = std::unique_ptr(new uint8_t[data_bytes_]); + memcpy(data_.get(), data.data(), data_bytes_); + } + + const std::string& GetName() const { + return name_; + } + const std::vector& GetShape() const { + return shape_; + } + const std::string& GetDataType() const { + return data_type_; + } + const std::string& GetLayout() const { + return layout_; + } + const EnnQuantizeParamWrapper* GetQuantizeParam() const { + return quantize_param_.get(); + } + + bool HasConstantData() const { + return data_ != nullptr && data_bytes_ != 0; + } + + const uint8_t* GetDataRawPtr() const { + return data_.get(); + } + + uint32_t GetDataBytes() const { + return data_bytes_; + } + + private: + std::string name_; + std::vector shape_; + std::string data_type_; + std::string layout_; + std::unique_ptr quantize_param_ = nullptr; + std::unique_ptr data_ = nullptr; + uint32_t data_bytes_ = 0; +}; + +} // namespace enn +} // namespace executor +} // namespace torch diff --git a/backends/samsung/build.sh b/backends/samsung/build.sh new file mode 100755 index 00000000000..dfa6407ff50 --- /dev/null +++ b/backends/samsung/build.sh @@ -0,0 +1,133 @@ +#!/bin/bash +## Directory Info +BASE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +PROJECT_DIR=$(realpath ${BASE_DIR}/../../) + +X86_64_BUILD_DIR=${PROJECT_DIR}/build_samsung +ANDROID_BUILD_DIR=${PROJECT_DIR}/build_samsung_android +BUILD_ARCH=all +CLEAN_BUILD_DIR=false + + +EXYNOS_AI_LITECORE_ROOT=${EXYNOS_AI_LITECORE_ROOT} +ANDROID_NDK_ROOT=${ANDROID_NDK_ROOT} + +function usage() { + echo "Usage build.sh + + + + --sdk The path of downloaded ENN SDK, which is required for building. + Or export EXYNOS_AI_LITECORE_ROOT=/path/to/xxx + --ndk The path of Android NDK, or export ANDROID_NDK_ROOT=/path/to/ndk. + + --build, -b [x86_64, android, all] Default is all, x86_64 target to offline compilation, + android target to online execution. + --clean, -c Clean the build cache. + --help, -h Print the usage information. 
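+
+  Examples:
+    ./backends/samsung/build.sh -b x86_64 --sdk /path/to/ai-litecore
+    ./backends/samsung/build.sh -b android --ndk /path/to/android-ndk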
+ " +} + +function build_x86_64() { + if [[ -z ${EXYNOS_AI_LITECORE_ROOT} ]]; then + echo "Please export EXYNOS_AI_LITECORE_ROOT or set by command" + exit 1 + fi + + echo "EXYNOS_AI_LITECORE_ROOT: ${EXYNOS_AI_LITECORE_ROOT}" + echo "ANDROID_NDK_ROOT: ${ANDROID_NDK_ROOT}" + + cmake \ + -DCMAKE_INSTALL_PREFIX=${X86_64_BUILD_DIR} \ + -DEXYNOS_AI_LITECORE_ROOT=${EXYNOS_AI_LITECORE_ROOT} \ + -DEXECUTORCH_BUILD_ENN=ON \ + -DEXECUTORCH_BUILD_DEVTOOLS=ON \ + -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ + -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ + -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ + -S ${PROJECT_DIR} \ + -B ${X86_64_BUILD_DIR} + + cmake --build ${X86_64_BUILD_DIR} -j $(nproc) --target install + + rm -f ${PROJECT_DIR}/backends/samsung/python/Py*so + cp -fv ${X86_64_BUILD_DIR}/backends/samsung/Py*so ${PROJECT_DIR}/backends/samsung/python/ + cp -fv ${PROJECT_DIR}/schema/*.fbs ${PROJECT_DIR}/exir/_serialize/ +} + +function build_android() { + if [[ -z ${EXYNOS_AI_LITECORE_ROOT} ]]; then + echo "Please export EXYNOS_AI_LITECORE_ROOT or set by command" + exit 1 + fi + if [[ -z ${ANDROID_NDK_ROOT} ]]; then + echo "Please export ANDROID_NDK_ROOT or set by command" + exit 1 + fi + + ANDROID_ABI=arm64-v8a + cmake \ + -DCMAKE_INSTALL_PREFIX=${ANDROID_BUILD_DIR} \ + -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK_ROOT}/build/cmake/android.toolchain.cmake" \ + -DANDROID_ABI="${ANDROID_ABI}" \ + -DCMAKE_BUILD_TYPE=Release \ + -DEXECUTORCH_BUILD_ENN=ON \ + -DEXYNOS_AI_LITECORE_ROOT=${EXYNOS_AI_LITECORE_ROOT} \ + -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ + -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ + -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ + -DEXECUTORCH_ENABLE_LOGGING=1 \ + -DEXECUTORCH_BUILD_DEVTOOLS=ON \ + -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \ + -S ${PROJECT_DIR} \ + -B ${ANDROID_BUILD_DIR} + + cmake --build ${ANDROID_BUILD_DIR} -j $(nproc) --target install +} + +# Main +for (( i=1; i<=$#; i++)) +do + case "${!i}" in + "--sdk") + let i++ + EXYNOS_AI_LITECORE_ROOT="${!i}" + ;; + "--ndk") + let i++ + ANDROID_NDK_ROOT="${!i}" + ;; + "--clean"|"-c") + CLEAN_BUILD_DIR=true + ;; + "--build"|"-b") + let i++ + BUILD_ARCH="${!i}" + ;; + "--help"|"-h") + usage + exit 0 + ;; + *) + echo "Unknown option: ${!i}" + usage + exit 1 + ;; + esac +done + +cd ${PROJECT_DIR} +if [ "${CLEAN_BUILD_DIR}" = true ]; then + rm -rf ${X86_64_BUILD_DIR} + rm -rf ${ANDROID_BUILD_DIR} + exit 0 +fi + +if [[ "${BUILD_ARCH}" = "all" || "${BUILD_ARCH}" = "x86_64" ]]; then + build_x86_64 +fi +if [[ "${BUILD_ARCH}" = "all" || "${BUILD_ARCH}" = "android" ]]; then + build_android +fi diff --git a/backends/samsung/builders/__init__.py b/backends/samsung/builders/__init__.py new file mode 100644 index 00000000000..b3e72da36c3 --- /dev/null +++ b/backends/samsung/builders/__init__.py @@ -0,0 +1,47 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from . 
import (
+    node_visitor,
+    op_add,
+    op_avg_pool2d,
+    op_batch_norm,
+    op_cat,
+    op_clamp,
+    op_conv2d,
+    op_getitem,
+    op_hardtanh,
+    op_linear,
+    op_max_pool2d,
+    op_mean_dim,
+    op_mul,
+    op_permute,
+    op_relu,
+    op_reshape,
+    op_select,
+    op_unsqueeze,
+)
+
+__all__ = [
+    node_visitor,
+    op_add,
+    op_avg_pool2d,
+    op_batch_norm,
+    op_cat,
+    op_clamp,
+    op_conv2d,
+    op_getitem,
+    op_hardtanh,
+    op_linear,
+    op_max_pool2d,
+    op_mean_dim,
+    op_mul,
+    op_permute,
+    op_relu,
+    op_reshape,
+    op_select,
+    op_unsqueeze,
+]
diff --git a/backends/samsung/builders/node_visitor.py b/backends/samsung/builders/node_visitor.py
new file mode 100644
index 00000000000..a35c0b4715d
--- /dev/null
+++ b/backends/samsung/builders/node_visitor.py
@@ -0,0 +1,106 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Dict, Optional
+
+import torch
+from executorch.backends.samsung.builders.utils import (
+    get_map_dtype,
+    get_tensor,
+    get_tensor_type,
+)
+from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph
+from executorch.backends.transforms.utils import is_param_node
+from torch.export import ExportedProgram
+
+
+class NodeVisitor:
+    """
+    Node visitor pattern for visiting nodes in an edge IR graph
+    """
+
+    def __init__(self, exported_program: ExportedProgram) -> None:
+        self._exported_program = exported_program
+
+    @property
+    def exported_program(self) -> ExportedProgram:
+        return self._exported_program
+
+    def define_node(self, node: torch.fx.Node, enn_graph: EnnGraph):
+        raise NotImplementedError("NodeVisitor must be extended!")
+
+    def define_tensor(
+        self,
+        node: torch.fx.Node,
+        enn_graph: EnnGraph,
+        vals_to_ids: Dict[torch.fx.Node, int],
+        swap_nc_for_weights: bool = False,
+        output_idx: Optional[int] = None,
+    ) -> int:
+        if node in vals_to_ids and (output_idx is None or output_idx == 0):
+            return vals_to_ids[node]
+
+        # Get tensor basic information
+        tensor = get_tensor(self.exported_program, node)
+
+        if output_idx is not None:
+            tensor = tensor[output_idx]
+
+        tensor_type = get_tensor_type(self.exported_program, node)
+        data_type = get_map_dtype(tensor.dtype)
+
+        const_data = None
+        if is_param_node(self.exported_program, node):
+            if swap_nc_for_weights:
+                tensor = torch.swapdims(tensor, 0, 1)
+            const_data = tensor.contiguous().detach().numpy()
+
+        dims = [1] if len(tensor.size()) == 0 else list(tensor.size())
+
+        enn_tensor_id = enn_graph.define_tensor(
+            node.name,
+            dims,
+            data_type,
+            tensor_type.name,
+            const_data,
+        )
+        assert enn_tensor_id is not None
+        vals_to_ids[node] = enn_tensor_id
+
+        return enn_tensor_id
+
+
+_node_visitor_dict = {}
+
+
+def register_node_visitor(visitor):
+    assert (
+        isinstance(visitor, type)
+        and issubclass(visitor, NodeVisitor)
+        and hasattr(visitor, "target")
+    ), f"Ill-formed NodeVisitor subclass; can't register: {visitor}"
+    if isinstance(visitor.target, str):
+        _node_visitor_dict[visitor.target] = visitor
+    elif isinstance(visitor.target, (list, tuple)):
+        for target in visitor.target:
+            _node_visitor_dict[target] = visitor
+    else:
+        raise TypeError(
+            f"target of visitor should be str | Tuple[str] | List[str], not {type(visitor.target)}"
+        )
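+
+
+# A concrete builder registers itself by subclassing NodeVisitor, setting
+# `target` to the aten op name, and decorating the class with
+# @register_node_visitor, e.g. (mirroring builders/op_add.py below):
+#
+#   @register_node_visitor
+#   class AddVisitor(NodeVisitor):
+#       target = "aten.add.Tensor"
+#
+#       def define_node(self, node, enn_graph, vals_to_ids) -> None:
+#           ...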
+def get_node_visitors(*args) -> Dict[str, NodeVisitor]:
+    """
+    Create a new visitor instance for each registered target at runtime,
+    and put them in a dict keyed by target name.
+    """
+    node_visitors = {}
+    for target, visitor in _node_visitor_dict.items():
+        assert callable(
+            visitor
+        ), f"Expecting a callable class, but got {visitor} of type {type(visitor)}"
+        node_visitors[target] = visitor(*args)
+    return node_visitors
diff --git a/backends/samsung/builders/op_add.py b/backends/samsung/builders/op_add.py
new file mode 100644
index 00000000000..1b0dddb0d02
--- /dev/null
+++ b/backends/samsung/builders/op_add.py
@@ -0,0 +1,36 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import Dict
+
+import torch
+from executorch.backends.samsung.builders.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph
+
+
+@register_node_visitor
+class AddVisitor(NodeVisitor):
+    target = "aten.add.Tensor"
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        enn_graph: EnnGraph,
+        vals_to_ids: Dict[torch.Tensor, int],
+    ) -> None:
+        input1 = node.args[0]
+        input_id_1 = self.define_tensor(input1, enn_graph, vals_to_ids)
+        input2 = node.args[1]
+        input_id_2 = self.define_tensor(input2, enn_graph, vals_to_ids)
+
+        output_id = self.define_tensor(node, enn_graph, vals_to_ids)
+
+        enn_graph.define_op(node.name, "ELTSUM", [input_id_1, input_id_2], [output_id])
diff --git a/backends/samsung/builders/op_avg_pool2d.py b/backends/samsung/builders/op_avg_pool2d.py
new file mode 100644
index 00000000000..ad7ccbac3ae
--- /dev/null
+++ b/backends/samsung/builders/op_avg_pool2d.py
@@ -0,0 +1,70 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import cast, Dict, List
+
+import torch
+from executorch.backends.samsung.builders.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph
+
+
+@register_node_visitor
+class AvgPool2dVisitor(NodeVisitor):
+    target = "aten.avg_pool2d.default"
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        enn_graph: EnnGraph,
+        vals_to_ids: Dict[torch.Tensor, int],
+    ) -> None:
+        input = node.args[0]
+        input_id = self.define_tensor(input, enn_graph, vals_to_ids)
+
+        kernel_size = cast(List[int], node.args[1])
+        if len(kernel_size) == 1:
+            kernel_size = kernel_size * 2
+
+        stride = cast(List[int], node.args[2]) if len(node.args) > 2 else kernel_size
+        if len(stride) == 1:
+            stride = stride * 2
+
+        padding = cast(List[int], node.args[3]) if len(node.args) > 3 else [0, 0]
+        if len(padding) == 1:
+            padding = padding * 2
+        explicit_padding = [padding[0], padding[1], padding[0], padding[1]]
+
+        params = {}
+        params["kernel_h"] = kernel_size[0]
+        params["kernel_w"] = kernel_size[1]
+        params["stride_h"] = stride[0]
+        params["stride_w"] = stride[1]
+        params["padding"] = "EXPLICIT"
+        params["explicit_padding"] = explicit_padding
+
+        if len(node.args) > 4:
+            ceil_mode = cast(bool, node.args[4])
+            assert not ceil_mode, "ceil_mode = True is not supported."
+
+        if len(node.args) > 5:
+            params["count_include_pad"] = cast(bool, node.args[5])
+        else:
+            params["count_include_pad"] = True
+
+        if len(node.args) > 6 and node.args[6] is not None:
+            divisor_override = cast(int, node.args[6])
+            assert (
+                divisor_override == kernel_size[0] * kernel_size[1]
+            ), "divisor_override is only supported when it equals the pooling region (kernel_h * kernel_w)."
+
+        output_id = self.define_tensor(node, enn_graph, vals_to_ids)
+
+        enn_graph.define_op(node.name, "AVGPOOL2D", [input_id], [output_id], params)
diff --git a/backends/samsung/builders/op_batch_norm.py b/backends/samsung/builders/op_batch_norm.py
new file mode 100644
index 00000000000..e5373a8223a
--- /dev/null
+++ b/backends/samsung/builders/op_batch_norm.py
@@ -0,0 +1,56 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import Dict
+
+import torch
+from executorch.backends.samsung.builders.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph
+
+
+@register_node_visitor
+class BatchNormVisitor(NodeVisitor):
+    target = "aten._native_batch_norm_legit_no_training.default"
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        enn_graph: EnnGraph,
+        vals_to_ids: Dict[torch.Tensor, int],
+    ) -> None:
+        all_input_tensors = []
+        input = node.args[0]
+        input_id = self.define_tensor(input, enn_graph, vals_to_ids)
+        all_input_tensors.append(input_id)
+
+        weight_node, bias_node, mean_node, var_node = (
+            node.args[1],
+            node.args[2],
+            node.args[3],
+            node.args[4],
+        )
+        weight_id = self.define_tensor(weight_node, enn_graph, vals_to_ids)
+        all_input_tensors.append(weight_id)
+        bias_id = self.define_tensor(bias_node, enn_graph, vals_to_ids)
+        all_input_tensors.append(bias_id)
+        mean_id = self.define_tensor(mean_node, enn_graph, vals_to_ids)
+        all_input_tensors.append(mean_id)
+        var_id = self.define_tensor(var_node, enn_graph, vals_to_ids)
+        all_input_tensors.append(var_id)
+
+        eps = node.args[-1]
+        params = {"epsilon": eps}
+
+        output_id = self.define_tensor(node, enn_graph, vals_to_ids, output_idx=0)
+
+        enn_graph.define_op(
+            node.name, "BatchNormalization", all_input_tensors, [output_id], params
+        )
diff --git a/backends/samsung/builders/op_cat.py b/backends/samsung/builders/op_cat.py
new file mode 100644
index 00000000000..e9c0a32b389
--- /dev/null
+++ b/backends/samsung/builders/op_cat.py
@@ -0,0 +1,42 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
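+# Concatenation is lowered to the ENN "CONCAT" op. The dim argument may be
+# negative, so it is normalized below with `dim % rank`; e.g. dim=-1 on a
+# rank-4 tensor becomes axis 3.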
+from typing import cast, Dict, List + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph +from executorch.backends.transforms import get_shape + + +@register_node_visitor +class CatVisitor(NodeVisitor): + target = "aten.cat.default" + + def __init__(self, *args): + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + tensors = cast(List[torch.fx.Node], node.args[0]) + input_tensor_ids = [] + + for in_tensor in tensors: + input_id = self.define_tensor(in_tensor, enn_graph, vals_to_ids) + input_tensor_ids.append(input_id) + + in_shape = get_shape(node) + axis = cast(int, node.args[1]) % len(in_shape) if len(node.args) >= 2 else 0 + params = {"axis": axis} + + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + enn_graph.define_op(node.name, "CONCAT", input_tensor_ids, [output_id], params) diff --git a/backends/samsung/builders/op_clamp.py b/backends/samsung/builders/op_clamp.py new file mode 100644 index 00000000000..c5670b80fa3 --- /dev/null +++ b/backends/samsung/builders/op_clamp.py @@ -0,0 +1,43 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +from typing import cast, Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class ClampVisitor(NodeVisitor): + target = "aten.clamp.default" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + input = node.args[0] + input_id = self.define_tensor(input, enn_graph, vals_to_ids) + + # The default value of lower bound and upper bound + output_min = torch.finfo(torch.float32).min + output_max = torch.finfo(torch.float32).max + if node.args[1] is not None: + output_min = cast(float, node.args[1]) + if len(node.args) > 2 and node.args[2] is not None: + output_max = cast(float, node.args[2]) + + params = {"minimum": output_min, "maximum": output_max} + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + + enn_graph.define_op(node.name, "CLIP", [input_id], [output_id], params) diff --git a/backends/samsung/builders/op_conv2d.py b/backends/samsung/builders/op_conv2d.py new file mode 100644 index 00000000000..881a533801f --- /dev/null +++ b/backends/samsung/builders/op_conv2d.py @@ -0,0 +1,85 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
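+# Convolution is lowered to CONV2D/DECONV2D, or to the depthwise variants
+# DWCONV2D/DWDECONV2D when the weight layout indicates one filter per channel:
+# e.g. a depthwise conv over 32 channels with groups=32 has a weight of shape
+# [32, 1, kh, kw], so kernel_shape[1] == 1 and kernel_shape[0] / groups == 1.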
+from typing import cast, Dict, List
+
+import torch
+from executorch.backends.samsung.builders.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph
+from executorch.backends.transforms import get_shape
+
+
+@register_node_visitor
+class Conv2dVisitor(NodeVisitor):
+    target = "aten.convolution.default"
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        enn_graph: EnnGraph,
+        vals_to_ids: Dict[torch.Tensor, int],
+    ) -> None:
+        all_input_tensors = []
+        input = node.args[0]
+        input_id = self.define_tensor(input, enn_graph, vals_to_ids)
+        all_input_tensors.append(input_id)
+
+        is_transpose_conv = cast(bool, node.args[6])
+        weight_node = node.args[1]
+        weight_id = self.define_tensor(
+            weight_node,
+            enn_graph,
+            vals_to_ids,
+            is_transpose_conv,
+        )
+        all_input_tensors.append(weight_id)
+
+        if node.args[2] is not None:
+            bias_node = node.args[2]
+            bias_id = self.define_tensor(bias_node, enn_graph, vals_to_ids)
+            all_input_tensors.append(bias_id)
+
+        stride = cast(List[int], node.args[3])
+        padding = cast(List[int], node.args[4])
+        dilation = cast(List[int], node.args[5])
+        groups = cast(int, node.args[8])
+        explicit_padding = [padding[0], padding[1], padding[0], padding[1]]
+
+        input_shape = get_shape(input)
+        kernel_shape = get_shape(weight_node)
+        params = {}
+        params["kernel_h"] = kernel_shape[2]
+        params["kernel_w"] = kernel_shape[3]
+        params["stride_h"] = stride[0]
+        params["stride_w"] = stride[1]
+        params["dilation_h"] = dilation[0]
+        params["dilation_w"] = dilation[1]
+        params["groups"] = groups
+        params["padding"] = "EXPLICIT"
+        params["padding_type"] = "CONSTANT"  # CONSTANT will be zero-padding
+        params["explicit_padding"] = explicit_padding
+        params["in_channels"] = input_shape[1]
+        # Weight layout is [out_c, in_c / groups, kh, kw] for convolution and
+        # [in_c, out_c / groups, kh, kw] for transposed convolution; in both
+        # cases out_c == kernel_shape[0] * kernel_shape[1] * groups // in_c.
+        params["out_channels"] = kernel_shape[0] * kernel_shape[1] * groups
+        params["out_channels"] //= input_shape[1]
+
+        output_id = self.define_tensor(node, enn_graph, vals_to_ids)
+
+        is_depthwise_conv = kernel_shape[1] == 1 and kernel_shape[0] / groups == 1
+        if is_depthwise_conv:
+            conv_type = "DWDECONV2D" if is_transpose_conv else "DWCONV2D"
+        else:
+            conv_type = "DECONV2D" if is_transpose_conv else "CONV2D"
+        enn_graph.define_op(
+            node.name, conv_type, all_input_tensors, [output_id], params
+        )
diff --git a/backends/samsung/builders/op_getitem.py b/backends/samsung/builders/op_getitem.py
new file mode 100644
index 00000000000..901ec73cf7d
--- /dev/null
+++ b/backends/samsung/builders/op_getitem.py
@@ -0,0 +1,32 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
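+# getitem nodes merely select one output of a multi-output op (e.g. the values
+# tensor of aten.max_pool2d_with_indices). The producing op's visitor already
+# defines the selected output via `output_idx`, so there is nothing to lower
+# here and the visitor below is intentionally a no-op.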
+from typing import Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class OpGetItemVisitor(NodeVisitor): + target = "getitem" + """ + do nothing if node is getitem + """ + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + return diff --git a/backends/samsung/builders/op_hardtanh.py b/backends/samsung/builders/op_hardtanh.py new file mode 100644 index 00000000000..4f667bf5299 --- /dev/null +++ b/backends/samsung/builders/op_hardtanh.py @@ -0,0 +1,38 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +from typing import cast, Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class HardTanhVisitor(NodeVisitor): + target = "aten.hardtanh.default" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + input = node.args[0] + input_id = self.define_tensor(input, enn_graph, vals_to_ids) + + output_min = cast(float, node.args[1]) if len(node.args) > 1 else -1 + output_max = cast(float, node.args[2]) if len(node.args) > 2 else 1 + params = {"minimum": output_min, "maximum": output_max} + + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + + enn_graph.define_op(node.name, "CLIP", [input_id], [output_id], params) diff --git a/backends/samsung/builders/op_linear.py b/backends/samsung/builders/op_linear.py new file mode 100644 index 00000000000..2f7aa1e6415 --- /dev/null +++ b/backends/samsung/builders/op_linear.py @@ -0,0 +1,49 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
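+# aten.linear stores its weight as [out_features, in_features], so the FC
+# parameters below take out_channels from weight_shape[0] and in_channels
+# from weight_shape[1].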
+from typing import Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph +from executorch.backends.transforms import get_shape + + +@register_node_visitor +class LinearVisitor(NodeVisitor): + target = "aten.linear.default" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + all_input_tensors = [] + input = node.args[0] + input_id = self.define_tensor(input, enn_graph, vals_to_ids) + all_input_tensors.append(input_id) + + weight_node = node.args[1] + weight_id = self.define_tensor(weight_node, enn_graph, vals_to_ids) + all_input_tensors.append(weight_id) + + if len(node.args) > 2 and node.args[2] is not None: + bias_node = node.args[2] + bias_id = self.define_tensor(bias_node, enn_graph, vals_to_ids) + all_input_tensors.append(bias_id) + + weight_shape = get_shape(weight_node) + params = {"in_channels": weight_shape[1], "out_channels": weight_shape[0]} + + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + + enn_graph.define_op(node.name, "FC", all_input_tensors, [output_id], params) diff --git a/backends/samsung/builders/op_max_pool2d.py b/backends/samsung/builders/op_max_pool2d.py new file mode 100644 index 00000000000..d386dd30b1a --- /dev/null +++ b/backends/samsung/builders/op_max_pool2d.py @@ -0,0 +1,95 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +from typing import cast, Dict, List + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class MaxPool2dVisitor(NodeVisitor): + target = ["aten.max_pool2d.default", "aten.max_pool2d_with_indices.default"] + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + input = node.args[0] + input_id = self.define_tensor(input, enn_graph, vals_to_ids) + + is_indices = False + if node.target.__name__ == "aten.max_pool2d_with_indices.default": + users = list(node.users.keys()) + + for user in users: + if user.target.__name__ == "getitem": + getitem_index = user.args[1] + is_indices = True + if getitem_index != 0: + raise AssertionError( + "Expected second argument of getitem" + f" node for {node.target.__name__ } to be 0, " + f"got {getitem_index}. ENN delegate currently " + "only supports getting just the max " + "values from the op, but doesn't support" + " getting the corresponding indices." 
+                    )
+
+        kernel_size = cast(List[int], node.args[1])
+        if len(kernel_size) == 1:
+            kernel_size = kernel_size * 2
+
+        stride = cast(List[int], node.args[2]) if len(node.args) > 2 else kernel_size
+        if len(stride) == 1:
+            stride = stride * 2
+
+        padding = cast(List[int], node.args[3]) if len(node.args) > 3 else [0, 0]
+        if len(padding) == 1:
+            padding = padding * 2
+        explicit_padding = [padding[0], padding[1], padding[0], padding[1]]
+
+        dilation = cast(List[int], node.args[4]) if len(node.args) > 4 else [1, 1]
+        if len(dilation) == 1:
+            dilation = dilation * 2
+
+        params = {
+            "kernel_h": kernel_size[0],
+            "kernel_w": kernel_size[1],
+            "stride_h": stride[0],
+            "stride_w": stride[1],
+            "padding": "EXPLICIT",
+            "explicit_padding": explicit_padding,
+            "dilation_h": dilation[0],
+            "dilation_w": dilation[1],
+        }
+
+        if len(node.args) > 5:
+            ceil_mode = cast(bool, node.args[5])
+            assert not ceil_mode, "ceil_mode=True is not supported."
+
+        if not is_indices:
+            output_id = self.define_tensor(
+                node,
+                enn_graph,
+                vals_to_ids,
+            )
+        else:
+            output_id = self.define_tensor(
+                node,
+                enn_graph,
+                vals_to_ids,
+                output_idx=0,
+            )
+
+        enn_graph.define_op(node.name, "MAXPOOL2D", [input_id], [output_id], params)
diff --git a/backends/samsung/builders/op_mean_dim.py b/backends/samsung/builders/op_mean_dim.py
new file mode 100644
index 00000000000..2f07f870ec4
--- /dev/null
+++ b/backends/samsung/builders/op_mean_dim.py
@@ -0,0 +1,44 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import cast, Dict, List
+
+import torch
+from executorch.backends.samsung.builders.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph
+from executorch.backends.transforms import get_shape
+
+
+@register_node_visitor
+class ReduceMeanVisitor(NodeVisitor):
+    target = "aten.mean.dim"
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        enn_graph: EnnGraph,
+        vals_to_ids: Dict[torch.Tensor, int],
+    ) -> None:
+        input = node.args[0]
+        input_id = self.define_tensor(input, enn_graph, vals_to_ids)
+
+        output_id = self.define_tensor(node, enn_graph, vals_to_ids)
+
+        # normalize negative dims and keep the reduce axes sorted
+        dims = cast(List[int], node.args[1])
+        in_shape = get_shape(input)
+        reduce_axes = sorted(dim % len(in_shape) for dim in dims)
+
+        keep_dim = node.args[2] if len(node.args) >= 3 else False
+        params = {"keep_dims": keep_dim, "axis": reduce_axes}
+        enn_graph.define_op(node.name, "REDUCEMEAN", [input_id], [output_id], params)
diff --git a/backends/samsung/builders/op_mul.py b/backends/samsung/builders/op_mul.py
new file mode 100644
index 00000000000..dce531ff0b0
--- /dev/null
+++ b/backends/samsung/builders/op_mul.py
@@ -0,0 +1,33 @@
+# Copyright (c) 2024 Samsung Electronics Co.
LTD +# All rights reserved +from typing import Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class MulVisitor(NodeVisitor): + target = "aten.mul.Tensor" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + input1 = node.args[0] + input_id_1 = self.define_tensor(input1, enn_graph, vals_to_ids) + input2 = node.args[1] + input_id_2 = self.define_tensor(input2, enn_graph, vals_to_ids) + + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + + enn_graph.define_op(node.name, "ELTMUL", [input_id_1, input_id_2], [output_id]) diff --git a/backends/samsung/builders/op_permute.py b/backends/samsung/builders/op_permute.py new file mode 100644 index 00000000000..646eac4c06a --- /dev/null +++ b/backends/samsung/builders/op_permute.py @@ -0,0 +1,37 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. +from typing import cast, Dict, List + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class PermuteVisitor(NodeVisitor): + target = "aten.permute_copy.default" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + input = node.args[0] + input_id = self.define_tensor(input, enn_graph, vals_to_ids) + # permutation + permute_order = cast(List[int], node.args[1]) + params = {"perm": permute_order} + + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + + enn_graph.define_op(node.name, "TRANSPOSE", [input_id], [output_id], params) diff --git a/backends/samsung/builders/op_relu.py b/backends/samsung/builders/op_relu.py new file mode 100644 index 00000000000..ba90116be1d --- /dev/null +++ b/backends/samsung/builders/op_relu.py @@ -0,0 +1,34 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
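+
+# Lowers aten.relu.default to the ENN "RELU" op. ReLU takes no attributes, so
+# only the input and output tensor ids are registered.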
+from typing import Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class ReluVisitor(NodeVisitor): + target = "aten.relu.default" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + input = node.args[0] + input_id = self.define_tensor(input, enn_graph, vals_to_ids) + + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + + enn_graph.define_op(node.name, "RELU", [input_id], [output_id]) diff --git a/backends/samsung/builders/op_reshape.py b/backends/samsung/builders/op_reshape.py new file mode 100644 index 00000000000..1f4e85ac059 --- /dev/null +++ b/backends/samsung/builders/op_reshape.py @@ -0,0 +1,34 @@ +# Copyright (c) 2024 Samsung Electronics Co. LTD +# All rights reserved +from typing import Dict + +import torch +from executorch.backends.samsung.builders.node_visitor import ( + NodeVisitor, + register_node_visitor, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph + + +@register_node_visitor +class ReshapeVisitor(NodeVisitor): + target = "aten.view_copy.default" + + def __init__(self, *args) -> None: + super().__init__(*args) + + def define_node( + self, + node: torch.fx.Node, + enn_graph: EnnGraph, + vals_to_ids: Dict[torch.Tensor, int], + ) -> None: + input = node.args[0] + input_id = self.define_tensor(input, enn_graph, vals_to_ids) + + output_id = self.define_tensor(node, enn_graph, vals_to_ids) + + new_shape = node.args[1] + enn_graph.define_op( + node.name, "RESHAPE", [input_id], [output_id], {"new_shape": new_shape} + ) diff --git a/backends/samsung/builders/op_select.py b/backends/samsung/builders/op_select.py new file mode 100644 index 00000000000..26f455b2548 --- /dev/null +++ b/backends/samsung/builders/op_select.py @@ -0,0 +1,52 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
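+
+# Lowers aten.select(.copy).int to an ENN "STRIDEDSLICE": the slice keeps a
+# single index along `dim`, and shrink_axis_mask (bit `dim` set) removes that
+# axis from the output, matching aten.select semantics.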
+from typing import cast, Dict
+
+import torch
+from executorch.backends.samsung.builders.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph
+from executorch.backends.transforms import get_shape
+
+
+@register_node_visitor
+class SelectVisitor(NodeVisitor):
+    target = ["aten.select_copy.int", "aten.select.int"]
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        enn_graph: EnnGraph,
+        vals_to_ids: Dict[torch.Tensor, int],
+    ) -> None:
+        input = node.args[0]
+        input_id = self.define_tensor(input, enn_graph, vals_to_ids)
+
+        output_id = self.define_tensor(node, enn_graph, vals_to_ids)
+
+        in_shape = get_shape(input)
+        dim = cast(int, node.args[1]) % len(in_shape)
+        index = cast(int, node.args[2]) % in_shape[dim]
+
+        begin = [0] * len(in_shape)
+        begin[dim] = index
+        # copy the shape so the cached shape list is not mutated in place
+        end = list(in_shape)
+        end[dim] = index + 1
+        strides = [1] * len(in_shape)
+
+        params = {
+            "begin": begin,
+            "end": end,
+            "strides": strides,
+            "shrink_axis_mask": pow(2, dim),
+        }
+
+        enn_graph.define_op(node.name, "STRIDEDSLICE", [input_id], [output_id], params)
diff --git a/backends/samsung/builders/op_unsqueeze.py b/backends/samsung/builders/op_unsqueeze.py
new file mode 100644
index 00000000000..942c3307de7
--- /dev/null
+++ b/backends/samsung/builders/op_unsqueeze.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import Dict
+
+import torch
+from executorch.backends.samsung.builders.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph
+
+
+@register_node_visitor
+class UnsqueezeVisitor(NodeVisitor):
+    target = "aten.unsqueeze_copy.default"
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        enn_graph: EnnGraph,
+        vals_to_ids: Dict[torch.Tensor, int],
+    ) -> None:
+        input = node.args[0]
+        input_id = self.define_tensor(input, enn_graph, vals_to_ids)
+
+        output_id = self.define_tensor(node, enn_graph, vals_to_ids)
+
+        enn_graph.define_op(node.name, "RESHAPE", [input_id], [output_id])
diff --git a/backends/samsung/builders/utils.py b/backends/samsung/builders/utils.py
new file mode 100644
index 00000000000..58c84ff6d31
--- /dev/null
+++ b/backends/samsung/builders/utils.py
@@ -0,0 +1,65 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
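+
+# Shared helpers for the ENN builders: torch-dtype-to-ENN-name mapping and
+# classification of FX nodes into graph inputs, outputs, constants, and
+# intermediate feature maps.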
+
+from enum import Enum
+
+import torch
+from executorch.backends.samsung.utils.utils import is_graph_input, is_graph_output
+from executorch.backends.transforms.utils import get_param_tensor, is_param_node
+
+from torch.export import ExportedProgram
+
+DATA_TYPE_STR_MAPPING = {
+    torch.int8: "INT8",
+    torch.uint8: "UINT8",
+    torch.int16: "INT16",
+    torch.uint16: "UINT16",
+    torch.int32: "INT32",
+    torch.int64: "INT64",
+    torch.float16: "FLOAT16",
+    torch.float32: "FLOAT32",
+}
+
+TORCH_TYPE_QTYPE_MAPPING = {
+    torch.int8: torch.qint8,
+    torch.uint8: torch.quint8,
+    torch.int32: torch.qint32,
+}
+
+
+class TensorType(Enum):
+    INPUT = 0
+    OUTPUT = 1
+    CONSTANT = 2
+    FEATUREMAP = 3
+
+
+def get_tensor_type(
+    exported_program: ExportedProgram, tensor: torch.fx.Node
+) -> TensorType:
+    if is_graph_input(exported_program, tensor):
+        return TensorType.INPUT
+    elif is_graph_output(tensor):
+        return TensorType.OUTPUT
+    elif is_param_node(exported_program, tensor):
+        return TensorType.CONSTANT
+    else:
+        return TensorType.FEATUREMAP
+
+
+def get_map_dtype(dtype):
+    if dtype not in DATA_TYPE_STR_MAPPING:
+        raise RuntimeError(f"No ENN data type mapping for {dtype}")
+    return DATA_TYPE_STR_MAPPING[dtype]
+
+
+def get_tensor(exported_program: ExportedProgram, node: torch.fx.Node):
+    if not is_param_node(exported_program, node):
+        return node.meta["val"]
+    tensor = get_param_tensor(exported_program, node)
+    return tensor.contiguous()
+
+
+def affine_type_to_str(ttype: TensorType):
+    return ttype.name
diff --git a/backends/samsung/enn_preprocess.py b/backends/samsung/enn_preprocess.py
new file mode 100644
index 00000000000..ca95e5e8611
--- /dev/null
+++ b/backends/samsung/enn_preprocess.py
@@ -0,0 +1,92 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
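+
+# Ahead-of-time entry point for the ENN delegate: run the graph-level passes,
+# walk the FX graph through the registered node visitors to build an EnnGraph,
+# then compile the serialized graph and embed the returned context binary.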
+ +import logging +from typing import Dict, final, List + +import executorch.backends.samsung.python.PyEnnWrapperAdaptor as PyEnnWrapper +import torch +from executorch.backends.samsung.builders.node_visitor import get_node_visitors +from executorch.backends.samsung.serialization.compile_options import ( + ENN_COMPILE_OPTION_TITLE, +) +from executorch.backends.samsung.serialization.enn_graph_schema import EnnGraph +from executorch.backends.samsung.utils.utils import get_compile_spec +from executorch.backends.transforms.addmm_mm_to_linear import AddmmToLinearTransform +from executorch.backends.transforms.fuse_batch_norm_with_conv import ( + FuseBatchNormWithConvPass, +) + +from executorch.backends.transforms.remove_getitem_op import RemoveGetItemPass + +from executorch.exir.backend.backend_details import ( + BackendDetails, + CompileSpec, + PreprocessResult, +) + +from executorch.exir.passes import PassManager + +from torch.export.exported_program import ExportedProgram + + +@final +class EnnBackend(BackendDetails): + @staticmethod + def preprocess( + edge_program: ExportedProgram, + compile_specs: List[CompileSpec], + ) -> PreprocessResult: + enn_wrapper = PyEnnWrapper.EnnWrapper() + option_spec = get_compile_spec( + compile_specs, ENN_COMPILE_OPTION_TITLE, required=True + ) + enn_wrapper.Init(option_spec.value) + + enn_preprocess_passes = PassManager( + passes=[ + FuseBatchNormWithConvPass(edge_program), + AddmmToLinearTransform(), + RemoveGetItemPass(), + ] + ) + pass_result = enn_preprocess_passes(edge_program.graph_module) + assert pass_result is not None + + enn_graph = EnnGraph() + # node visitors + node_visitors = get_node_visitors(edge_program) + + vals_to_ids: Dict[torch.fx.Node, int] = {} + for node in pass_result.graph_module.graph.nodes: + if node.op == "call_function": + logging.info(f"Visiting: {node}, {node.target.__name__}") + if node.target.__name__ in node_visitors: + node_visitors[node.target.__name__].define_node( + node, enn_graph, vals_to_ids + ) + else: + raise RuntimeError( + f"{node.target.__name__}" " is not supported in ENN Delegate" + ) + elif node.op in [ + "get_attr", + "placeholder", + "output", + ]: + continue + else: + raise RuntimeError(f"{node.op}" " is not supported in ENN Delegate") + + # Compile Graph + enn_graph.finish() + ser_buf = enn_graph.serialize() + enn_context_binary = enn_wrapper.Compile(ser_buf) + assert enn_context_binary is not None and len(enn_context_binary) > 0 + enn_wrapper.Destroy() + return PreprocessResult( + processed_bytes=bytes(enn_context_binary), debug_handle_map={} + ) diff --git a/backends/samsung/partition/enn_partitioner.py b/backends/samsung/partition/enn_partitioner.py new file mode 100644 index 00000000000..466a7d13e08 --- /dev/null +++ b/backends/samsung/partition/enn_partitioner.py @@ -0,0 +1,111 @@ +# Copyright (c) 2025 Samsung Electronics Co. LTD +# All rights reserved +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
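+
+# Partitioner for the ENN delegate: a call_function node is delegated when a
+# registered node visitor (or the native backend query) accepts it; connected
+# supported nodes are grouped into partitions and tagged for EnnBackend.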
+import logging
+from typing import Any, Dict, List
+
+import executorch.backends.samsung.builders.node_visitor as node_visitor
+
+import executorch.backends.samsung.python.PyEnnWrapperAdaptor as PyEnnWrapper
+
+import torch
+from executorch.backends.samsung.enn_preprocess import EnnBackend
+from executorch.backends.samsung.serialization.compile_options import (
+    ENN_COMPILE_OPTION_TITLE,
+)
+from executorch.backends.samsung.utils.utils import get_compile_spec
+from executorch.exir.backend.backend_details import CompileSpec
+from executorch.exir.backend.canonical_partitioners.pattern_op_partitioner import (
+    generate_partitions_from_list_of_nodes,
+)
+from executorch.exir.backend.partitioner import (
+    DelegationSpec,
+    Partitioner,
+    PartitionResult,
+)
+from executorch.exir.backend.utils import tag_constant_data
+
+from executorch.exir.dialects._ops import ops as exir_ops
+from torch.fx.passes.infra.partitioner import Partition
+from torch.fx.passes.operator_support import OperatorSupportBase
+
+SUPPORTED_OPS = [
+    exir_ops.edge.aten.addmm.default,
+]
+
+
+class EnnOperatorSupport(OperatorSupportBase):
+
+    def __init__(
+        self,
+        edge_program: torch.export.ExportedProgram,
+        compile_specs: List[CompileSpec],
+    ):
+        self.edge_program = edge_program
+        self.enn_wrapper = PyEnnWrapper.EnnWrapper()
+        self.node_visitors = node_visitor.get_node_visitors(edge_program)
+        option_spec = get_compile_spec(
+            compile_specs, ENN_COMPILE_OPTION_TITLE, required=True
+        )
+        self.enn_wrapper.Init(option_spec.value)
+
+    def is_node_supported(self, _, node: torch.fx.Node) -> bool:
+        # Only call_function nodes can be delegated; get_attr, placeholder and
+        # output nodes stay in the host graph.
+        if node.op != "call_function":
+            return False
+
+        if node.target in SUPPORTED_OPS or node.target.__name__ in self.node_visitors:
+            return True
+
+        return self.enn_wrapper.IsNodeSupportedByBackend()
+
+    def __del__(self):
+        self.enn_wrapper.Destroy()
+
+
+class EnnPartitioner(Partitioner):
+    def __init__(self, compile_specs: List[CompileSpec]):
+        # TODO(anyone): Add meaningful initialize
+        self.delegation_spec = DelegationSpec(EnnBackend.__name__, compile_specs)
+        self.partition_tags: Dict[str, DelegationSpec] = {}
+        self.compile_specs = compile_specs
+
+    def generate_partitions(
+        self, edge_program: torch.export.ExportedProgram
+    ) -> List[Any]:
+        self.op_support_checker = EnnOperatorSupport(edge_program, self.compile_specs)
+        return generate_partitions_from_list_of_nodes(
+            edge_program.graph_module,
+            op_support=self.op_support_checker,
+        )
+
+    def tag_nodes(self, partitions: List[Partition]) -> Dict[str, DelegationSpec]:
+        partition_tags: Dict[str, DelegationSpec] = {}
+        for partition in partitions:
+            # Tag every node in the partition with its delegation tag
+            delegation_tag = f"enn_{partition.id}"
+            for node in partition.nodes:
+                node.meta["delegation_tag"] = delegation_tag
+            partition_tags[delegation_tag] = self.delegation_spec
+        return partition_tags
+
+    # override
+    def partition(self, edge_program: torch.export.ExportedProgram) -> PartitionResult:
+        partitions = self.generate_partitions(edge_program)
+        logging.info(f"Found {len(partitions)} subgraphs to partition and lower.")
+        if len(partitions) != 0:
+            self.partition_tags = self.tag_nodes(partitions)
+            tag_constant_data(edge_program)
+        del self.op_support_checker
+        return PartitionResult(
+            tagged_exported_program=edge_program, partition_tags=self.partition_tags
+        )
diff --git a/backends/samsung/python/.gitignore b/backends/samsung/python/.gitignore
new file mode 100644
index 00000000000..8e711cc19ff
--- /dev/null
+++ b/backends/samsung/python/.gitignore
@@ -0,0 +1,5 @@
+# exclude the pybind libraries
+*
+
+# keep folder for ease of use
+!.gitignore
diff --git a/backends/samsung/runtime/CMakeLists.txt b/backends/samsung/runtime/CMakeLists.txt
new file mode 100644
index 00000000000..deb93f31bc8
--- /dev/null
+++ b/backends/samsung/runtime/CMakeLists.txt
@@ -0,0 +1,22 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# logging
+target_sources(
+  enn_logging
+  PUBLIC ${CMAKE_CURRENT_LIST_DIR}/logging.h
+  PRIVATE ${CMAKE_CURRENT_LIST_DIR}/logging.cpp
+)
+
+if(${ANDROID})
+  # backend
+  target_sources(
+    enn_backend
+    PRIVATE ${CMAKE_CURRENT_LIST_DIR}/enn_backend.cpp
+            ${CMAKE_CURRENT_LIST_DIR}/enn_executor.cpp
+            ${CMAKE_CURRENT_LIST_DIR}/enn_api_implementation.cpp
+  )
+endif()
diff --git a/backends/samsung/runtime/enn_api_implementation.cpp b/backends/samsung/runtime/enn_api_implementation.cpp
new file mode 100644
index 00000000000..c3914dd2297
--- /dev/null
+++ b/backends/samsung/runtime/enn_api_implementation.cpp
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co. LTD
+ * All rights reserved
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ */
+#include <executorch/backends/samsung/runtime/enn_api_implementation.h>
+#include <executorch/backends/samsung/runtime/logging.h>
+
+#include <dlfcn.h>
+
+#define ENN_LOAD_API_FUNC(handle, name, enn_api_ptr)                         \
+  enn_api_ptr->name =                                                        \
+      reinterpret_cast<name##_fn>(loadApiFunction(handle, #name, false));    \
+  if (enn_api_ptr->name == nullptr) {                                        \
+    ENN_LOG_ERROR("Unable to access symbols in enn api library: %s", #name); \
+    dlclose(handle);                                                         \
+    return Error::Internal;                                                  \
+  }
+
+namespace torch {
+namespace executor {
+namespace enn {
+void* loadApiFunction(void* handle, const char* name, bool optional) {
+  if (handle == nullptr) {
+    return nullptr;
+  }
+  void* fn = dlsym(handle, name);
+  if (fn == nullptr && !optional) {
+    ENN_LOG_WARN("Failed to load function %s", name);
+  }
+  return fn;
+}
+
+std::mutex EnnApi::instance_mutex_;
+
+EnnApi* EnnApi::getEnnApiInstance() {
+  std::lock_guard lgd(instance_mutex_);
+  static EnnApi enn_api;
+  if (!enn_api.getInitialize()) {
+    auto status = enn_api.loadApiLib();
+    if (status == Error::Ok) {
+      enn_api.initialize_ = true;
+    }
+  }
+  return &enn_api;
+}
+
+EnnApi::~EnnApi() {
+  if (getInitialize()) {
+    unloadApiLib();
+  }
+}
+
+bool EnnApi::getInitialize() const {
+  return initialize_;
+}
+
+Error EnnApi::loadApiLib() {
+  const char enn_api_lib_name[] = "libenn_public_api_cpp.so";
+  libenn_public_api_ = dlopen(enn_api_lib_name, RTLD_NOW | RTLD_LOCAL);
+  ET_CHECK_OR_RETURN_ERROR(
+      libenn_public_api_ != nullptr, Internal, "Lib load failed.")
+
+  ENN_LOAD_API_FUNC(libenn_public_api_, EnnInitialize, this);
+  ENN_LOAD_API_FUNC(libenn_public_api_, EnnSetPreferencePerfMode, this);
+  ENN_LOAD_API_FUNC(libenn_public_api_, EnnGetPreferencePerfMode, this);
+  ENN_LOAD_API_FUNC(libenn_public_api_, EnnOpenModel, this);
+  ENN_LOAD_API_FUNC(libenn_public_api_, EnnOpenModelFromMemory, this);
+  ENN_LOAD_API_FUNC(libenn_public_api_, EnnSetFastIpc, this);
+  ENN_LOAD_API_FUNC(libenn_public_api_, EnnUnsetFastIpc, this);
+  ENN_LOAD_API_FUNC(libenn_public_api_, EnnExecuteModelFastIpc, this);
+  ENN_LOAD_API_FUNC(libenn_public_api_, EnnExecuteModel, this);
+  ENN_LOAD_API_FUNC(
+      libenn_public_api_, EnnExecuteModelWithSessionIdAsync, this);
+  ENN_LOAD_API_FUNC(libenn_public_api_, EnnExecuteModelWithSessionIdWait,
this); + ENN_LOAD_API_FUNC(libenn_public_api_, EnnCloseModel, this); + ENN_LOAD_API_FUNC(libenn_public_api_, EnnDeinitialize, this); + ENN_LOAD_API_FUNC(libenn_public_api_, EnnAllocateAllBuffers, this); + ENN_LOAD_API_FUNC( + libenn_public_api_, EnnAllocateAllBuffersWithSessionId, this); + ENN_LOAD_API_FUNC(libenn_public_api_, EnnBufferCommit, this); + ENN_LOAD_API_FUNC(libenn_public_api_, EnnGetBuffersInfo, this); + ENN_LOAD_API_FUNC(libenn_public_api_, EnnReleaseBuffers, this); + + return Error::Ok; +} + +Error EnnApi::unloadApiLib() { + if (dlclose(libenn_public_api_) != 0) { + ENN_LOG_ERROR("Failed to close enn public api library. %s", dlerror()); + return Error::Internal; + }; + return Error::Ok; +} + +} // namespace enn +} // namespace executor +} // namespace torch diff --git a/backends/samsung/runtime/enn_api_implementation.h b/backends/samsung/runtime/enn_api_implementation.h new file mode 100644 index 00000000000..efc57f0d276 --- /dev/null +++ b/backends/samsung/runtime/enn_api_implementation.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co. LTD + * All rights reserved + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + * + */ +#pragma once +#include +#include + +#include +#include +#include +#include + +namespace torch { +namespace executor { +namespace enn { + +class EnnApi { + public: + EnnApi(const EnnApi&) = delete; + EnnApi& operator=(const EnnApi&) = delete; + EnnApi(EnnApi&&) = delete; + EnnApi& operator=(EnnApi&&) = delete; + ~EnnApi(); + + static EnnApi* getEnnApiInstance(); + + EnnReturn (*EnnInitialize)(void); + EnnReturn (*EnnSetPreferencePerfMode)(const uint32_t val); + EnnReturn (*EnnGetPreferencePerfMode)(uint32_t* val_ptr); + EnnReturn (*EnnOpenModel)(const char* model_file, EnnModelId* model_id); + EnnReturn (*EnnOpenModelFromMemory)( + const char* va, + const uint32_t size, + EnnModelId* model_id); + EnnReturn (*EnnSetFastIpc)(void); + EnnReturn (*EnnUnsetFastIpc)(void); + EnnReturn (*EnnExecuteModelFastIpc)( + const EnnModelId model_id, + int client_sleep_usec); + EnnReturn (*EnnExecuteModel)(const EnnModelId model_id); + EnnReturn (*EnnExecuteModelWithSessionIdAsync)( + const EnnModelId model_id, + const int session_id); + EnnReturn (*EnnCloseModel)(const EnnModelId model_id); + EnnReturn (*EnnDeinitialize)(void); + EnnReturn (*EnnAllocateAllBuffers)( + const EnnModelId model_id, + EnnBufferPtr** out_buffers, + NumberOfBuffersInfo* out_buffers_info); + EnnReturn (*EnnAllocateAllBuffersWithSessionId)( + const EnnModelId model_id, + EnnBufferPtr** out_buffers, + NumberOfBuffersInfo* out_buffers_info, + const int session_id, + const bool do_commit); + EnnReturn (*EnnExecuteModelWithSessionIdWait)( + const EnnModelId model_id, + const int session_id); + EnnReturn (*EnnBufferCommit)(const EnnModelId model_id); + EnnReturn (*EnnGetBuffersInfo)( + const EnnModelId model_id, + NumberOfBuffersInfo* buffers_info); + EnnReturn ( + *EnnReleaseBuffers)(EnnBufferPtr* buffers, const int32_t numOfBuffers); + + private: + static std::mutex instance_mutex_; + std::atomic_bool initialize_ = false; + // Pointer to the dlopen libs + void* libenn_public_api_ = nullptr; + + EnnApi() = default; + bool getInitialize() const; + Error loadApiLib(); + Error unloadApiLib(); +}; + +typedef EnnReturn (*EnnInitialize_fn)(void); +typedef EnnReturn (*EnnSetPreferencePerfMode_fn)(const uint32_t val); +typedef EnnReturn (*EnnGetPreferencePerfMode_fn)(uint32_t* val_ptr); +typedef 
EnnReturn ( + *EnnOpenModel_fn)(const char* model_file, EnnModelId* model_id); +typedef EnnReturn (*EnnOpenModelFromMemory_fn)( + const char* va, + const uint32_t size, + EnnModelId* model_id); +typedef EnnReturn (*EnnSetFastIpc_fn)(void); +typedef EnnReturn (*EnnUnsetFastIpc_fn)(void); +typedef EnnReturn (*EnnExecuteModelFastIpc_fn)( + const EnnModelId model_id, + int client_sleep_usec); +typedef EnnReturn (*EnnExecuteModel_fn)(const EnnModelId model_id); +typedef EnnReturn (*EnnExecuteModelWithSessionIdAsync_fn)( + const EnnModelId model_id, + const int session_id); +typedef EnnReturn (*EnnCloseModel_fn)(const EnnModelId model_id); +typedef EnnReturn (*EnnDeinitialize_fn)(void); +typedef EnnReturn (*EnnAllocateAllBuffers_fn)( + const EnnModelId model_id, + EnnBufferPtr** out_buffers, + NumberOfBuffersInfo* out_buffers_info); +typedef EnnReturn (*EnnAllocateAllBuffersWithSessionId_fn)( + const EnnModelId model_id, + EnnBufferPtr** out_buffers, + NumberOfBuffersInfo* out_buffers_info, + const int session_id, + const bool do_commit); +typedef EnnReturn (*EnnExecuteModelWithSessionIdWait_fn)( + const EnnModelId model_id, + const int session_id); +typedef EnnReturn (*EnnBufferCommit_fn)(const EnnModelId model_id); +typedef EnnReturn (*EnnGetBuffersInfo_fn)( + const EnnModelId model_id, + NumberOfBuffersInfo* buffers_info); +typedef EnnReturn ( + *EnnReleaseBuffers_fn)(EnnBufferPtr* buffers, const int32_t numOfBuffers); + +} // namespace enn +} // namespace executor +} // namespace torch diff --git a/backends/samsung/runtime/enn_backend.cpp b/backends/samsung/runtime/enn_backend.cpp new file mode 100644 index 00000000000..ec5ab89206a --- /dev/null +++ b/backends/samsung/runtime/enn_backend.cpp @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co. LTD + * All rights reserved + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ *
+ */
+#include <executorch/backends/samsung/runtime/enn_executor.h>
+#include <executorch/backends/samsung/runtime/logging.h>
+#include <executorch/runtime/backend/interface.h>
+#include <executorch/runtime/core/error.h>
+#include <executorch/runtime/core/evalue.h>
+#include <executorch/runtime/core/exec_aten/exec_aten.h>
+
+#include <cinttypes>
+#include <vector>
+
+#pragma clang diagnostic ignored "-Wglobal-constructors"
+
+namespace torch {
+namespace executor {
+
+class EnnBackend final : public PyTorchBackendInterface {
+ public:
+  ~EnnBackend() = default;
+
+  bool is_available() const override {
+    return true;
+  }
+
+  Result<DelegateHandle*> init(
+      BackendInitContext& context,
+      FreeableBuffer* processed,
+      ArrayRef<CompileSpec> compile_specs) const override {
+    MemoryAllocator* runtime_allocator = context.get_runtime_allocator();
+    auto executor = runtime_allocator->allocateInstance<enn::EnnExecutor>();
+
+    const char* binary_buf_addr =
+        reinterpret_cast<const char*>(processed->data());
+    size_t buf_size = processed->size();
+    Error err = executor->initialize(binary_buf_addr, buf_size);
+    if (err != Error::Ok) {
+      ENN_LOG_ERROR("Exynos backend initialize failed.");
+      executor->~EnnExecutor();
+      return err;
+    }
+    return executor;
+  }
+
+  Error execute(
+      BackendExecutionContext& context,
+      DelegateHandle* handle,
+      Span<EValue*> args) const override {
+    auto executor = static_cast<enn::EnnExecutor*>(handle);
+
+    std::vector<enn::DataBuffer> inputs;
+    std::vector<enn::DataBuffer> outputs;
+    for (int32_t index = 0;
+         index < executor->getInputSize() + executor->getOutputSize();
+         index++) {
+      ET_CHECK_OR_RETURN_ERROR(
+          args[index]->isTensor(),
+          InvalidArgument,
+          "Expected argument to delegate at index %d to be a Tensor, but got %" PRIu32,
+          index,
+          static_cast<uint32_t>(args[index]->tag));
+      Tensor* tensor = &args[index]->toTensor();
+      enn::DataBuffer data_buffer = {
+          .buf_ptr_ = tensor->mutable_data_ptr(),
+          .size_ = tensor->nbytes()};
+      if (index < executor->getInputSize()) {
+        inputs.push_back(data_buffer);
+      } else {
+        outputs.push_back(data_buffer);
+      }
+    }
+    Error err = executor->eval(inputs, outputs);
+    return err;
+  }
+
+  void destroy(DelegateHandle* handle) const override {
+    if (handle != nullptr) {
+      auto executor = static_cast<enn::EnnExecutor*>(handle);
+      executor->~EnnExecutor();
+    }
+  }
+}; // class EnnBackend
+
+namespace {
+auto cls = EnnBackend();
+Backend backend{"EnnBackend", &cls};
+static auto success_with_compiler = register_backend(backend);
+} // namespace
+
+} // namespace executor
+} // namespace torch
diff --git a/backends/samsung/runtime/enn_executor.cpp b/backends/samsung/runtime/enn_executor.cpp
new file mode 100644
index 00000000000..bdada72f86f
--- /dev/null
+++ b/backends/samsung/runtime/enn_executor.cpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co. LTD
+ * All rights reserved
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ * + */ +#include +#include +#include + +#include +#include +#include + +namespace torch { +namespace executor { +namespace enn { + +Error EnnExecutor::initialize(const char* binary_buf_addr, size_t buf_size) { + const EnnApi* enn_api_inst = EnnApi::getEnnApiInstance(); + auto ret = enn_api_inst->EnnInitialize(); + ET_CHECK_OR_RETURN_ERROR( + ret == ENN_RET_SUCCESS, Internal, "Enn initialize failed."); + + ET_LOG(Info, "Start to open model %p, %ld", binary_buf_addr, buf_size); + ret = enn_api_inst->EnnOpenModelFromMemory( + binary_buf_addr, buf_size, &model_id_); + + ET_CHECK_OR_RETURN_ERROR( + ret == ENN_RET_SUCCESS, + Internal, + "Failed to load Enn model from buffer %d", + (int)ret); + ET_LOG(Info, "Open successfully."); + NumberOfBuffersInfo buffers_info; + ret = enn_api_inst->EnnAllocateAllBuffersWithSessionId( + model_id_, &alloc_buffer_, &buffers_info, 0, true); + ET_CHECK_OR_RETURN_ERROR( + ret == ENN_RET_SUCCESS, + Internal, + "Failed to allocate buffers for model_id = 0x%" PRIX64, + model_id_); + num_of_inputs_ = buffers_info.n_in_buf; + num_of_outputs_ = buffers_info.n_out_buf; + + return Error::Ok; +} + +Error EnnExecutor::eval( + const std::vector& inputs, + const std::vector& outputs) { + const EnnApi* enn_api_inst = EnnApi::getEnnApiInstance(); + ET_CHECK_OR_RETURN_ERROR( + inputs.size() == getInputSize(), + InvalidArgument, + "Invalid number of inputs, expect %" PRIu32 " while get %ld", + getInputSize(), + inputs.size()); + ET_CHECK_OR_RETURN_ERROR( + outputs.size() == getOutputSize(), + InvalidArgument, + "Invalid number of outputs, expected %" PRIu32 " while get %ld", + getOutputSize(), + outputs.size()); + + int relative_input_index = 0; + for (const auto& input : inputs) { + EnnBufferPtr* input_buffer_ptr = alloc_buffer_; + EnnBuffer& enn_buffer = *input_buffer_ptr[relative_input_index]; + memcpy(enn_buffer.va, input.buf_ptr_, input.size_); + relative_input_index++; + } + + ENN_LOG_DEBUG("Start to execute model."); + auto ret = enn_api_inst->EnnExecuteModel(model_id_); + if (ret != ENN_RET_SUCCESS) { + ENN_LOG_ERROR("EnnExecuteModel Failed"); + return Error::Internal; + } + + EnnBufferPtr* output_buffer_ptr = alloc_buffer_ + getInputSize(); + int relative_output_index = 0; + for (const auto& output : outputs) { + EnnBuffer& enn_buffer = *output_buffer_ptr[relative_output_index]; + memcpy(output.buf_ptr_, enn_buffer.va, output.size_); + relative_output_index++; + } + return Error::Ok; +} + +EnnExecutor::~EnnExecutor() { + const EnnApi* enn_api_inst = EnnApi::getEnnApiInstance(); + NumberOfBuffersInfo buffers_info; + if (enn_api_inst->EnnGetBuffersInfo(model_id_, &buffers_info) == + ENN_RET_SUCCESS) { + const int32_t num_of_buffers = + buffers_info.n_in_buf + buffers_info.n_out_buf; + enn_api_inst->EnnReleaseBuffers(alloc_buffer_, num_of_buffers); + } + enn_api_inst->EnnCloseModel(model_id_); +} + +} // namespace enn +} // namespace executor +} // namespace torch diff --git a/backends/samsung/runtime/enn_executor.h b/backends/samsung/runtime/enn_executor.h new file mode 100644 index 00000000000..902b420a036 --- /dev/null +++ b/backends/samsung/runtime/enn_executor.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co. LTD + * All rights reserved + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ * + */ +#pragma once + +#include +#include + +#include + +namespace torch { +namespace executor { +namespace enn { + +struct DataBuffer { + void* buf_ptr_ = nullptr; + size_t size_ = 0; +}; + +class EnnExecutor { + public: + Error initialize(const char* binary_buf_addr, size_t buf_size); + + Error eval( + const std::vector& inputs, + const std::vector& outputs); + + int32_t getInputSize() const { + return num_of_inputs_; + } + int32_t getOutputSize() const { + return num_of_outputs_; + } + + ~EnnExecutor(); + + private: + EnnModelId model_id_; + EnnBufferPtr* alloc_buffer_ = nullptr; + int32_t num_of_inputs_ = 0; + int32_t num_of_outputs_ = 0; +}; + +} // namespace enn +} // namespace executor +} // namespace torch diff --git a/backends/samsung/runtime/enn_type.h b/backends/samsung/runtime/enn_type.h new file mode 100644 index 00000000000..4b91fe2d08a --- /dev/null +++ b/backends/samsung/runtime/enn_type.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co. LTD + * All rights reserved + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + * + */ +#pragma once + +#include + +typedef uint64_t EnnModelId; // higher 32-bits are initialized with zero + +typedef enum _EnnReturn { + ENN_RET_SUCCESS = 0, + ENN_RET_FAILED, + ENN_RET_IO, + ENN_RET_INVAL, + ENN_RET_FILTERED, + ENN_RET_CANCELED, + ENN_RET_MEM_ERR, + ENN_RET_SIZE, + ENN_RET_FAILED_TIMEOUT_ENN = 10, + ENN_RET_FAILED_TIMEOUT_DD, + ENN_RET_FAILED_TIMEOUT_FW, + ENN_RET_FAILED_TIMEOUT_HW_NOTRECOVERED, + ENN_RET_FAILED_TIMEOUT_HW_RECOVERED, + ENN_RET_FAILED_SERVICE_NULL, + ENN_RET_FAILED_RESOURCE_BUSY, + ENN_RET_NOT_SUPPORTED = 0xFF, +} EnnReturn; + +/* NOTE: should be sync with types.hal */ +typedef enum _enn_buf_dir_e { + ENN_DIR_IN, + ENN_DIR_OUT, + ENN_DIR_EXT, + ENN_DIR_NONE, + ENN_DIR_SIZE +} enn_buf_dir_e; + +// data structure for user buffer +typedef struct _ennBuffer { + void* va; + uint32_t size; // requested size + uint32_t offset; +} EnnBuffer; + +typedef EnnBuffer* EnnBufferPtr; + +typedef struct _NumberOfBuffersInfo { + uint32_t n_in_buf; + uint32_t n_out_buf; +} NumberOfBuffersInfo; diff --git a/backends/samsung/runtime/logging.cpp b/backends/samsung/runtime/logging.cpp new file mode 100644 index 00000000000..04ab22ddc0d --- /dev/null +++ b/backends/samsung/runtime/logging.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2025 Samsung Electronics Co. LTD + * All rights reserved + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + * + */ +#include +#include +#ifdef __ANDROID__ +#include +#endif +#include + +void EnnLogImpl(ENN_LOG_LEVEL log_level, const char* format, ...) 
{
+  va_list args;
+  va_start(args, format);
+#ifdef __ANDROID__
+  int android_severity = ANDROID_LOG_DEBUG;
+  switch (log_level) {
+    case ENN_LOG_LEVEL::DEBUG:
+      android_severity = ANDROID_LOG_DEBUG;
+      break;
+    case ENN_LOG_LEVEL::INFO:
+      android_severity = ANDROID_LOG_INFO;
+      break;
+    case ENN_LOG_LEVEL::WARNING:
+      android_severity = ANDROID_LOG_WARN;
+      break;
+    case ENN_LOG_LEVEL::ERROR:
+      android_severity = ANDROID_LOG_ERROR;
+      break;
+    default:
+      android_severity = ANDROID_LOG_UNKNOWN;
+      break;
+  }
+  // A va_list may only be traversed once, so hand the Android sink its own
+  // copy and keep `args` for the stderr sink below.
+  va_list android_args;
+  va_copy(android_args, args);
+  __android_log_vprint(
+      android_severity, "[Exynos ExecuTorch]", format, android_args);
+  va_end(android_args);
+#endif
+  const char* severity_name;
+  switch (log_level) {
+    case ENN_LOG_LEVEL::DEBUG:
+      severity_name = "DEBUG";
+      break;
+    case ENN_LOG_LEVEL::INFO:
+      severity_name = "INFO";
+      break;
+    case ENN_LOG_LEVEL::WARNING:
+      severity_name = "WARN";
+      break;
+    case ENN_LOG_LEVEL::ERROR:
+      severity_name = "ERROR";
+      break;
+    default:
+      severity_name = "UNKNOWN";
+      break;
+  }
+  fprintf(stderr, "[%s][Exynos ExecuTorch]", severity_name);
+  vfprintf(stderr, format, args);
+  fputc('\n', stderr);
+  va_end(args);
+}
+
+#if defined(NDEBUG)
+ENN_LOG_LEVEL EnnLogManager::output_log_level_ = ENN_LOG_LEVEL::INFO;
+#else
+ENN_LOG_LEVEL EnnLogManager::output_log_level_ = ENN_LOG_LEVEL::DEBUG;
+#endif
+std::mutex EnnLogManager::log_mutex_;
+
+void EnnLogManager::setLogLevel(ENN_LOG_LEVEL log_level) {
+  std::lock_guard lgd(log_mutex_);
+  output_log_level_ = log_level;
+}
+
+bool EnnLogManager::isLogOn(ENN_LOG_LEVEL log_level) {
+  std::lock_guard lgd(log_mutex_);
+  return static_cast<int>(log_level) >= static_cast<int>(output_log_level_);
+}
diff --git a/backends/samsung/runtime/logging.h b/backends/samsung/runtime/logging.h
new file mode 100644
index 00000000000..6271017e126
--- /dev/null
+++ b/backends/samsung/runtime/logging.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2025 Samsung Electronics Co. LTD
+ * All rights reserved
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ */
+#pragma once
+
+#include <cstring>
+#include <mutex>
+
+#define FILENAME(fp) (strrchr(fp, '/') ? strrchr(fp, '/') + 1 : fp)
+
+enum class ENN_LOG_LEVEL {
+  DEBUG = 0,
+  INFO = 1,
+  WARNING = 2,
+  ERROR = 3,
+  MAX_LEVEL = ERROR,
+};
+
+class EnnLogManager {
+ public:
+  static void setLogLevel(ENN_LOG_LEVEL log_level);
+  static bool isLogOn(ENN_LOG_LEVEL log_level);
+
+ private:
+  static ENN_LOG_LEVEL output_log_level_;
+  static std::mutex log_mutex_;
+};
+
+void EnnLogImpl(ENN_LOG_LEVEL log_level, const char* format, ...);
+
+#ifdef NDEBUG
+#define __LOGX(log_level, fmt, ...)            \
+  if (EnnLogManager::isLogOn(log_level)) {     \
+    EnnLogImpl(log_level, fmt, ##__VA_ARGS__); \
+  }
+#else
+#define __LOGX(log_level, fmt, ...)        \
+  if (EnnLogManager::isLogOn(log_level)) { \
+    EnnLogImpl(                            \
+        log_level,                         \
+        "[%s:%d]: " fmt,                   \
+        FILENAME(__FILE__),                \
+        __LINE__,                          \
+        ##__VA_ARGS__);                    \
+  }
+#endif
+
+#define ENN_LOG_DEBUG(fmt, ...) __LOGX(ENN_LOG_LEVEL::DEBUG, fmt, ##__VA_ARGS__)
+#define ENN_LOG_INFO(fmt, ...) __LOGX(ENN_LOG_LEVEL::INFO, fmt, ##__VA_ARGS__)
+#define ENN_LOG_WARN(fmt, ...) \
+  __LOGX(ENN_LOG_LEVEL::WARNING, fmt, ##__VA_ARGS__)
+#define ENN_LOG_ERROR(fmt, ...) __LOGX(ENN_LOG_LEVEL::ERROR, fmt, ##__VA_ARGS__)
diff --git a/backends/samsung/serialization/compile_options.py b/backends/samsung/serialization/compile_options.py
new file mode 100644
index 00000000000..1ad2350cfeb
--- /dev/null
+++ b/backends/samsung/serialization/compile_options.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import json
+import os
+import tempfile
+
+from dataclasses import dataclass
+from enum import IntEnum, unique
+
+import pkg_resources
+from executorch.exir._serialize._dataclass import _DataclassEncoder
+from executorch.exir._serialize._flatbuffer import _flatc_compile
+from executorch.exir.backend.backend_details import CompileSpec
+
+
+@unique
+class SamsungChipset(IntEnum):
+    UNDEFINED_CHIP_V = 0
+    E9955 = 9955
+
+
+@dataclass
+class EnnExecuTorchOptions:
+    chipset: SamsungChipset = SamsungChipset.UNDEFINED_CHIP_V
+
+
+ENN_COMPILE_OPTION_TITLE = "enn_compile_options"
+COMPILE_OPTION_SCHEMA_NAME = "compile_options_def"
+
+
+def gen_samsung_backend_compile_spec_core(
+    options: EnnExecuTorchOptions,
+) -> CompileSpec:
+    with tempfile.TemporaryDirectory() as d:
+        # schema
+        schema_path = os.path.join(d, "{}.fbs".format(COMPILE_OPTION_SCHEMA_NAME))
+        with open(schema_path, "wb") as schema_file:
+            schema_file.write(
+                pkg_resources.resource_string(
+                    __name__, "{}.fbs".format(COMPILE_OPTION_SCHEMA_NAME)
+                )
+            )
+        # dump the options to json, then serialize them via flatc
+        json_path = os.path.join(d, "{}.json".format(COMPILE_OPTION_SCHEMA_NAME))
+        enn_options_json = json.dumps(options, cls=_DataclassEncoder, indent=4)
+        with open(json_path, "wb") as json_file:
+            json_file.write(enn_options_json.encode("ascii"))
+
+        _flatc_compile(d, schema_path, json_path)
+        output_path = os.path.join(d, "{}.eeto".format(COMPILE_OPTION_SCHEMA_NAME))
+        with open(output_path, "rb") as output_file:
+            return CompileSpec(ENN_COMPILE_OPTION_TITLE, output_file.read())
+
+
+def gen_samsung_backend_compile_spec(
+    chipset: str,
+) -> CompileSpec:
+    """
+    Generate the compile spec that carries Samsung backend options.
+
+    Args:
+        chipset (str): chipset name in SamsungChipset; "E9955" and "e9955" both work.
+
+    Returns:
+        CompileSpec: the key is ENN_COMPILE_OPTION_TITLE and the value is the
+        serialized flatbuffer of the options.
+    """
+    option = EnnExecuTorchOptions(
+        getattr(SamsungChipset, chipset.upper()),
+    )
+
+    return gen_samsung_backend_compile_spec_core(option)
diff --git a/backends/samsung/serialization/compile_options_def.fbs b/backends/samsung/serialization/compile_options_def.fbs
new file mode 100644
index 00000000000..d38c2772715
--- /dev/null
+++ b/backends/samsung/serialization/compile_options_def.fbs
@@ -0,0 +1,23 @@
+//============================================================================
+//
+//  Copyright (c) 2025 Samsung Electronics. All Rights Reserved.
+//
+//  This source code is licensed under the BSD-style license found in the
+//  LICENSE file in the root directory of this source tree.
+//
+//============================================================================
+
+namespace enn_option;
+
+// Identifier of a valid executor schema.
+file_identifier "EETO";
+// Extension of written files.
+file_extension "eeto";
+
+
+table EnnExecuTorchOptions {
+  // The chipset version; selects the SoC to compile for and execute on.
+  chipset: int;
+}
+
+root_type EnnExecuTorchOptions;
diff --git a/backends/samsung/serialization/enn_graph_schema.py b/backends/samsung/serialization/enn_graph_schema.py
new file mode 100644
index 00000000000..7e74182f9d7
--- /dev/null
+++ b/backends/samsung/serialization/enn_graph_schema.py
@@ -0,0 +1,85 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Dict, List, Optional, Union
+
+import executorch.backends.samsung.python.PyGraphWrapperAdaptor as PyGraphWrapper
+
+import numpy as np
+
+import torch
+
+
+class EnnGraph:
+    def __init__(self):
+        # default
+        self.version = "0.6.0"
+        self.graph = PyGraphWrapper.PyEnnGraphWrapper()
+        self.graph.Init()
+
+        self.inputs = []
+        self.outputs = []
+
+    def define_op(
+        self,
+        name,
+        type,
+        input_ids: List[int],
+        output_ids: List[int],
+        params: Optional[Dict] = None,
+    ):
+        op = PyGraphWrapper.PyEnnOpWrapper(name, type, input_ids, output_ids)
+
+        if params is not None:
+            assert isinstance(params, dict), "Please pass op params as a dict."
+            for key, value in params.items():
+                py_param_wrapper = PyGraphWrapper.OpParamWrapper(key)
+                if isinstance(value, (list, tuple)):
+                    py_param_wrapper.SetVectorValue(value)
+                elif isinstance(value, str):
+                    py_param_wrapper.SetStringValue(value)
+                elif isinstance(value, (int, float, bool)):
+                    py_param_wrapper.SetScalarValue(value)
+                else:
+                    raise TypeError(f"Unsupported op param type for key '{key}'.")
+                op.AddOpParam(py_param_wrapper)
+
+        self.graph.DefineOpNode(op)
+
+    def define_tensor(
+        self,
+        name: str,
+        shape: List,
+        data_type: str,
+        tensor_type: str,
+        data: Optional[Union[np.ndarray, torch.Tensor]] = None,
+    ) -> int:
+        layout = "NCHW" if len(shape) == 4 else "UNDEFINED"
+
+        tensor = PyGraphWrapper.PyEnnTensorWrapper(name, shape, data_type, layout)
+
+        if data is not None:
+            if isinstance(data, torch.Tensor):
+                data = data.detach().numpy()
+            tensor.AddData(data)
+
+        tensor_id = self.graph.DefineTensor(tensor)
+
+        if tensor_type == "INPUT":
+            self.inputs.append(tensor_id)
+        elif tensor_type == "OUTPUT":
+            self.outputs.append(tensor_id)
+
+        return tensor_id
+
+    def finish(self):
+        self.graph.SetGraphInputTensors(self.inputs)
+        self.graph.SetGraphOutputTensors(self.outputs)
+        self.graph.FinishBuild()
+
+    def serialize(self):
+        return self.graph.Serialize()
diff --git a/backends/samsung/test/ops/test_add.py b/backends/samsung/test/ops/test_add.py
new file mode 100644
index 00000000000..8b1b4b4a770
--- /dev/null
+++ b/backends/samsung/test/ops/test_add.py
@@ -0,0 +1,62 @@
+# Copyright (c) Samsung Electronics Co. LTD
+# All rights reserved
+#
+# Licensed under the BSD License (the "License"); you may not use this file
+# except in compliance with the License. See the license file in the root
+# directory of this source tree for more details.
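+
+# Each op test follows the same pattern: export the module, check the aten op
+# count, lower through EnnPartitioner, verify the op was consumed by the
+# delegate, and serialize to an ExecuTorch program.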
+ + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class Add(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: + return x + y + + +class AddConstant(torch.nn.Module): + def __init__(self, constant) -> None: + super().__init__() + self.constant = constant + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return x + self.constant + + +class TestAdd(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, + inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.add.Tensor": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_add_Tensor"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_simple_add(self): + inputs = (torch.randn(1, 3, 8, 8), torch.randn(1, 3, 8, 8)) + self._test(Add(), inputs) + + def test_fp32_const_add(self): + inputs = (torch.randn(1, 3, 8, 8),) + self._test(AddConstant(torch.randn(1, 3, 8, 8)), inputs) + + def test_fp32_add_broadcast(self): + inputs = (torch.randn(1, 1, 8, 8), torch.randn(1, 3, 8, 8)) + self._test(Add(), inputs) diff --git a/backends/samsung/test/ops/test_avg_pool2d.py b/backends/samsung/test/ops/test_avg_pool2d.py new file mode 100644 index 00000000000..2614516dd0d --- /dev/null +++ b/backends/samsung/test/ops/test_avg_pool2d.py @@ -0,0 +1,66 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
+ + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class AvgPool2d(torch.nn.Module): + def __init__( + self, + kernel_size=2, + stride=1, + padding=0, + ) -> None: + super().__init__() + self.avg_pool = torch.nn.AvgPool2d( + kernel_size=kernel_size, + stride=stride, + padding=padding, + count_include_pad=False, + ceil_mode=False, + ).to(torch.float) + + def get_example_inputs(self) -> tuple[torch.Tensor]: + input_1 = torch.randn(1, 16, 24, 24) + return (input_1,) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.avg_pool(x) + + +class TestAvgPool2d(unittest.TestCase): + def _test(self, module: torch.nn.Module): + tester = SamsungTester( + module, + module.get_example_inputs(), + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.avg_pool2d.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_avg_pool2d_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_avg_pool2d(self): + self._test(AvgPool2d()) + + def test_fp32_avg_pool2d_with_stride(self): + self._test(AvgPool2d(stride=2)) + + def test_fp32_avg_pool2d_with_kernel_size(self): + self._test(AvgPool2d(kernel_size=4)) diff --git a/backends/samsung/test/ops/test_conv2d.py b/backends/samsung/test/ops/test_conv2d.py new file mode 100644 index 00000000000..e832a5713dc --- /dev/null +++ b/backends/samsung/test/ops/test_conv2d.py @@ -0,0 +1,98 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
+ + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class Conv2d(torch.nn.Module): + def __init__( + self, + in_channels=3, + out_channels=16, + stride=(2, 2), + padding=(1, 1), + dilation=(1, 1), + groups=1, + bias=True, + ) -> None: + super().__init__() + self.conv = torch.nn.Conv2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups, + bias=bias, + ).to(torch.float) + + self.in_channels = in_channels + + def get_example_inputs(self) -> tuple[torch.Tensor]: + input_1 = torch.randn(1, self.in_channels, 24, 24) + return (input_1,) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.conv(x) + + +class TransposeConv2d(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + self.conv = torch.nn.ConvTranspose2d( + in_channels=32, + out_channels=8, + kernel_size=2, + stride=(2, 2), + padding=(0, 0), + dilation=(1, 1), + bias=True, + ) + + def get_example_inputs(self) -> tuple[torch.Tensor]: + input_1 = torch.randn(1, 32, 24, 24) + return (input_1,) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.conv(x) + + +class TestConv2d(unittest.TestCase): + def _test(self, module: torch.nn.Module): + tester = SamsungTester( + module, + module.get_example_inputs(), + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_convolution_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_conv2d_without_bias(self): + self._test(Conv2d(bias=False)) + + def test_fp32_conv2d_with_bias(self): + self._test(Conv2d(bias=True)) + + def test_fp32_depthwise_conv2d(self): + self._test(Conv2d(in_channels=8, out_channels=8, groups=8)) + + def test_fp32_transpose_conv2d(self): + self._test(TransposeConv2d()) diff --git a/backends/samsung/test/ops/test_max_pool2d.py b/backends/samsung/test/ops/test_max_pool2d.py new file mode 100644 index 00000000000..9ead91b2bff --- /dev/null +++ b/backends/samsung/test/ops/test_max_pool2d.py @@ -0,0 +1,71 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
+ + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class MaxPool2d(torch.nn.Module): + def __init__( + self, + kernel_size=2, + stride=1, + padding=0, + dilation=1, + ) -> None: + super().__init__() + self.max_pool = torch.nn.MaxPool2d( + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + return_indices=False, + ceil_mode=False, + ).to(torch.float) + + def get_example_inputs(self) -> tuple[torch.Tensor]: + input_1 = torch.randn(1, 16, 24, 24) + return (input_1,) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.max_pool(x) + + +class TestMaxPool2d(unittest.TestCase): + def _test(self, module: torch.nn.Module): + tester = SamsungTester( + module, + module.get_example_inputs(), + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.max_pool2d.default": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_max_pool2d_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_max_pool2d(self): + self._test(MaxPool2d()) + + def test_fp32_max_pool2d_with_padding(self): + self._test(MaxPool2d(padding=1)) + + def test_fp32_max_pool2d_with_kernel_size(self): + self._test(MaxPool2d(kernel_size=4)) + + def test_fp32_max_pool2d_with_dilation(self): + self._test(MaxPool2d(dilation=2)) diff --git a/backends/samsung/test/ops/test_mul.py b/backends/samsung/test/ops/test_mul.py new file mode 100644 index 00000000000..57d13c68b87 --- /dev/null +++ b/backends/samsung/test/ops/test_mul.py @@ -0,0 +1,62 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
+ + +import unittest + +import torch + +from executorch.backends.samsung.serialization.compile_options import ( + gen_samsung_backend_compile_spec, +) +from executorch.backends.samsung.test.tester import SamsungTester + + +class Mul(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: + return x * y + + +class MulConstant(torch.nn.Module): + def __init__(self, constant) -> None: + super().__init__() + self.constant = constant + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return x * self.constant + + +class TestMul(unittest.TestCase): + def _test(self, module: torch.nn.Module, inputs): + tester = SamsungTester( + module, + inputs, + [gen_samsung_backend_compile_spec("E9955")], + ) + ( + tester.export() + .check_count({"torch.ops.aten.mul.Tensor": 1}) + .to_edge_transform_and_lower() + .check_not(["executorch_exir_dialects_edge__ops_aten_mul_Tensor"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + def test_fp32_simple_mul(self): + inputs = (torch.randn(1, 3, 8, 8), torch.randn(1, 3, 8, 8)) + self._test(Mul(), inputs) + + def test_fp32_const_mul(self): + inputs = (torch.randn(1, 3, 8, 8),) + self._test(MulConstant(torch.randn(1, 3, 8, 8)), inputs) + + def test_fp32_mul_broadcast(self): + inputs = (torch.randn(1, 1, 8, 8), torch.randn(1, 3, 8, 8)) + self._test(Mul(), inputs) diff --git a/backends/samsung/test/tester/__init__.py b/backends/samsung/test/tester/__init__.py new file mode 100644 index 00000000000..8cd333b2ac7 --- /dev/null +++ b/backends/samsung/test/tester/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. + +from executorch.backends.samsung.test.tester.samsung_tester import SamsungTester + + +__all__ = ["SamsungTester"] diff --git a/backends/samsung/test/tester/samsung_tester.py b/backends/samsung/test/tester/samsung_tester.py new file mode 100644 index 00000000000..b750fb26a96 --- /dev/null +++ b/backends/samsung/test/tester/samsung_tester.py @@ -0,0 +1,83 @@ +# Copyright (c) Samsung Electronics Co. LTD +# All rights reserved +# +# Licensed under the BSD License (the "License"); you may not use this file +# except in compliance with the License. See the license file in the root +# directory of this source tree for more details. 
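+
+# Thin wrapper around the generic backend test harness that swaps in the
+# EnnPartitioner stage. Typical use (illustrative):
+#
+#   tester = SamsungTester(
+#       module, inputs, [gen_samsung_backend_compile_spec("E9955")]
+#   )
+#   tester.export().to_edge_transform_and_lower().to_executorch()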
+from typing import List, Optional, Tuple
+
+import executorch.backends.test.harness.stages as BaseStages
+import torch
+from executorch.backends.samsung.partition.enn_partitioner import EnnPartitioner
+
+from executorch.backends.test.harness import Tester as TesterBase
+from executorch.exir import EdgeCompileConfig, to_edge_transform_and_lower
+from executorch.exir.backend.backend_details import CompileSpec
+
+from torch.export import ExportedProgram
+
+
+class Export(BaseStages.Export):
+    pass
+
+
+class Quantize(BaseStages.Quantize):
+    pass
+
+
+class ToEdgeTransformAndLower(BaseStages.ToEdgeTransformAndLower):
+    def __init__(
+        self,
+        compile_specs: Optional[List[CompileSpec]] = None,
+        edge_compile_config: Optional[EdgeCompileConfig] = None,
+    ):
+        compile_specs = compile_specs or []
+        self.partitioners = [EnnPartitioner(compile_specs=compile_specs)]
+        self.edge_compile_config = edge_compile_config or EdgeCompileConfig(
+            _skip_dim_order=True, _check_ir_validity=False
+        )
+        self.edge_dialect_program = None
+
+    def run(
+        self, artifact: ExportedProgram, inputs=None, generate_etrecord: bool = False
+    ) -> None:
+        self.edge_dialect_program = to_edge_transform_and_lower(
+            artifact,
+            partitioner=self.partitioners,
+            compile_config=self.edge_compile_config,
+        )
+
+
+class ToExecutorch(BaseStages.ToExecutorch):
+    pass
+
+
+class SamsungTester(TesterBase):
+    def __init__(
+        self,
+        module: torch.nn.Module,
+        example_inputs: Tuple[torch.Tensor],
+        compile_specs: Optional[List[CompileSpec]] = None,
+    ):
+        module.eval()
+
+        super().__init__(
+            module=module,
+            example_inputs=example_inputs,
+            dynamic_shapes=None,
+        )
+
+        self.original_module = module
+        self.exported_module = module
+        self.example_inputs = example_inputs
+        self.compile_specs = compile_specs
+
+    def to_edge_transform_and_lower(
+        self,
+        edge_compile_config: Optional[EdgeCompileConfig] = None,
+    ):
+        to_edge_transform_and_lower_stage = ToEdgeTransformAndLower(
+            self.compile_specs, edge_compile_config
+        )
+
+        return super().to_edge_transform_and_lower(to_edge_transform_and_lower_stage)
diff --git a/backends/samsung/utils/export_utils.py b/backends/samsung/utils/export_utils.py
new file mode 100644
index 00000000000..a9b7de7c5ae
--- /dev/null
+++ b/backends/samsung/utils/export_utils.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List, Optional, Tuple
+
+import executorch.exir as exir
+import torch
+from executorch.backends.samsung.partition.enn_partitioner import EnnPartitioner
+from executorch.backends.transforms.remove_clone_ops import RemoveCloneOpsTransform
+from executorch.exir import EdgeCompileConfig
+from executorch.exir.backend.backend_details import CompileSpec
+from executorch.exir.program._program import to_edge_transform_and_lower
+
+
+def to_edge_transform_and_lower_to_enn(
+    module: torch.nn.Module,
+    inputs: Tuple[torch.Tensor],
+    compile_specs: Optional[List[CompileSpec]] = None,
+) -> exir.EdgeProgramManager:
+    assert (
+        compile_specs is not None
+    ), "Please provide compile specs for the ENN backend"
+    prog = torch.export.export(module, inputs)
+
+    ahead_pass_list = [RemoveCloneOpsTransform()]
+    return to_edge_transform_and_lower(
+        prog,
+        ahead_pass_list,
+        {"forward": [EnnPartitioner(compile_specs)]},
+        compile_config=EdgeCompileConfig(_skip_dim_order=True),
+    )
diff --git a/backends/samsung/utils/utils.py b/backends/samsung/utils/utils.py
new file mode 100644
index 00000000000..5da9808f38f
--- /dev/null
+++ b/backends/samsung/utils/utils.py
@@ -0,0 +1,37 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import List, Optional
+
+import torch
+
+from executorch.backends.transforms.utils import is_param_node
+from executorch.exir.backend.backend_details import CompileSpec
+
+from torch.export.exported_program import ExportedProgram
+
+
+def get_compile_spec(
+    compile_specs: List[CompileSpec], spec_name: str, required: bool = False
+) -> Optional[CompileSpec]:
+    for spec in compile_specs:
+        if spec_name == spec.key:
+            return spec
+    assert not required, f"Required compile spec '{spec_name}' doesn't exist."
+
+
+def is_graph_input(exported_program: ExportedProgram, node: torch.fx.Node) -> bool:
+    return node.op == "placeholder" and not is_param_node(exported_program, node)
+
+
+def is_graph_output(node: torch.fx.Node) -> bool:
+    # Skip getitem nodes: a node whose getitem user is itself a graph output
+    # also counts as a graph output.
+    for user in node.users.keys():
+        if user.op == "output" or (
+            getattr(user.target, "__name__", None) == "getitem"
+            and is_graph_output(user)
+        ):
+            return True
+    return False
diff --git a/examples/README.md b/examples/README.md
index 3dbdacaac68..096a9850b29 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -21,6 +21,7 @@ examples
 | └── mps # Contains end-to-end demos of MPS backend
 ├── arm # Contains demos of the Arm TOSA and Ethos-U NPU flows
 ├── qualcomm # Contains demos of Qualcomm QNN backend
+├── samsung # Contains demos of Samsung Exynos backend
 ├── cadence # Contains demos of exporting and running a simple model on Xtensa DSPs
 ├── third-party # Third-party libraries required for working on the demos
 └── README.md # This file
@@ -69,6 +70,10 @@ The [`arm`](arm) directory contains scripts to help you run a PyTorch model on
 
 You will find demos of [ExecuTorch QNN Backend](qualcomm) in the [`qualcomm`](qualcomm) directory.
 
+### Exynos Backend
+
+You will find demos of [ExecuTorch Exynos Backend](samsung) in the [`samsung`](samsung) directory.
+
 ### Cadence HiFi4 DSP
 
 The [`Cadence`](cadence) directory hosts a demo that showcases the process of exporting and executing a model on Xtensa Hifi4 DSP. You can utilize [this tutorial](../docs/source/backends-cadence.md) to guide you in configuring the demo and running it.
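Taken together, `export_utils.py` above and the example compiler below share one ahead-of-time flow: export the module, lower it to the ENN delegate for a target chipset, and serialize a `.pte`. A minimal sketch of that flow, assuming the Samsung backend and its Python artifacts are set up as described in the examples README below (`TinyModel` and its input shape are placeholders):

```python
import torch

from executorch.backends.samsung.serialization.compile_options import (
    gen_samsung_backend_compile_spec,
)
from executorch.backends.samsung.utils.export_utils import (
    to_edge_transform_and_lower_to_enn,
)
from executorch.exir import ExecutorchBackendConfig
from executorch.extension.export_util.utils import save_pte_program


class TinyModel(torch.nn.Module):  # placeholder module for illustration
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.nn.functional.relu(x * 2.0)


model = TinyModel().eval()
example_inputs = (torch.randn(1, 3, 8, 8),)

# Lower to the ENN delegate for the target chipset.
edge = to_edge_transform_and_lower_to_enn(
    model,
    example_inputs,
    compile_specs=[gen_samsung_backend_compile_spec("E9955")],
)

# Serialize to a .pte file that enn_executor_runner can load on device.
exec_prog = edge.to_executorch(
    config=ExecutorchBackendConfig(extract_delegate_segments=True)
)
save_pte_program(exec_prog, "tiny_exynos_fp32", ".")
```

`to_edge_transform_and_lower_to_enn` returns an `EdgeProgramManager`, so `to_executorch()` is called on its result, exactly as `aot_compiler.py` does below.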
diff --git a/examples/samsung/README.md b/examples/samsung/README.md
new file mode 100644
index 00000000000..426dce40670
--- /dev/null
+++ b/examples/samsung/README.md
@@ -0,0 +1,60 @@
+# Exynos Backend Examples
+
+This directory contains examples of lowering and running AI models on the Exynos backend.
+
+Please make sure you have built the library and the executable before you start.
+If you are unsure how to build them, refer to the [backend README](../../backends/samsung/README.md).
+
+## Environment
+We set `PYTHONPATH` because it makes developing against and importing the ExecuTorch Python APIs easier.
+Alternatively, you can build and install ExecuTorch as a regular Python package.
+```bash
+export PYTHONPATH=${EXECUTORCH_ROOT}/..
+```
+
+Note: since we set `PYTHONPATH`, exporting a model may fail to find `program.fbs` and `scalar_type.fbs`.
+A workaround is to copy them into `${EXECUTORCH_ROOT}/exir/_serialize/`. The files can be found in
+`${EXECUTORCH_ROOT}/schema` or in
+`${EXECUTORCH_ROOT}/pip-out/lib.linux-x86_64-cpython-310/executorch/exir/_serialize`.
+
+## Device
+Prepare an Android phone with a Samsung Exynos chip and connect to it with `adb`.
+
+Check the chip version; when lowering a model, pass the corresponding chipset to the compiler.
+
+## Lowering
+
+Before running an example, copy the Python artifacts to the target directory `PYTHON_TARGET_DIR`.
+If you compiled the Samsung backend with the `build.sh` script, you can skip this copy step.
+
+Set `PYTHON_TARGET_DIR` to `${EXECUTORCH_ROOT}/backends/samsung/python`.
+```bash
+cp -rf ${EXECUTORCH_ROOT}/build_x86_64/backends/samsung/Py*.so ${PYTHON_TARGET_DIR}
+cp -rf ${EXYNOS_AI_LITECORE_PATH}/python/snc_py_api*.so ${PYTHON_TARGET_DIR}
+```
+
+Use `EXECUTORCH_ROOT` as the working directory. Here is an example for ic3:
+```bash
+python -m executorch.examples.samsung.aot_compiler --chipset E9955 -m ic3 --output_dir ic3_artifact
+```
+
+## Execution
+
+After lowering, we get a `.pte` model that can be run on the phone.
+
+#### Step 1: Push the required ENN libraries and the executor runner to the device
+```bash
+DEVICE_DIR=/data/local/tmp/executorch
+adb shell mkdir ${DEVICE_DIR}
+adb push ${EXECUTORCH_ROOT}/cmake-android-out/backends/samsung/enn_executor_runner ${DEVICE_DIR}
+```
+
+#### Step 2: Set the dynamic linker path and execute the model
+```bash
+adb push ./ic3_artifact/ic3_exynos_fp32.pte ${DEVICE_DIR}
+adb push ./ic3_artifact/ic3_input_0.bin ${DEVICE_DIR}
+adb shell "cd ${DEVICE_DIR} \
+    && export LD_LIBRARY_PATH=${DEVICE_DIR} \
+    && ./enn_executor_runner -model ./ic3_exynos_fp32.pte -input ./ic3_input_0.bin --output_path ."
+```
+
+`enn_executor_runner` supports more options; refer to its help message.
diff --git a/examples/samsung/aot_compiler.py b/examples/samsung/aot_compiler.py
new file mode 100644
index 00000000000..5b092d3d9ac
--- /dev/null
+++ b/examples/samsung/aot_compiler.py
@@ -0,0 +1,97 @@
+# Copyright (c) 2025 Samsung Electronics Co. LTD
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
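+
+# Example invocation (E9955 is the chipset id used throughout these examples):
+#   python -m executorch.examples.samsung.aot_compiler --chipset E9955 -m ic3 \
+#       --output_dir ic3_artifact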
+
+import argparse
+import collections
+import logging
+import os
+
+import torch
+
+from executorch.backends.samsung.serialization.compile_options import (
+    gen_samsung_backend_compile_spec,
+)
+from executorch.backends.samsung.utils.export_utils import (
+    to_edge_transform_and_lower_to_enn,
+)
+from executorch.exir import ExecutorchBackendConfig
+from executorch.extension.export_util.utils import save_pte_program
+
+from ..models import MODEL_NAME_TO_MODEL
+from ..models.model_factory import EagerModelFactory
+
+FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
+logging.basicConfig(level=logging.INFO, format=FORMAT)
+
+SUPPORT_MODEL_NAMES = ["mv2", "ic3", "resnet18", "resnet50"]
+
+
+def save_tensors(tensors, prefix, artifact_dir):
+    if isinstance(tensors, tuple):
+        for index, output in enumerate(tensors):
+            save_path = prefix + "_" + str(index) + ".bin"
+            output.detach().numpy().tofile(os.path.join(artifact_dir, save_path))
+    elif isinstance(tensors, torch.Tensor):
+        tensors.detach().numpy().tofile(os.path.join(artifact_dir, prefix + ".bin"))
+    elif isinstance(tensors, collections.OrderedDict):
+        for index, output in enumerate(tensors.values()):
+            save_path = prefix + "_" + str(index) + ".bin"
+            output.detach().numpy().tofile(os.path.join(artifact_dir, save_path))
+    else:
+        logging.warning("Unsupported type (%s), skip saving tensor.", type(tensors))
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "-c",
+        "--chipset",
+        required=True,
+        help="Samsung chipset, e.g. E9955",
+        type=str,
+    )
+    parser.add_argument(
+        "-m",
+        "--model_name",
+        required=True,
+        help=f"Model name. Valid ones: {SUPPORT_MODEL_NAMES}",
+    )
+    parser.add_argument("-o", "--output_dir", default=".", help="output directory")
+
+    args = parser.parse_args()
+
+    if args.model_name not in SUPPORT_MODEL_NAMES:
+        raise RuntimeError(
+            f"Model {args.model_name} is not a valid name or is not supported yet. "
+            "More example models will be supported in the near future. Currently, "
+            f"available models are {SUPPORT_MODEL_NAMES}."
+        )
+
+    model, example_inputs, dynamic_shapes, _ = EagerModelFactory.create_model(
+        *MODEL_NAME_TO_MODEL[args.model_name]
+    )
+    assert (
+        dynamic_shapes is None
+    ), "The ENN backend doesn't support dynamic shapes currently."
+
+    model = model.eval()
+    outputs = model(*example_inputs)
+
+    compile_specs = [gen_samsung_backend_compile_spec(args.chipset)]
+    edge = to_edge_transform_and_lower_to_enn(
+        model, example_inputs, compile_specs=compile_specs
+    )
+
+    exec_prog = edge.to_executorch(
+        config=ExecutorchBackendConfig(extract_delegate_segments=True)
+    )
+
+    model_name = f"{args.model_name}_exynos_fp32"
+    save_pte_program(exec_prog, model_name, args.output_dir)
+
+    save_tensors(example_inputs, f"{args.model_name}_input", args.output_dir)
+    save_tensors(outputs, f"{args.model_name}_output", args.output_dir)
diff --git a/examples/samsung/executor_runner/enn_executor_runner.cpp b/examples/samsung/executor_runner/enn_executor_runner.cpp
new file mode 100644
index 00000000000..5facd6301f3
--- /dev/null
+++ b/examples/samsung/executor_runner/enn_executor_runner.cpp
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2025 Samsung Electronics Co. LTD
+ * All rights reserved
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ *
+ */
+
+/**
+ * @file
+ *
+ * This tool runs ExecuTorch model files with the ENN runtime. It assumes all
+ * inputs and outputs are fp32. Provide a list of input files; the ENN backend
+ * runs inference and writes out the results.
+ */
+
+#include <chrono>
+#include <cinttypes>
+#include <cstdint>
+#include <fstream>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <gflags/gflags.h>
+
+#include <executorch/extension/data_loader/file_data_loader.h>
+#include <executorch/runtime/executor/method.h>
+#include <executorch/runtime/executor/program.h>
+#include <executorch/runtime/platform/log.h>
+#include <executorch/runtime/platform/runtime.h>
+
+static uint8_t method_allocator_pool[4 * 1024U * 1024U]; // 4 MB
+
+DEFINE_string(model, "model.pte", "Model serialized in flatbuffer format.");
+DEFINE_string(
+    input,
+    "",
+    "Input file path; multiple inputs are space-separated: input_1 input_2 ...");
+
+DEFINE_string(output_path, "", "Directory to write execution results to.");
+
+using namespace torch::executor;
+using torch::executor::util::FileDataLoader;
+
+std::vector<std::string> split(std::string str, char delimiter = ' ') {
+  std::vector<std::string> result;
+  std::stringstream ss(str);
+  std::string temp;
+  while (std::getline(ss, temp, delimiter)) {
+    if (!temp.empty()) {
+      result.push_back(temp);
+    }
+  }
+  return result;
+}
+
+class DataReader {
+ public:
+  typedef std::vector<uint8_t> data_t;
+
+  DataReader(size_t size) : data_set_(size) {}
+
+  void read(const std::string file_path) {
+    ET_CHECK(index_ < data_set_.size());
+    data_t& data = data_set_[index_];
+    std::ifstream input_file(file_path.c_str(), std::ios::binary);
+    ET_CHECK(input_file.is_open());
+    input_file.seekg(0, std::ios::end);
+    data.resize(input_file.tellg());
+    input_file.seekg(0);
+    input_file.read(reinterpret_cast<char*>(data.data()), data.size());
+    input_file.close();
+    ++index_;
+  }
+
+  void* get(int32_t index) {
+    ET_CHECK(index < data_set_.size());
+    return data_set_[index].data();
+  }
+
+  size_t nbytes(int32_t index) {
+    ET_CHECK(index < data_set_.size());
+    return data_set_[index].size();
+  }
+
+  ~DataReader() = default;
+
+ private:
+  std::vector<data_t> data_set_;
+  int32_t index_ = 0;
+};
+
+void saveOutput(const exec_aten::Tensor& tensor, int32_t output_index) {
+  if (FLAGS_output_path.empty()) {
+    return;
+  }
+  auto output_file_name =
+      FLAGS_output_path + "/output_" + std::to_string(output_index) + ".bin";
+  std::ofstream fout(output_file_name.c_str(), std::ios::binary);
+  ET_CHECK_MSG(
+      fout.is_open(),
+      "Output directory doesn't exist or isn't writable: %s",
+      FLAGS_output_path.c_str());
+  fout.write(tensor.const_data_ptr<char>(), tensor.nbytes());
+  fout.close();
+}
+
+int main(int argc, char** argv) {
+  runtime_init();
+
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+  if (argc != 1) {
+    std::string msg = "Extra commandline args:";
+    for (int i = 1 /* skip argv[0] (program name) */; i < argc; i++) {
+      msg += std::string(" ") + argv[i];
+    }
+    ET_LOG(Error, "%s", msg.c_str());
+    return 1;
+  }
+
+  // Create a loader to get the data of the program file. There are other
+  // DataLoaders that use mmap() or point to data that's already in memory, and
+  // users can create their own DataLoaders to load from arbitrary sources.
+  const char* model_path = FLAGS_model.c_str();
+  Result<FileDataLoader> loader = FileDataLoader::from(model_path);
+  ET_CHECK_MSG(
+      loader.ok(),
+      "FileDataLoader::from() failed: 0x%" PRIx32,
+      (uint32_t)loader.error());
+
+  // Parse the program file. This is immutable, and can also be reused between
+  // multiple execution invocations across multiple threads.
+  Result<Program> program = Program::load(&loader.get());
+  if (!program.ok()) {
+    ET_LOG(Error, "Failed to parse model file %s", model_path);
+    return 1;
+  }
+  ET_LOG(Info, "Model file %s is loaded.", model_path);
+
+  // Use the first method in the program.
+  const char* method_name = nullptr;
+  {
+    const auto method_name_result = program->get_method_name(0);
+    ET_CHECK_MSG(method_name_result.ok(), "Program has no methods");
+    method_name = *method_name_result;
+  }
+  ET_LOG(Info, "Using method %s", method_name);
+
+  // MethodMeta describes the memory requirements of the method.
+  Result<MethodMeta> method_meta = program->method_meta(method_name);
+  ET_CHECK_MSG(
+      method_meta.ok(),
+      "Failed to get method_meta for %s: 0x%" PRIx32,
+      method_name,
+      (uint32_t)method_meta.error());
+
+  //
+  // The runtime does not use malloc/new; it allocates all memory using the
+  // MemoryManager provided by the client. Clients are responsible for
+  // allocating the memory ahead of time, or providing MemoryAllocator
+  // subclasses that can do it dynamically.
+  //
+
+  // The method allocator is used to allocate all dynamic C++ metadata/objects
+  // used to represent the loaded method. This allocator is only used during
+  // loading a method of the program, which will return an error if there was
+  // not enough memory.
+  //
+  // The amount of memory required depends on the loaded method and the runtime
+  // code itself. The amount of memory here is usually determined by running the
+  // method and seeing how much memory is actually used, though it's possible to
+  // subclass MemoryAllocator so that it calls malloc() under the hood (see
+  // MallocMemoryAllocator).
+  //
+  // In this example we use a statically allocated memory pool.
+  MemoryAllocator method_allocator{
+      MemoryAllocator(sizeof(method_allocator_pool), method_allocator_pool)};
+
+  // The memory-planned buffers will back the mutable tensors used by the
+  // method. The sizes of these buffers were determined ahead of time during the
+  // memory-planning passes.
+  //
+  // Each buffer typically corresponds to a different hardware memory bank. Most
+  // mobile environments will only have a single buffer. Some embedded
+  // environments may have more than one for, e.g., slow/large DRAM and
+  // fast/small SRAM, or for memory associated with particular cores.
+  std::vector<std::unique_ptr<uint8_t[]>> planned_buffers; // Owns the memory
+  std::vector<Span<uint8_t>> planned_spans; // Passed to the allocator
+  size_t num_memory_planned_buffers = method_meta->num_memory_planned_buffers();
+  for (size_t id = 0; id < num_memory_planned_buffers; ++id) {
+    // .get() will always succeed because id < num_memory_planned_buffers.
+    size_t buffer_size =
+        static_cast<size_t>(method_meta->memory_planned_buffer_size(id).get());
+    ET_LOG(Info, "Setting up planned buffer %zu, size %zu.", id, buffer_size);
+    planned_buffers.push_back(std::make_unique<uint8_t[]>(buffer_size));
+    planned_spans.push_back({planned_buffers.back().get(), buffer_size});
+  }
+  HierarchicalAllocator planned_memory(
+      {planned_spans.data(), planned_spans.size()});
+
+  // Assemble all of the allocators into the MemoryManager that the Executor
+  // will use.
+  MemoryManager memory_manager(&method_allocator, &planned_memory);
+
+  //
+  // Load the method from the program, using the provided allocators. Running
+  // the method can mutate the memory-planned buffers, so the method should only
+  // be used by a single thread at a time, but it can be reused.
+  //
+
+  Result<Method> method = program->load_method(method_name, &memory_manager);
+  ET_CHECK_MSG(
+      method.ok(),
+      "Loading of method %s failed with status 0x%" PRIx32,
+      method_name,
+      (uint32_t)method.error());
+
+  auto input_files = split(FLAGS_input);
+  ET_CHECK_MSG(
+      input_files.size() == method->inputs_size(),
+      "Please check the number of given input binary files");
+  DataReader input_data_reader(input_files.size());
+  for (const auto& input_file : input_files) {
+    input_data_reader.read(input_file);
+  }
+
+  for (int input_index = 0; input_index < method->inputs_size();
+       ++input_index) {
+    MethodMeta method_meta = method->method_meta();
+    Result<TensorInfo> tensor_meta = method_meta.input_tensor_meta(input_index);
+    ET_CHECK_MSG(
+        input_data_reader.nbytes(input_index) == tensor_meta->nbytes(),
+        "Size of the given input file doesn't match the expected input size");
+    TensorImpl impl = TensorImpl(
+        tensor_meta->scalar_type(),
+        tensor_meta->sizes().size(),
+        const_cast<TensorImpl::SizesType*>(tensor_meta->sizes().data()),
+        input_data_reader.get(input_index),
+        const_cast<TensorImpl::DimOrderType*>(tensor_meta->dim_order().data()));
+    Error ret = method->set_input(Tensor(&impl), input_index);
+    ET_CHECK_MSG(
+        ret == Error::Ok,
+        "Failed to set input tensor: %d",
+        static_cast<int>(ret));
+  }
+  // Allocate input tensors and set all of their elements to 1. The `inputs`
+  // variable owns the allocated memory and must live past the last call to
+  // `execute()`.
+  // auto inputs = util::prepare_input_tensors(*method);
+
+  // Run the model.
+  ET_LOG(Info, "Start inference.");
+  auto start = std::chrono::high_resolution_clock::now();
+  Error status = method->execute();
+  auto end = std::chrono::high_resolution_clock::now();
+  double elapse =
+      std::chrono::duration_cast<std::chrono::microseconds>(end - start)
+          .count() /
+      1000.0;
+  ET_CHECK_MSG(
+      status == Error::Ok,
+      "Execution of method %s failed with status 0x%" PRIx32,
+      method_name,
+      static_cast<uint32_t>(status));
+  ET_LOG(Info, "End with elapsed time(ms): %f", elapse);
+
+  // Get the outputs.
+  std::vector<EValue> outputs(method->outputs_size());
+  status = method->get_outputs(outputs.data(), outputs.size());
+  ET_CHECK(status == Error::Ok);
+
+  for (size_t output_index = 0; output_index < method->outputs_size();
+       ++output_index) {
+    auto output_tensor = outputs[output_index].toTensor();
+    // Save the results to the given directory in order.
+    saveOutput(output_tensor, output_index);
+  }
+
+  return 0;
+}
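
For reference, `enn_executor_runner` consumes raw fp32 tensor dumps and writes `output_<index>.bin` files into `--output_path`. A minimal host-side sketch for producing a compatible input and sanity-checking an output pulled back with `adb pull`; the `ic3` file names mirror the conventions of `aot_compiler.py` above, and the input shape is illustrative:

```python
import numpy as np
import torch

# Illustrative shape; use the example input shape of the model you exported.
example_input = torch.randn(1, 3, 299, 299)

# Write the input the same way save_tensors() in aot_compiler.py does:
# raw fp32 bytes in C order, named <model>_input_<index>.bin.
example_input.detach().numpy().tofile("ic3_input_0.bin")

# After `adb pull output_0.bin`, compare against the eager reference output
# that aot_compiler.py saved as ic3_output_0.bin.
device_out = np.fromfile("output_0.bin", dtype=np.float32)
reference = np.fromfile("ic3_output_0.bin", dtype=np.float32)
print("max abs diff:", np.abs(device_out - reference).max())
```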