Skip to content

feat: op perf opt #38

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 103 commits into from
Apr 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
103 commits
Select commit Hold shift + click to select a range
1dc284e
add op define xml
chraac Mar 23, 2025
d418ac8
copy qnn libs in cmake
chraac Mar 23, 2025
39a6b7b
fix htp skel path
chraac Mar 23, 2025
89178a4
add windows copy file list
chraac Mar 24, 2025
976f20c
wip
chraac Mar 24, 2025
20bd6f4
add generated package
chraac Mar 24, 2025
8f4a29d
remove unused params
chraac Mar 24, 2025
420ef68
add cmake list
chraac Mar 24, 2025
7a54a85
set qnn sdk and hexagon sdk path
chraac Mar 24, 2025
1191311
wip
chraac Mar 24, 2025
28b82ea
wip
chraac Mar 26, 2025
3b19cce
fix tools version
chraac Mar 26, 2025
4d0c4f1
fix compiling error
chraac Mar 26, 2025
120ae13
fix dims calc
chraac Mar 26, 2025
9bb8ca5
wip
chraac Mar 26, 2025
da962ca
add mulmat 2d
chraac Mar 26, 2025
330a4a0
wip
chraac Mar 27, 2025
cacc5db
reduction
chraac Mar 27, 2025
7576290
wip
chraac Mar 27, 2025
6144c92
wip
chraac Mar 27, 2025
c60778b
fix compiling error in x64
chraac Mar 27, 2025
6acb780
wip
chraac Mar 27, 2025
b087d57
fix device description in emulator
chraac Mar 28, 2025
ee25527
wip
chraac Mar 28, 2025
0d6b038
add flag
chraac Mar 28, 2025
dbd9a51
copy necessary libs
chraac Mar 28, 2025
3607f93
wip
chraac Mar 28, 2025
36e1e32
load HtpPrepare first for emulator
chraac Mar 28, 2025
1e66c16
enable custom op for 2d matrix
chraac Mar 28, 2025
206dec8
verify op config before add to node
chraac Mar 28, 2025
58417ae
Revert "verify op config before add to node"
chraac Mar 28, 2025
46ccf73
wip
chraac Mar 28, 2025
e358c46
wip
chraac Mar 28, 2025
1806254
wip
chraac Mar 28, 2025
6b5eb7f
revert tool version change
chraac Mar 29, 2025
6d41530
use hexagon sdk version 5.5.0
chraac Mar 29, 2025
d08f3ea
wip
chraac Mar 31, 2025
fe61817
move to sub dir
chraac Apr 1, 2025
9ee23eb
add hexagon npu device and server lib
chraac Apr 2, 2025
1a25db7
fix npu lib build
chraac Apr 3, 2025
3446f5e
Merge branch 'dev-refactoring' into dev-op-perf-opt
chraac Apr 3, 2025
39473d9
refactoring: rename QNNBackend enum
chraac Apr 4, 2025
8817691
fix compiling error
chraac Apr 4, 2025
c80c547
wip
chraac Apr 4, 2025
ba3e1e8
remove qnn/backend.hpp
chraac Apr 5, 2025
16ed5f9
add hexagon dsp host layer
chraac Apr 5, 2025
07d40a3
extract rpc_mem from qnn submodule
chraac Apr 6, 2025
76cf09f
fix dsp compiling error
chraac Apr 6, 2025
ce9fcbd
wip
chraac Apr 6, 2025
7c6820c
wip
chraac Apr 6, 2025
1b09909
open and lose npu device
chraac Apr 6, 2025
89166ab
split objects into separated files
chraac Apr 7, 2025
b5ad6af
fix linking error
chraac Apr 7, 2025
3e78d87
add npu_tensor
chraac Apr 7, 2025
24f3500
add host graph
chraac Apr 8, 2025
eaa1a26
map rpc buffer before usage
chraac Apr 9, 2025
7d17ab5
fix some todos
chraac Apr 9, 2025
1223923
add shared module
chraac Apr 10, 2025
a4692f9
split rpc_interface from rpc_mem
chraac Apr 10, 2025
4d7c3b8
get get_dsp_arch from device
chraac Apr 10, 2025
cf7f67a
wip
chraac Apr 10, 2025
59915a0
rename host classes
chraac Apr 10, 2025
2ec1b58
fix hexagon sdk arch getter
chraac Apr 10, 2025
db2138b
fix device open
chraac Apr 11, 2025
b4cbaa0
fix linking error
chraac Apr 11, 2025
7debd97
fix crash
chraac Apr 11, 2025
1b07d73
use tensor_data_type
chraac Apr 12, 2025
9b74855
fix npu lib crash
chraac Apr 12, 2025
28e0e4f
fix debug log print
chraac Apr 12, 2025
951ca28
skip empty graph
chraac Apr 12, 2025
b5d3dc3
wip
chraac Apr 12, 2025
50eddd6
add log
chraac Apr 12, 2025
cca931d
fix unmap fail
chraac Apr 13, 2025
e20a79e
fix tensor set
chraac Apr 14, 2025
c08ff28
remove some logs
chraac Apr 14, 2025
02702ff
flush back memory after finished
chraac Apr 14, 2025
7eea5bb
fix nb
chraac Apr 14, 2025
f1b487a
wip
chraac Apr 14, 2025
3b1296c
wip
chraac Apr 14, 2025
985ff7f
add helper function
chraac Apr 14, 2025
5214f01
impl add op
chraac Apr 14, 2025
60093aa
fix some add in test-backend-ops
chraac Apr 15, 2025
0ee7efa
add elt wise sub and mul
chraac Apr 15, 2025
cea8167
fix crash on some inplace op
chraac Apr 15, 2025
c101761
wip
chraac Apr 15, 2025
675fe92
fix elt wise op calc
chraac Apr 15, 2025
3fc9316
wip
chraac Apr 15, 2025
fb4d07f
Merge branch 'dev-refactoring' into dev-op-perf-opt
chraac Apr 16, 2025
1732e53
split mul_mat into file
chraac Apr 16, 2025
30858d6
add caps array
chraac Apr 16, 2025
3cab8b1
wip
chraac Apr 16, 2025
366b8b5
wip
chraac Apr 16, 2025
ce67613
print support/unsupport op
chraac Apr 16, 2025
c9b4391
copy lldb-server for newer android sdk
chraac Apr 16, 2025
2963c28
add tensor_spec
chraac Apr 16, 2025
0395b98
add assert
chraac Apr 17, 2025
a59252f
fix crash when loading model
chraac Apr 17, 2025
b485f19
rename cmake option
chraac Apr 18, 2025
9cd0737
fix name
chraac Apr 18, 2025
aa0eb3e
fix device memory and description
chraac Apr 18, 2025
ba02199
fix compiling error on qnn only build
chraac Apr 19, 2025
45e5205
fix some potential UBs
chraac Apr 20, 2025
acbdb97
fix comments
chraac Apr 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 0 additions & 13 deletions ggml/include/ggml-qnn.h
Original file line number Diff line number Diff line change
@@ -1,24 +1,11 @@
#pragma once

#include "ggml-backend.h"
#include "ggml.h"

#ifdef __cplusplus
extern "C" {
#endif

#define GGML_QNN_NAME "qnn"
#define GGML_QNN_MAX_DEVICES QNN_BACKEND_COUNT

enum QNNBackend {
QNN_BACKEND_CPU = 0,
QNN_BACKEND_GPU,
QNN_BACKEND_NPU,
QNN_BACKEND_COUNT,
};

GGML_API bool ggml_backend_is_qnn(ggml_backend_t backend);

GGML_API ggml_backend_reg_t ggml_backend_qnn_reg(void);

#ifdef __cplusplus
Expand Down
111 changes: 109 additions & 2 deletions ggml/src/ggml-qnn/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
message(STATUS "Using QNN backend")

option(GGML_HEXAGON_NPU_ONLY "ggml-qnn: Only use Hexagon NPU" OFF)
option(GGML_QNN_ENABLE_HEXAGON_BACKEND "ggml-qnn: Enable Hexagon custom package" ${GGML_HEXAGON_NPU_ONLY})

if(CMAKE_SYSTEM_NAME STREQUAL "Android")
find_library(LOG_LIB log)
set(QNN_LINK_LIBRARIES ${LOG_LIB})
set(QNN_DEFAULT_LIB_SEARCH_PATH "/data/local/tmp/" CACHE STRING "customized library search path for QNN backend")
add_compile_options(-g -O0)
elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows" OR CMAKE_SYSTEM_NAME STREQUAL "Linux")
set(QNN_DEFAULT_LIB_SEARCH_PATH "" CACHE STRING "customized library search path for QNN backend")
else()
Expand All @@ -21,15 +25,22 @@ if(NOT DEFINED GGML_QNN_SDK_PATH)
endif()

message("CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
message("CMAKE_CXX_FLAGS_DEBUG: ${CMAKE_CXX_FLAGS_DEBUG}")
message("CMAKE_CXX_FLAGS_RELEASE: ${CMAKE_CXX_FLAGS_RELEASE}")
message("QNN_SDK_PATH: ${GGML_QNN_SDK_PATH}")

file(GLOB QNN_SOURCES "${CMAKE_CURRENT_LIST_DIR}/*.cpp")
file(GLOB QNN_SOURCES "${CMAKE_CURRENT_LIST_DIR}/qnn/*.cpp")
file(GLOB COMMON_SOURCES "${CMAKE_CURRENT_LIST_DIR}/*.cpp")
ggml_add_backend_library(ggml-qnn
${QNN_SOURCES}
${COMMON_SOURCES}
)

target_include_directories(ggml-qnn PRIVATE ${GGML_QNN_SDK_PATH}/include/QNN ${CMAKE_CURRENT_LIST_DIR})
target_include_directories(ggml-qnn PRIVATE
${GGML_QNN_SDK_PATH}/include/QNN
${CMAKE_CURRENT_LIST_DIR}/qnn
${CMAKE_CURRENT_LIST_DIR}
)
target_link_libraries(ggml-qnn PRIVATE ${QNN_LINK_LIBRARIES})

if(NOT "${QNN_DEFAULT_LIB_SEARCH_PATH}" STREQUAL "")
Expand All @@ -52,3 +63,99 @@ if(GGML_QNN_ENABLE_PERFORMANCE_TRACKING)
else()
message("GGML_QNN_ENABLE_PERFORMANCE_TRACKING is disabled")
endif()

# Build the code under `shared/` first.
# NOTE(review): presumably this is where the `runtime-common` target linked
# below is defined - confirm against shared/CMakeLists.txt.
add_subdirectory(shared)

# NPU-only mode: define GGML_HEXAGON_NPU_ONLY for the compiler and force the
# Hexagon backend on.
if(GGML_HEXAGON_NPU_ONLY)
message("GGML_HEXAGON_NPU_ONLY is enabled")
# Directory-scoped definition. Per the CMake documentation it reaches targets
# of this directory and of sub-directories added AFTER this call, i.e. `npu`
# below but not `shared` above. Keep this ordering in mind before moving it.
add_compile_definitions(GGML_HEXAGON_NPU_ONLY)
# Normal variable; it shadows the option() of the same name for the rest of
# this scope so the check below always takes the "enabled" path.
set(GGML_QNN_ENABLE_HEXAGON_BACKEND ON)
else()
message("GGML_HEXAGON_NPU_ONLY is disabled")
endif()

# With the Hexagon backend enabled, ggml-qnn links hexagon-npu-host (built in
# `npu/`), which in turn links runtime-common. Otherwise ggml-qnn links
# runtime-common directly.
if(GGML_QNN_ENABLE_HEXAGON_BACKEND)
message("GGML_QNN_ENABLE_HEXAGON_BACKEND is enabled")
add_subdirectory(npu)
# Keyword-less signature. NOTE(review): CMake forbids mixing the plain and the
# PRIVATE/PUBLIC signatures on one target, so adding a keyword here is only
# safe if every other target_link_libraries() call on hexagon-npu-host
# (including ones made by SDK helper functions) uses keywords too - confirm.
target_link_libraries(hexagon-npu-host runtime-common)
target_link_libraries(ggml-qnn PRIVATE hexagon-npu-host)
else()
message("GGML_QNN_ENABLE_HEXAGON_BACKEND is disabled")
target_link_libraries(ggml-qnn PRIVATE runtime-common)
endif()

# Copy QNN dynamic libraries
#
# Collects the QNN SDK runtime libraries (plus, on Android/aarch64, a debug
# server, libomp and the ASan runtime found in the NDK) into QNN_DYNAMIC_LIBS
# and registers one POST_BUILD copy per file so they end up in
# CMAKE_RUNTIME_OUTPUT_DIRECTORY after ggml-qnn is built.
set(QNN_DYNAMIC_LIBS "")

if(CMAKE_SYSTEM_NAME STREQUAL "Android" OR CMAKE_SYSTEM_NAME STREQUAL "Linux")
    if(CMAKE_SYSTEM_NAME STREQUAL "Android")
        # Android
        set(QNN_SDK_LIB_PATH "${GGML_QNN_SDK_PATH}/lib/aarch64-android")
    elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
        # Linux x86_64
        set(QNN_SDK_LIB_PATH "${GGML_QNN_SDK_PATH}/lib/x86_64-linux-clang")
    else()
        # Linux aarch64
        set(QNN_SDK_LIB_PATH "${GGML_QNN_SDK_PATH}/lib/aarch64-oe-linux-gcc11.2")
    endif()

    list(APPEND QNN_DYNAMIC_LIBS "${QNN_SDK_LIB_PATH}/libQnnSystem.so")
    list(APPEND QNN_DYNAMIC_LIBS "${QNN_SDK_LIB_PATH}/libQnnCpu.so")
    list(APPEND QNN_DYNAMIC_LIBS "${QNN_SDK_LIB_PATH}/libQnnGpu.so")
    list(APPEND QNN_DYNAMIC_LIBS "${QNN_SDK_LIB_PATH}/libQnnHtp.so")

    # The pattern also matches libQnnHtp.so itself, which is already listed
    # above; the duplicate is dropped before the copy commands are generated.
    file(GLOB HTP_STUB_LIBS "${QNN_SDK_LIB_PATH}/libQnnHtp*.so")
    list(APPEND QNN_DYNAMIC_LIBS ${HTP_STUB_LIBS})

    if(CMAKE_SYSTEM_NAME STREQUAL "Android")
        file(GLOB HTP_SKEL_LIBS "${GGML_QNN_SDK_PATH}/lib/hexagon-*/unsigned/libQnnHtp*Skel.so")
        list(APPEND QNN_DYNAMIC_LIBS ${HTP_SKEL_LIBS})

        if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
            # Ship a debug server next to the binaries: gdbserver when the NDK
            # still provides it, otherwise the lldb-server from the toolchain.
            if(EXISTS "${CMAKE_ANDROID_NDK}/prebuilt/android-arm64/gdbserver/gdbserver")
                list(APPEND QNN_DYNAMIC_LIBS "${CMAKE_ANDROID_NDK}/prebuilt/android-arm64/gdbserver/gdbserver")
                message("old ndk, copy gdbserver")
            else()
                # NOTE(review): the toolchain path hard-codes a linux-x86_64 host;
                # on other hosts this glob matches nothing and no server is copied.
                file(GLOB LLDB_SERVER "${CMAKE_ANDROID_NDK}/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/*/lib/linux/aarch64/lldb-server")
                list(APPEND QNN_DYNAMIC_LIBS ${LLDB_SERVER})
                message("new ndk, copy lldb-server")
            endif()

            file(GLOB OMP_LIBS "${CMAKE_ANDROID_NDK}/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/*/lib/linux/aarch64/libomp.so")
            file(GLOB ASAN_LIBS "${CMAKE_ANDROID_NDK}/toolchains/llvm/prebuilt/linux-x86_64/lib64/clang/*/lib/linux/libclang_rt.asan-aarch64-android.so")
            list(APPEND QNN_DYNAMIC_LIBS ${OMP_LIBS})
            list(APPEND QNN_DYNAMIC_LIBS ${ASAN_LIBS})
        endif()
    else()
        # Linux
        list(APPEND QNN_DYNAMIC_LIBS "${QNN_SDK_LIB_PATH}/libHtpPrepare.so")
    endif()
elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows")
    if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
        # x86_64
        set(QNN_SDK_LIB_PATH "${GGML_QNN_SDK_PATH}/lib/x86_64-windows-msvc")
    else()
        # aarch64
        set(QNN_SDK_LIB_PATH "${GGML_QNN_SDK_PATH}/lib/aarch64-windows-msvc")
    endif()

    list(APPEND QNN_DYNAMIC_LIBS "${QNN_SDK_LIB_PATH}/QnnSystem.dll")
    list(APPEND QNN_DYNAMIC_LIBS "${QNN_SDK_LIB_PATH}/QnnCpu.dll")
    list(APPEND QNN_DYNAMIC_LIBS "${QNN_SDK_LIB_PATH}/QnnGpu.dll")
    list(APPEND QNN_DYNAMIC_LIBS "${QNN_SDK_LIB_PATH}/QnnHtp.dll")

    # As on Linux/Android, this pattern re-matches QnnHtp.dll listed above.
    file(GLOB HTP_STUB_LIBS "${QNN_SDK_LIB_PATH}/QnnHtp*.dll")

    if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
        list(APPEND QNN_DYNAMIC_LIBS "${QNN_SDK_LIB_PATH}/HtpPrepare.dll")
    endif()

    list(APPEND QNN_DYNAMIC_LIBS ${HTP_STUB_LIBS})
endif()

# The explicit entries and the glob results overlap (see above). Without this,
# the same file would get several identical POST_BUILD copy commands.
# Best effort: entries are compared as strings, so two different spellings of
# the same path are not merged.
if(QNN_DYNAMIC_LIBS)
    list(REMOVE_DUPLICATES QNN_DYNAMIC_LIBS)
endif()

foreach(QNN_DYNAMIC_LIB ${QNN_DYNAMIC_LIBS})
    message("Copy: ${QNN_DYNAMIC_LIB} -> ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")

    # Paths are quoted and VERBATIM is set so SDK/NDK locations containing
    # spaces reach `cmake -E copy` as single arguments on every platform.
    add_custom_command(
        TARGET ggml-qnn POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E copy
                "${QNN_DYNAMIC_LIB}"
                "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}"
        VERBATIM)
endforeach()
11 changes: 0 additions & 11 deletions ggml/src/ggml-qnn/backend-ops.hpp

This file was deleted.

147 changes: 147 additions & 0 deletions ggml/src/ggml-qnn/npu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
# Assembly sources are part of this sub-project.
enable_language(ASM)

# NOTE(review): CMP0115 OLD lets CMake guess source-file extensions; presumably
# needed by the Hexagon SDK helper functions - confirm before switching to NEW,
# since OLD policy behaviour is deprecated.
cmake_policy(SET CMP0115 OLD)

# The Hexagon SDK location must come from the environment; bail out early
# when it is missing.
if(NOT DEFINED ENV{HEXAGON_SDK_ROOT})
    message(FATAL_ERROR "HEXAGON_SDK_ROOT not defined")
endif()

set(HEXAGON_SDK_ROOT $ENV{HEXAGON_SDK_ROOT})
message("HEXAGON_SDK_ROOT: ${HEXAGON_SDK_ROOT}")

# Pull in the SDK helper functions. The HEXAGON_CMAKE_ROOT fallback is only
# taken when the environment variable exists but holds an empty/false value.
if(NOT HEXAGON_SDK_ROOT)
    include(${HEXAGON_CMAKE_ROOT}/hexagon_fun.cmake)
else()
    include(${HEXAGON_SDK_ROOT}/build/cmake/hexagon_fun.cmake)
endif()

# Base include dirs for the project: the binary dir plus the SDK header
# locations. Added at directory scope, so every target defined in this file
# sees them.
set(common_incs
    ${CMAKE_CURRENT_BINARY_DIR}/
    ${HEXAGON_SDK_ROOT}/incs/
    ${HEXAGON_SDK_ROOT}/incs/stddef/
    ${HEXAGON_SDK_ROOT}/incs/HAP/
    ${HEXAGON_SDK_ROOT}/rtos/qurt/
    ${HEXAGON_SDK_ROOT}/utils/examples/
)

include_directories(${common_incs})

# This file serves two builds:
#   * host build (Android / Linux / Windows): static library `hexagon-npu-host`
#     made of common/ + host/ sources and the IDL stub;
#   * any other system name: the Hexagon-side shared library
#     `hexagon_npu_skel` made of common/ + device/ sources and the IDL skel.
# build_idl, choose_dsprpc, link_custom_library and copy_binaries are not
# defined here; they are expected to come from the SDK's hexagon_fun.cmake.
#
# The variable name is passed to if() unexpanded so its value is compared
# as-is and never dereferenced a second time.
if(CMAKE_SYSTEM_NAME MATCHES "Android|Linux|Windows")
    # host build
    file(GLOB common_srcs "${CMAKE_CURRENT_LIST_DIR}/common/*.cpp")
    file(GLOB host_srcs "${CMAKE_CURRENT_LIST_DIR}/host/*.cpp")

    # NOTE(review): the stub lives in the binary dir, so it is presumably
    # generated by build_idl() below - confirm.
    set(stub_srcs "${CMAKE_CURRENT_BINARY_DIR}/npu_device_stub.c")
    add_library(hexagon-npu-host STATIC
        ${common_srcs}
        ${host_srcs}
        ${stub_srcs}
    )

    # disable warnings for the stub
    set_source_files_properties(
        ${stub_srcs}
        PROPERTIES
        COMPILE_FLAGS "-w"
    )

    build_idl(idl/hexagon_npu.idl hexagon-npu-host)

    # Add compile definitions to the target (PUBLIC: consumers get them too)
    target_compile_definitions(hexagon-npu-host PUBLIC
        VERIFY_PRINT_ERROR
        GGML_QNN_ENABLE_HEXAGON_BACKEND
    )

    # QNN_SDK_ROOT is only assigned in the device branch below. In the host
    # build the SDK location is provided by the parent directory as
    # GGML_QNN_SDK_PATH, so fall back to it instead of adding the bogus
    # "/include/QNN/" path that an empty QNN_SDK_ROOT would produce.
    if(QNN_SDK_ROOT)
        set(npu_host_qnn_sdk_dir "${QNN_SDK_ROOT}")
    else()
        set(npu_host_qnn_sdk_dir "${GGML_QNN_SDK_PATH}")
    endif()

    target_include_directories(hexagon-npu-host PRIVATE
        ${HEXAGON_SDK_ROOT}/ipc/fastrpc/rpcmem/inc/
        ${npu_host_qnn_sdk_dir}/include/QNN/
        ${CMAKE_CURRENT_LIST_DIR}/host/
        ${CMAKE_CURRENT_LIST_DIR}/
    )

    target_include_directories(hexagon-npu-host PUBLIC
        ${HEXAGON_SDK_ROOT}/incs/ # TODO: this is for rpc-mem
    )

    if(NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
        set_target_properties(hexagon-npu-host PROPERTIES OUTPUT_NAME "hexagon_npu")
    endif()

    if(CMAKE_SYSTEM_NAME MATCHES "Android|Linux")
        # NOTE(review): PUBLIC forwards -pie to everything that links this
        # static library - confirm that is intended for the consumers.
        target_link_options(hexagon-npu-host PUBLIC -pie)
    endif()

    # The former `link_options(hexagon-npu-host)` call was dropped: it treated
    # the target name as a linker flag for targets created later in this
    # directory, which is never what was meant.

    # PREBUILT_LIB_DIR is not read in this file.
    # NOTE(review): presumably consumed by the SDK helpers called below - confirm.
    if(CMAKE_SYSTEM_NAME MATCHES "Android")
        set(PREBUILT_LIB_DIR "android_aarch64")
    elseif(CMAKE_SYSTEM_NAME MATCHES "Linux")
        set(PREBUILT_LIB_DIR "UbuntuARM_aarch64")
    else()
        # Windows
        set(PREBUILT_LIB_DIR "windows_aarch64")
    endif()

    choose_dsprpc("3" dsprpc) # cdsprpc
    link_custom_library(hexagon-npu-host ${dsprpc})
else()
    # hexagon npu build
    cmake_minimum_required(VERSION 3.14.3)
    project(hexagon_npu C CXX ASM)

    # check if QNN_SDK_ROOT is set
    if(NOT DEFINED ENV{QNN_SDK_ROOT})
        message(FATAL_ERROR "QNN_SDK_ROOT not defined")
    endif()

    set(QNN_SDK_ROOT $ENV{QNN_SDK_ROOT})
    message("QNN_SDK_ROOT: ${QNN_SDK_ROOT}")
    include_directories(
        ${QNN_SDK_ROOT}/include/QNN/
    )
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")

    file(GLOB common_srcs "${CMAKE_CURRENT_LIST_DIR}/common/*.cpp")
    file(GLOB device_srcs "${CMAKE_CURRENT_LIST_DIR}/device/*.cpp")

    # NOTE(review): as with the host stub, the skel is presumably produced by
    # build_idl() below - confirm.
    set(skel_srcs "${CMAKE_CURRENT_BINARY_DIR}/npu_device_skel.c")
    add_library(hexagon_npu_skel_OBJS OBJECT
        ${common_srcs}
        ${device_srcs}
        ${skel_srcs}
    )

    # Logging level follows the configure-time build type.
    if(CMAKE_BUILD_TYPE MATCHES "Debug|Dbg")
        message("Debug build, enable all logging")
        target_compile_definitions(hexagon_npu_skel_OBJS PUBLIC
            _DEBUG
            DEBUG_LOGGING
        )
    else()
        message("Release build, disable debug logging")
        target_compile_definitions(hexagon_npu_skel_OBJS PUBLIC
            NDEBUG
            RELEASE_LOGGING
        )
    endif()

    build_idl(idl/hexagon_npu.idl hexagon_npu_skel_OBJS)

    # disable warnings for the skel
    set_source_files_properties(
        ${skel_srcs}
        PROPERTIES
        COMPILE_FLAGS "-w"
    )

    # The shared library is assembled from the object library above.
    add_library(hexagon_npu_skel SHARED $<TARGET_OBJECTS:hexagon_npu_skel_OBJS>)

    # HEXAGON_LIB_DIR / HEXAGON_ARCH are not set in this file; they are
    # expected to be provided by the toolchain or the SDK cmake helpers.
    target_link_libraries(hexagon_npu_skel
        ${HEXAGON_LIB_DIR}/${HEXAGON_ARCH}/G0/pic/libc++abi.a
        ${HEXAGON_LIB_DIR}/${HEXAGON_ARCH}/G0/pic/libc++.a
    )
    set_target_properties(hexagon_npu_skel PROPERTIES OUTPUT_NAME "hexagon_npu_skel_${HEXAGON_ARCH}")

    copy_binaries(hexagon_npu_skel)
endif()

# vim: set noet fenc=utf-8 ff=unix ft=cmake :
Loading