Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 9780963

Browse files
Committed Mar 11, 2025
ggml-qnn: pr to upstream
1 parent 25789ae commit 9780963

10 files changed

+1510
-1619
lines changed
 

‎cmake/aarch64-w64-mingw32.cmake

-18
This file was deleted.

‎cmake/arm64-windows-cygwin.cmake

-16
This file was deleted.

‎cmake/arm64-windows-llvm.cmake

+2-2
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ set( CMAKE_CXX_COMPILER clang++ )
99
set( CMAKE_C_COMPILER_TARGET ${target} )
1010
set( CMAKE_CXX_COMPILER_TARGET ${target} )
1111

12-
#set( arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
13-
#set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments" )
12+
set( arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
13+
set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments" )
1414

1515
set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
1616
set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )

‎ggml/src/ggml-qnn/ggml-qnn.cpp

+1,488-1,081
Large diffs are not rendered by default.

‎scripts/build-run-android.sh

+10-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ GGUF_MODEL_NAME=/sdcard/qwen1_5-1_8b-chat-q4_0.gguf
1414
#https://www.qualcomm.com/developer/software/qualcomm-ai-engine-direct-sdk
1515
#https://developer.qualcomm.com/software/hexagon-dsp-sdk/tools
1616
QNN_SDK_URL=https://www.qualcomm.com/developer/software/qualcomm-ai-engine-direct-sdk
17-
QNN_SDK_PATH=/opt/qcom/aistack/qairt/2.31.0.250130/
17+
QNN_SDK_PATH=/opt/qcom/aistack/qairt/2.32.0.250228/
1818

1919
#default is QNN NPU
2020
qnnbackend=2
@@ -97,11 +97,14 @@ function check_qnn_libs()
9797
{
9898
#reuse the cached qnn libs on Android phone
9999
adb shell ls ${REMOTE_PATH}/libQnnCpu.so
100+
adb shell ls ${REMOTE_PATH}/libQnnGpu.so
101+
adb shell ls ${REMOTE_PATH}/libQnnHtp.so
100102
if [ $? -eq 0 ]; then
101103
printf "QNN libs already exist on Android phone\n"
102104
else
103105
update_qnn_libs
104106
fi
107+
update_qnn_cfg
105108
}
106109

107110

@@ -119,6 +122,12 @@ function update_qnn_libs()
119122
}
120123

121124

125+
function update_qnn_cfg()
126+
{
127+
adb push ./scripts/ggml-qnn.cfg ${REMOTE_PATH}/
128+
}
129+
130+
122131
function build_ggml_qnn()
123132
{
124133
show_pwd

‎scripts/ggml-qnn.cfg

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
[general]
2+
# enable/disable QNN's internal log
3+
print_qnn_internal_log = 0
4+
# 0: general approach, similar to ggml-sycl or ggml-cann
5+
# 1: mapping entire ggml cgraph to QNN graph
6+
inference_approach = 0
7+
8+
[npu]
9+
npu_inference_datatype = "fp16"

‎src/llama.cpp

+1-16
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,6 @@
1414
#include "ggml-backend.h"
1515
#include "ggml-cpp.h"
1616

17-
#ifdef GGML_USE_QNN
18-
#include "ggml-qnn.h"
19-
#endif
20-
2117
#include <algorithm>
2218
#include <array>
2319
#include <cassert>
@@ -9714,19 +9710,8 @@ struct llama_context * llama_init_from_model(
97149710
// add ACCEL backends (such as BLAS)
97159711
for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
97169712
ggml_backend_dev_t dev = ggml_backend_dev_get(i);
9717-
9718-
#ifdef GGML_USE_QNN // avoid side-effect to other backends
9719-
if (QNN_BACKEND_GGML == model->params.main_gpu) {
9720-
break;
9721-
}
9722-
#endif
97239713
if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_ACCEL) {
9724-
ggml_backend_t backend = nullptr;
9725-
#ifndef GGML_USE_QNN
9726-
backend = ggml_backend_dev_init(dev, nullptr);
9727-
#else
9728-
backend = ggml_backend_dev_init(dev, reinterpret_cast<const char *>(model->params.main_gpu));
9729-
#endif
9714+
ggml_backend_t backend = ggml_backend_dev_init(dev, nullptr);
97309715
if (backend == nullptr) {
97319716
LLAMA_LOG_ERROR("%s: failed to initialize %s backend\n", __func__, ggml_backend_dev_name(dev));
97329717
llama_free(ctx);

‎tests/CMakeLists.txt

-1
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,6 @@ llama_target_and_test(test-chat-template.cpp)
137137
# llama_target_and_test(test-opt.cpp) # SLOW
138138
llama_target_and_test(test-gguf.cpp)
139139
llama_target_and_test(test-backend-ops.cpp)
140-
llama_target_and_test(ggml-qnn-ut.cpp)
141140

142141
llama_target_and_test(test-model-load-cancel.cpp LABEL "model")
143142
llama_target_and_test(test-autorelease.cpp LABEL "model")

0 commit comments

Comments (0)
Please sign in to comment.