feat: run on win #24


Merged · 40 commits · Feb 24, 2025

Commits (40)

0601eeb
move qnn_instance function implementation into cpp
chraac Feb 6, 2025
792790f
wip
chraac Feb 6, 2025
8b736fd
wip
chraac Feb 6, 2025
c667db2
move dl related function into separated file
chraac Feb 7, 2025
05df736
use cast op for gpu
chraac Feb 7, 2025
2520e59
Revert "use cast op for gpu"
chraac Feb 8, 2025
f56519c
Reapply "use cast op for gpu"
chraac Feb 8, 2025
9b871c3
fix compiling error in win
chraac Feb 10, 2025
e5df438
fix align_alloc in win
chraac Feb 10, 2025
071865d
fix compiling error
chraac Feb 10, 2025
954a7b1
add get sys free/total mem for win
chraac Feb 10, 2025
5ff8ec9
wip
chraac Feb 10, 2025
1bfa4ac
suppress warning in win
chraac Feb 11, 2025
d50facf
Merge branch 'dev-refactoring' into dev-run-on-win
chraac Feb 12, 2025
c72178f
add missing chrono header
chraac Feb 12, 2025
a6b67b5
Merge branch 'dev-refactoring' into dev-run-on-win
chraac Feb 13, 2025
c39a84e
set the correct qnn lib name for windows
chraac Feb 14, 2025
5d96028
add flag to control cpu backend
chraac Feb 14, 2025
02263e9
wip
chraac Feb 14, 2025
03a6d59
wip
chraac Feb 14, 2025
49ada21
Revert "Reapply "use cast op for gpu""
chraac Feb 15, 2025
f38cf10
fix compiling error for linux build
chraac Feb 15, 2025
bb70a73
fix cdsprpc dynamic library name
chraac Feb 16, 2025
afd8926
wip
chraac Feb 17, 2025
16cb2d4
skip rpc load fail
chraac Feb 17, 2025
e107e4a
fix page_align_alloc
chraac Feb 17, 2025
a0ba1be
suppress some warning in gcc
chraac Feb 18, 2025
7bce5cc
wip
chraac Feb 18, 2025
f18020a
reuse align to function
chraac Feb 18, 2025
a39688e
more log
chraac Feb 18, 2025
9faa80d
add log and fix warning
chraac Feb 18, 2025
31ce4aa
wip
chraac Feb 18, 2025
8bc4215
fix asan errors and memory leaks
chraac Feb 19, 2025
868f594
fix the get_io_tensors_from_graph
chraac Feb 19, 2025
bf48a56
improve comment
chraac Feb 19, 2025
faf45ad
print GGML_QNN_DEFAULT_LIB_SEARCH_PATH
chraac Feb 19, 2025
3165121
revert some unused changes
chraac Feb 20, 2025
1ecca44
move library search path setter into qnn module
chraac Feb 20, 2025
63b35f9
fix android library loading
chraac Feb 20, 2025
98e5835
skip qnn_device_get_platform_info for npu emulator
chraac Feb 20, 2025
18 changes: 15 additions & 3 deletions ggml/src/ggml-qnn/CMakeLists.txt
@@ -4,12 +4,15 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Android")
find_library(LOG_LIB log)
set(QNN_LINK_LIBRARIES ${LOG_LIB})
set(QNN_DEFAULT_LIB_SEARCH_PATH "/data/local/tmp/" CACHE STRING "customized library search path for QNN backend")
elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows" OR CMAKE_SYSTEM_NAME STREQUAL "Linux")
set(QNN_DEFAULT_LIB_SEARCH_PATH "" CACHE STRING "customized library search path for QNN backend")
else()
message(FATAL_ERROR "QNN now only available on Android")
message(FATAL_ERROR "QNN now only available on Android, Windows and Linux")
endif()

if(NOT DEFINED GGML_QNN_SDK_PATH)
# try read from environment variable
# TODO: create a function to search for the SDK path
if(DEFINED ENV{QNN_SDK_PATH})
set(GGML_QNN_SDK_PATH $ENV{QNN_SDK_PATH})
else()
@@ -29,5 +32,14 @@ ggml_add_backend_library(ggml-qnn
target_include_directories(ggml-qnn PRIVATE ${GGML_QNN_SDK_PATH}/include/QNN ${CMAKE_CURRENT_LIST_DIR})
target_link_libraries(ggml-qnn PRIVATE ${QNN_LINK_LIBRARIES})

string(REGEX REPLACE "/$" "" GGML_QNN_DEFAULT_LIB_SEARCH_PATH "${QNN_DEFAULT_LIB_SEARCH_PATH}")
target_compile_definitions(ggml-qnn PRIVATE GGML_QNN_DEFAULT_LIB_SEARCH_PATH="${QNN_DEFAULT_LIB_SEARCH_PATH}/")
if(NOT "${QNN_DEFAULT_LIB_SEARCH_PATH}" STREQUAL "")
string(REGEX REPLACE "/$" "" QNN_DEFAULT_LIB_SEARCH_PATH "${QNN_DEFAULT_LIB_SEARCH_PATH}")
endif()

message("GGML_QNN_DEFAULT_LIB_SEARCH_PATH: ${QNN_DEFAULT_LIB_SEARCH_PATH}")
target_compile_definitions(ggml-qnn PRIVATE GGML_QNN_DEFAULT_LIB_SEARCH_PATH="${QNN_DEFAULT_LIB_SEARCH_PATH}")

if(GGML_QNN_ENABLE_CPU_BACKEND)
message("GGML_QNN_ENABLE_CPU_BACKEND is enabled")
target_compile_definitions(ggml-qnn PRIVATE GGML_QNN_ENABLE_CPU_BACKEND)
endif()
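
The GGML_QNN_DEFAULT_LIB_SEARCH_PATH definition above now carries the search path without a trailing slash, so any runtime code that builds a full library path has to add the separator itself. A minimal sketch of that joining step (the helper name and its use are hypothetical, not code from this PR):

```cpp
#include <string>

#ifndef GGML_QNN_DEFAULT_LIB_SEARCH_PATH
#    define GGML_QNN_DEFAULT_LIB_SEARCH_PATH ""
#endif

// Hypothetical helper: combine the compile-time search path with a
// platform-specific library name such as kQnnNpuLibName.
static std::string make_lib_path(const std::string &lib_name) {
    std::string base = GGML_QNN_DEFAULT_LIB_SEARCH_PATH;
    if (!base.empty() && base.back() != '/') {
        base += '/';  // the CMake regex strips a trailing slash, so re-add the separator here
    }
    return base + lib_name;  // e.g. "/data/local/tmp/libQnnHtp.so" on Android
}
```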
2 changes: 1 addition & 1 deletion ggml/src/ggml-qnn/backend-ops.cpp
@@ -389,7 +389,7 @@ bool ggml_qnn_supports_tensor(ggml_backend_qnn_device_context *ctx, const ggml_t
case GGML_TYPE_F16:
case GGML_TYPE_Q8_0:
case GGML_TYPE_Q4_0:
if (!(ctx->supported_types & (1 << tensor->type))) {
if (!(ctx->supported_types & (uint64_t(1) << tensor->type))) {
QNN_LOG_DEBUG("[%s]unsupported data type %s, supported_types: 0x%x", qnn::get_backend_name(ctx->device),
ggml_type_name(tensor->type), ctx->supported_types);
return false;
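
The cast in this hunk widens the shift to 64 bits: with a plain int literal, `1 << tensor->type` is undefined once a type id reaches 31 or more, so the mask silently breaks for higher type ids. A standalone sketch of the same idea (the helper names are illustrative, not the ggml-qnn API):

```cpp
#include <cstdint>

// Build a per-type bit in a 64-bit mask; safe for type ids in [0, 63],
// whereas a plain `1 << type` is only valid for ids below 31.
constexpr std::uint64_t type_bit(int type) {
    return std::uint64_t(1) << type;
}

// Check a type id against a 64-bit supported-types mask.
constexpr bool supports(std::uint64_t supported_types, int type) {
    return (supported_types & type_bit(type)) != 0;
}
```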
3 changes: 3 additions & 0 deletions ggml/src/ggml-qnn/buffer.hpp
@@ -133,11 +133,14 @@ class qnn_mem_buffer : public qnn_buffer_interface {
if (data) {
memcpy(_buffer, data, size);
}

QNN_LOG_DEBUG("alloc buffer: %p, size: %ld", _buffer, size);
}

explicit qnn_mem_buffer(size_t size) : qnn_mem_buffer(nullptr, size) {}

~qnn_mem_buffer() {
QNN_LOG_DEBUG("free buffer: %p, size: %ld", _buffer, _size);
// the free will do nothing if the _buffer is nullptr
qnn::align_free(_buffer);
}
71 changes: 71 additions & 0 deletions ggml/src/ggml-qnn/dl_loader.hpp
@@ -0,0 +1,71 @@
#pragma once

#ifdef __linux__
#include <dlfcn.h>
#include <fcntl.h>
#elif defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#endif

#include <string>

namespace qnn {

#ifdef __linux__
typedef void *dl_handler_t;

inline qnn::dl_handler_t dl_load(const std::string &lib_path) {
return dlopen(lib_path.c_str(), RTLD_NOW | RTLD_LOCAL);
}

inline void *dl_sym(qnn::dl_handler_t handle, const std::string &symbol) { return dlsym(handle, symbol.c_str()); }

inline bool dl_unload(qnn::dl_handler_t handle) { return dlclose(handle) == 0; }

inline const char *dl_error() { return dlerror(); }
#elif defined(_WIN32)
using dl_handler_t = HMODULE;

inline qnn::dl_handler_t dl_load(const std::string &lib_path) {
// suppress error dialogs for missing DLLs
auto old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);

auto handle = LoadLibraryA(lib_path.c_str()); // TODO: use wstring version for unicode paths

SetErrorMode(old_mode);
return handle;
}

inline void *dl_sym(qnn::dl_handler_t handle, const std::string &symbol) {
auto old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);

void *p = (void *)GetProcAddress(handle, symbol.c_str());

SetErrorMode(old_mode);
return p;
}

inline bool dl_unload(qnn::dl_handler_t handle) {
FreeLibrary(handle);
return true;
}

inline const char *dl_error() {
// TODO: implement dl_error for Windows
return nullptr;
}

#endif

template <typename Fn>
Fn dl_sym_typed(qnn::dl_handler_t handle, const std::string &function_name) {
return reinterpret_cast<Fn>(dl_sym(handle, function_name));
}

} // namespace qnn
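
A minimal usage sketch for the loader shim above; the library name, symbol, and function signature are placeholders for illustration, not real QNN entry points:

```cpp
#include <cstdio>

#include "dl_loader.hpp"

using compute_fn = int (*)(int);  // hypothetical signature of the loaded symbol

int main() {
    // On Windows this would be a DLL name such as "example.dll".
    auto handle = qnn::dl_load("libexample.so");
    if (!handle) {
        const char *err = qnn::dl_error();
        std::printf("dl_load failed: %s\n", err ? err : "(no error string on this platform)");
        return 1;
    }

    // Resolve and call a symbol through the typed wrapper.
    if (auto fn = qnn::dl_sym_typed<compute_fn>(handle, "example_compute")) {
        std::printf("result: %d\n", fn(42));
    }

    qnn::dl_unload(handle);
    return 0;
}
```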
72 changes: 19 additions & 53 deletions ggml/src/ggml-qnn/ggml-qnn.cpp
@@ -1,23 +1,7 @@
#include "ggml-qnn.h"

#include <cassert>
#include <chrono>
#include <condition_variable>
#include <fstream>
#include <functional>
#include <iostream>
#include <list>
#include <memory>
#include <mutex>
#include <queue>
#include <random>
#include <regex>
#include <set>
#include <sstream>
#include <thread>
#include <tuple>
#include <unordered_set>
#include <utility>
#include <vector>

#include "ggml-backend-impl.h"
@@ -44,6 +28,16 @@

namespace {

#ifdef _WIN32
constexpr const char *kQnnCpuLibName = "QnnCpu.dll";
constexpr const char *kQnnGpuLibName = "QnnGpu.dll";
constexpr const char *kQnnNpuLibName = "QnnHtp.dll";
#else
constexpr const char *kQnnCpuLibName = "libQnnCpu.so";
constexpr const char *kQnnGpuLibName = "libQnnGpu.so";
constexpr const char *kQnnNpuLibName = "libQnnHtp.so";
#endif

struct qnn_device_caps {
const char *name;
const char *description;
@@ -59,23 +53,23 @@ constexpr const qnn_device_caps kDeviceCaps[] = {
// https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/CpuOpDefSupplement.html#matmul
"qnn-cpu",
"Qualcomm Kryo CPU",
"libQnnCpu.so",
kQnnCpuLibName,
GGML_BACKEND_DEVICE_TYPE_CPU,
(1 << GGML_TYPE_I8) | (1 << GGML_TYPE_F32),
},
{
// https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/GpuOpDefSupplement.html#matmul
"qnn-gpu",
"Qualcomm Adreno GPU",
"libQnnGpu.so",
kQnnGpuLibName,
GGML_BACKEND_DEVICE_TYPE_GPU,
(1 << GGML_TYPE_F32) | (1 << GGML_TYPE_F16),
},
{
// https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/HtpOpDefSupplement.html#matmul
"qnn-npu",
"Qualcomm NPU",
"libQnnHtp.so",
kQnnNpuLibName,
GGML_BACKEND_DEVICE_TYPE_ACCEL,
(1 << GGML_TYPE_F32) | (1 << GGML_TYPE_F16) | (1 << GGML_TYPE_I16) | (1 << GGML_TYPE_I8),
},
@@ -214,6 +208,8 @@ void ggml_backend_qnn_free(ggml_backend_t backend) {
instance->qnn_finalize();
instance.reset();
}

delete backend;
}

bool ggml_backend_qnn_cpy_tensor_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, const ggml_tensor *src,
@@ -332,42 +328,10 @@ ggml_backend_t ggml_backend_qnn_init_with_device_context(ggml_backend_dev_t dev,
const auto device = dev_ctx->device;
QNN_LOG_DEBUG("device %s", qnn::get_backend_name(device));
QNN_LOG_DEBUG("extend_lib_search_path %s", extend_lib_search_path);
std::string path = extend_lib_search_path;

// TODO: Fix this for other platforms
#if defined(__ANDROID__) || defined(ANDROID)
if (device == QNN_BACKEND_NPU) {
if (setenv("LD_LIBRARY_PATH",
(path + ":/vendor/dsp/cdsp:/vendor/lib64:/vendor/dsp/"
"dsp:/vendor/dsp/images")
.c_str(),
1) == 0) {
QNN_LOG_DEBUG("QNN NPU backend setenv successfully");
} else {
QNN_LOG_ERROR("QNN NPU backend setenv failure");
}
if (setenv("ADSP_LIBRARY_PATH",
(path + ";/vendor/dsp/cdsp;/vendor/lib/rfsa/adsp;/system/lib/"
"rfsa/adsp;/vendor/dsp/dsp;/vendor/dsp/images;/dsp")
.c_str(),
1) == 0) {
QNN_LOG_DEBUG("QNN NPU backend setenv successfully");
} else {
QNN_LOG_ERROR("QNN NPU backend setenv failure");
}
} else {
if (setenv("LD_LIBRARY_PATH", path.c_str(), 1) == 0) {
QNN_LOG_DEBUG("%s backend setenv successfully", qnn::get_backend_name(device));
} else {
QNN_LOG_ERROR("%s backend setenv failure", qnn::get_backend_name(device));
}
}
#endif

auto instance = std::make_shared<qnn::qnn_instance>(path, dev_ctx->lib_name, "ggml");
auto instance = std::make_shared<qnn::qnn_instance>(extend_lib_search_path, dev_ctx->lib_name);
auto result = instance->qnn_init(nullptr);
if (result != 0) {
QNN_LOG_WARN("init qnn subsystem failed with qnn backend %s, pls check why", qnn::get_backend_name(device));
QNN_LOG_WARN("failed to init qnn backend %s", qnn::get_backend_name(device));
return nullptr;
}
auto qnn_interface = instance->get_qnn_interface();
@@ -466,13 +430,15 @@ struct ggml_backend_qnn_reg_impl : ggml_backend_reg {
QNN_LOG_DEBUG("qnn backend registry init");
for (size_t i = 0; i < QNN_BACKEND_COUNT; i++) {
const auto device_enum = (QNNBackend)(QNN_BACKEND_COUNT - 1 - i); // init from the last device, i.e. NPU
#ifndef GGML_QNN_ENABLE_CPU_BACKEND
if (device_enum == QNN_BACKEND_CPU) {
/*
* here we skip the initialization of CPU device,
* cause it'll block unsupported ops fallback to ggml cpu backend
*/
continue;
}
#endif

device_contexts.emplace_back(std::make_unique<ggml_backend_qnn_device_context>(
/* .device = */ device_enum, // init from the last device, i.e. NPU
73 changes: 57 additions & 16 deletions ggml/src/ggml-qnn/graph.cpp
@@ -1,7 +1,7 @@

#include "graph.hpp"

#include <set>
#include <algorithm>
#include <unordered_map>

#include "ggml-impl.h"
@@ -106,13 +29,29 @@ bool bind_src_tensors(ggml_tensor *op, qnn::qnn_tensor_array_t &tensor_wrappers,
return true;
}

/**
* @brief Extracts input and output tensors from a computational graph.
*
* This function identifies the input and output tensors of a computational graph by analyzing the connectivity between
* tensor nodes. It does this by iterating over each node in the graph, using a connectivity map that associates every
* tensor with its number of incoming connections (in_degree), outgoing connections (out_degree), and an insertion index
* that preserves order. The insertion index is used later to sort the tensors in their original discovery order.
*
* TODO: this algorithm is not perfect and may not work for all cases. It assumes that the tensors are
* connected in a way that allows for unambiguous categorization.
*/
int get_io_tensors_from_graph(const ggml_cgraph *cgraph, qnn::ggml_tensor_array_t &inputs,
qnn::ggml_tensor_array_t &outputs) {
using ggml_tensor_set_t = std::set<ggml_tensor *>;
struct _tensor_connectivity_info {
size_t in_degree = 0;
size_t out_degree = 0;
size_t insert_index = 0;
};

ggml_tensor_set_t input_set;
ggml_tensor_set_t output_set;
ggml_tensor_set_t visited_set;
using ggml_tensor_connectivity_map_t = std::unordered_map<ggml_tensor *, _tensor_connectivity_info>;

ggml_tensor_connectivity_map_t connectivity_map;
int rank = 0;
for (int i = 0; i < cgraph->n_nodes; i++) {
ggml_tensor *dst = cgraph->nodes[i];
@@ -126,25 +142,50 @@ int get_io_tensors_from_graph(const ggml_cgraph *cgraph, qnn::ggml_tensor_array_
}

rank = std::max(rank, ggml_n_dims(dst));
input_set.erase(dst);
if (!visited_set.count(dst)) {
output_set.insert(dst);
visited_set.insert(dst);
if (connectivity_map.count(dst) == 0) {
connectivity_map[dst] = {
1, // in-degree, at least 1
0,
connectivity_map.size(),
};
} else {
++(connectivity_map[dst].in_degree);
}

for (size_t i = 0; i < GGML_MAX_DIMS && dst->src[i]; ++i) {
auto *src = dst->src[i];
rank = std::max(rank, ggml_n_dims(src));
output_set.erase(src);
if (!visited_set.count(src)) {
input_set.insert(src);
visited_set.insert(src);

if (connectivity_map.count(src) == 0) {
connectivity_map[src] = {
0,
1, // out-degree, at least 1
connectivity_map.size(),
};
} else {
++(connectivity_map[src].out_degree);
}
}
}

inputs.assign(input_set.begin(), input_set.end());
outputs.assign(output_set.begin(), output_set.end());
for (const auto &kv : connectivity_map) {
if (kv.second.in_degree == 0) {
inputs.push_back(kv.first);
}

if (kv.second.out_degree == 0) {
outputs.push_back(kv.first);
}
}

std::sort(inputs.begin(), inputs.end(), [&connectivity_map](ggml_tensor *lhs, ggml_tensor *rhs) {
return connectivity_map[lhs].insert_index < connectivity_map[rhs].insert_index;
});

std::sort(outputs.begin(), outputs.end(), [&connectivity_map](ggml_tensor *lhs, ggml_tensor *rhs) {
return connectivity_map[lhs].insert_index < connectivity_map[rhs].insert_index;
});

return rank;
}

@@ -187,7 +228,7 @@ qnn_graph::qnn_graph(const std::string &graph_name, QNNBackend device, std::shar

QnnHtpGraph_CustomConfig_t vtcm_config;
vtcm_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_VTCM_SIZE;
vtcm_config.vtcmSizeInMB = vtcm_size_in_mb;
vtcm_config.vtcmSizeInMB = (uint32_t)vtcm_size_in_mb;
QnnGraph_Config_t graph_vtcm_config;
graph_vtcm_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
graph_vtcm_config.customConfig = &vtcm_config;
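
To make the in-degree/out-degree idea from the get_io_tensors_from_graph comment concrete, here is a standalone sketch on a toy graph. The node type below is a simplified stand-in for ggml_tensor, not the actual structure, and the real implementation additionally sorts the results by insertion index to preserve discovery order:

```cpp
#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

struct node {
    std::string name;
    std::vector<node *> src;  // incoming edges (operands)
};

struct degree {
    size_t in = 0;   // how often the node is produced inside the graph
    size_t out = 0;  // how often the node is consumed inside the graph
};

int main() {
    // Toy graph: add(mul(a, b), b)
    node a{"a", {}}, b{"b", {}};
    node mul{"mul", {&a, &b}};
    node add{"add", {&mul, &b}};
    std::vector<node *> graph = {&mul, &add};

    std::unordered_map<node *, degree> deg;
    for (node *dst : graph) {
        ++deg[dst].in;  // dst is produced by this op
        for (node *src : dst->src) {
            ++deg[src].out;  // src is consumed by this op
        }
    }

    // Nodes never produced inside the graph are inputs (a, b);
    // nodes never consumed are outputs (add).
    for (const auto &kv : deg) {
        if (kv.second.in == 0) std::printf("input:  %s\n", kv.first->name.c_str());
        if (kv.second.out == 0) std::printf("output: %s\n", kv.first->name.c_str());
    }
    return 0;
}
```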