Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FastTokenizer] Support FastTokenizer on Android #783

Merged
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 1 addition & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -425,13 +425,6 @@ if(ENABLE_VISION)
endif()
endif()

# The text module depends on fast_tokenizer. Android is handled by the
# dedicated Android branches in cmake/fast_tokenizer.cmake, so only iOS
# still has to be switched off here.
if(IOS AND ENABLE_TEXT)
  set(ENABLE_TEXT OFF CACHE BOOL "Force ENABLE_TEXT OFF" FORCE)
  message(STATUS "Found IOS, force ENABLE_TEXT OFF. We do not support fast_tokenizer with IOS now.")
endif()

if(ENABLE_TEXT)
add_definitions(-DENABLE_TEXT)
list(APPEND ALL_DEPLOY_SRCS ${DEPLOY_TEXT_SRCS})
Expand Down Expand Up @@ -488,7 +481,7 @@ set_target_properties(${LIBRARY_NAME} PROPERTIES VERSION ${FASTDEPLOY_VERSION})
if(MSVC)
  # disable warnings for dll export
  target_compile_options(${LIBRARY_NAME} PRIVATE "$<$<BUILD_INTERFACE:$<COMPILE_LANGUAGE:CXX>>:/wd4251>$<$<BUILD_INTERFACE:$<COMPILE_LANGUAGE:CUDA>>:-Xcompiler=/wd4251>")
  # reduce.cc is compiled with /bigobj. The glob resolves the path once and
  # yields an empty list (so the property call is a no-op) if the file is absent.
  file(GLOB FD_FILES_REQUIRE_BIGOBJ ${CSRCS_DIR_NAME}/fastdeploy/function/reduce.cc)
  set_source_files_properties(${FD_FILES_REQUIRE_BIGOBJ} PROPERTIES COMPILE_FLAGS "/bigobj")
endif()

Expand Down
15 changes: 11 additions & 4 deletions FastDeploy.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -210,11 +210,18 @@ endif()

# Wire the prebuilt fast_tokenizer library and headers into the variables
# consumed by users of this package (FASTDEPLOY_LIBS / FASTDEPLOY_INCS).
if (ENABLE_TEXT)
  if(ANDROID)
    # Only the clang toolchain is accepted for the Android prebuilt library.
    if(NOT ANDROID_TOOLCHAIN MATCHES "clang")
      message(FATAL_ERROR "Currently, only support clang toolchain while cross compiling FastDeploy for Android with FastTokenizer, but found ${ANDROID_TOOLCHAIN}.")
    endif()
    # On Android the library lives in a per-ABI sub-directory, so it is
    # imported by explicit path instead of being searched for.
    # NOTE(review): the target is declared STATIC although the location is a
    # .so file -- confirm this is intentional.
    add_library(core_tokenizers STATIC IMPORTED GLOBAL)
    set_property(TARGET core_tokenizers PROPERTY IMPORTED_LOCATION
                 ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/fast_tokenizer/lib/${ANDROID_ABI}/libcore_tokenizers.so)
    list(APPEND FASTDEPLOY_LIBS core_tokenizers)
  else()
    # Add dependency libs later: Linux/Mac/Win/...
    find_library(FAST_TOKENIZER_LIB core_tokenizers ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/fast_tokenizer/lib NO_DEFAULT_PATH)
    list(APPEND FASTDEPLOY_LIBS ${FAST_TOKENIZER_LIB})
  endif()
  # Header locations are identical on every platform.
  list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/fast_tokenizer/include)
  list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/fast_tokenizer/third_party/include)
endif()
Expand Down
91 changes: 64 additions & 27 deletions cmake/fast_tokenizer.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,16 @@ set(FASTTOKENIZER_INC_DIR
"${FASTTOKENIZER_INSTALL_DIR}/include"
"${FASTTOKENIZER_INSTALL_DIR}/third_party/include"
CACHE PATH "fast_tokenizer include directory." FORCE)
# Directory holding the fast_tokenizer library. On Android the library is
# placed in a per-ABI sub-directory of lib/.
if(ANDROID)
  set(FASTTOKENIZER_LIB_DIR
      "${FASTTOKENIZER_INSTALL_DIR}/lib/${ANDROID_ABI}"
      CACHE PATH "fast_tokenizer lib directory." FORCE)
else()
  set(FASTTOKENIZER_LIB_DIR
      "${FASTTOKENIZER_INSTALL_DIR}/lib/"
      CACHE PATH "fast_tokenizer lib directory." FORCE)
endif()

# Directory holding the third-party libraries bundled with fast_tokenizer
# (the ICU import libraries on Windows are resolved relative to it).
set(FASTTOKENIZER_THIRD_LIB_DIR
    "${FASTTOKENIZER_INSTALL_DIR}/third_party/lib/"
    CACHE PATH "fast_tokenizer third-party lib directory." FORCE)
Expand All @@ -37,21 +44,21 @@ include_directories(${FASTTOKENIZER_INC_DIR})

# Set lib path
# Pick the platform-specific file name of the fast_tokenizer library.
if(WIN32)
  set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/core_tokenizers.lib"
      CACHE FILEPATH "fast_tokenizer compile library." FORCE)
  # ICU import libraries shipped next to fast_tokenizer on Windows.
  set(ICUDT_LIB "${FASTTOKENIZER_THIRD_LIB_DIR}/icudt.lib")
  set(ICUUC_LIB "${FASTTOKENIZER_THIRD_LIB_DIR}/icuuc.lib")
elseif(APPLE)
  set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/libcore_tokenizers.dylib"
      CACHE FILEPATH "fast_tokenizer compile library." FORCE)
else()
  # Android and the remaining platforms share the same file name; on Android
  # FASTTOKENIZER_LIB_DIR already contains the ABI sub-directory.
  set(FASTTOKENIZER_COMPILE_LIB "${FASTTOKENIZER_LIB_DIR}/libcore_tokenizers.so"
      CACHE FILEPATH "fast_tokenizer compile library." FORCE)
endif()
message("FASTTOKENIZER_COMPILE_LIB = ${FASTTOKENIZER_COMPILE_LIB}")

# Download location and version of the prebuilt fast_tokenizer archives.
# The version is embedded in the archive file name, and the base URL is
# prefixed to that file name to form the download URL.
set(FASTTOKENIZER_URL_BASE "https://bj.bcebos.com/paddlenlp/fast_tokenizer/")
set(FASTTOKENIZER_VERSION "1.0.0")
Expand All @@ -68,6 +75,15 @@ elseif(APPLE)
else()
set(FASTTOKENIZER_FILE "fast_tokenizer-osx-x86_64-${FASTTOKENIZER_VERSION}.tgz")
endif()
elseif(ANDROID)
# check ABI, toolchain
if((NOT ANDROID_ABI MATCHES "armeabi-v7a") AND (NOT ANDROID_ABI MATCHES "arm64-v8a"))
message(FATAL_ERROR "FastDeploy with FastTokenizer on Android only support armeabi-v7a, arm64-v8a now.")
endif()
if(NOT ANDROID_TOOLCHAIN MATCHES "clang")
message(FATAL_ERROR "Currently, only support clang toolchain while cross compiling FastDeploy for Android with FastTokenizer, but found ${ANDROID_TOOLCHAIN}.")
endif()
set(FASTTOKENIZER_FILE "fast_tokenizer-android-${ANDROID_ABI}-${FASTTOKENIZER_VERSION}.tgz")
else()
if(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64")
set(FASTTOKENIZER_FILE "fast_tokenizer-linux-aarch64-${FASTTOKENIZER_VERSION}.tgz")
Expand All @@ -77,18 +93,39 @@ else()
endif()
# Full download URL: the base URL followed by the platform-specific archive name.
set(FASTTOKENIZER_URL "${FASTTOKENIZER_URL_BASE}${FASTTOKENIZER_FILE}")

# Download the prebuilt fast_tokenizer archive and copy it into
# FASTTOKENIZER_INSTALL_DIR. Nothing is configured or built.
if(ANDROID)
  # Android layout: the archive's lib/ directory is installed as
  # lib/${ANDROID_ABI}, matching FASTTOKENIZER_LIB_DIR on Android.
  # Each install step is a separate COMMAND, because ExternalProject does not
  # define the behavior of shell operators such as &&. The library directory
  # is copied rather than renamed so that re-running the install step does
  # not fail on an already moved source directory.
  ExternalProject_Add(
    ${FASTTOKENIZER_PROJECT}
    ${EXTERNAL_PROJECT_LOG_ARGS}
    URL ${FASTTOKENIZER_URL}
    PREFIX ${FASTTOKENIZER_PREFIX_DIR}
    DOWNLOAD_NO_PROGRESS 1
    CONFIGURE_COMMAND ""
    BUILD_COMMAND ""
    UPDATE_COMMAND ""
    INSTALL_COMMAND
      ${CMAKE_COMMAND} -E remove_directory ${FASTTOKENIZER_INSTALL_DIR}
    COMMAND ${CMAKE_COMMAND} -E make_directory ${FASTTOKENIZER_INSTALL_DIR}
    COMMAND ${CMAKE_COMMAND} -E make_directory ${FASTTOKENIZER_INSTALL_DIR}/lib
    COMMAND ${CMAKE_COMMAND} -E make_directory ${FASTTOKENIZER_INSTALL_DIR}/third_party
    COMMAND ${CMAKE_COMMAND} -E copy_directory ${FASTTOKENIZER_SOURCE_DIR}/lib ${FASTTOKENIZER_INSTALL_DIR}/lib/${ANDROID_ABI}
    COMMAND ${CMAKE_COMMAND} -E copy_directory ${FASTTOKENIZER_SOURCE_DIR}/include ${FASTTOKENIZER_INSTALL_DIR}/include
    COMMAND ${CMAKE_COMMAND} -E copy_directory ${FASTTOKENIZER_SOURCE_DIR}/third_party/include ${FASTTOKENIZER_INSTALL_DIR}/third_party/include
    BUILD_BYPRODUCTS ${FASTTOKENIZER_COMPILE_LIB})
else()
  # Other platforms: the extracted archive is copied over unchanged.
  ExternalProject_Add(
    ${FASTTOKENIZER_PROJECT}
    ${EXTERNAL_PROJECT_LOG_ARGS}
    URL ${FASTTOKENIZER_URL}
    PREFIX ${FASTTOKENIZER_PREFIX_DIR}
    DOWNLOAD_NO_PROGRESS 1
    CONFIGURE_COMMAND ""
    BUILD_COMMAND ""
    UPDATE_COMMAND ""
    INSTALL_COMMAND
      ${CMAKE_COMMAND} -E copy_directory ${FASTTOKENIZER_SOURCE_DIR} ${FASTTOKENIZER_INSTALL_DIR}
    BUILD_BYPRODUCTS ${FASTTOKENIZER_COMPILE_LIB})
endif()

# Expose the downloaded library as a globally visible imported target whose
# location is FASTTOKENIZER_COMPILE_LIB.
# NOTE(review): the target is declared STATIC although the non-Windows paths
# assigned to FASTTOKENIZER_COMPILE_LIB end in .so/.dylib -- confirm this is intentional.
add_library(fast_tokenizer STATIC IMPORTED GLOBAL)
set_property(TARGET fast_tokenizer PROPERTY IMPORTED_LOCATION ${FASTTOKENIZER_COMPILE_LIB})
Expand Down
4 changes: 4 additions & 0 deletions fastdeploy/backends/lite/lite_backend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,10 @@ bool LiteBackend::Infer(std::vector<FDTensor>& inputs,
tensor->CopyFromCpu<uint8_t, paddle::lite_api::TargetType::kARM>(
reinterpret_cast<const uint8_t*>(const_cast<void*>(
inputs[i].CpuData())));
} else if (inputs[i].dtype == FDDataType::INT64) {
tensor->CopyFromCpu<int64_t, paddle::lite_api::TargetType::kARM>(
reinterpret_cast<const int64_t*>(const_cast<void*>(
inputs[i].CpuData())));
} else {
FDASSERT(false, "Unexpected data type of %d.", inputs[i].dtype);
}
Expand Down
2 changes: 1 addition & 1 deletion fastdeploy/text/uie/model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ bool UIEModel::Initialize() {

// Declares the backends this model accepts for CPU and for GPU execution.
// The CPU list includes Backend::LITE.
void UIEModel::SetValidBackend() {
  valid_cpu_backends = {Backend::ORT, Backend::OPENVINO, Backend::PDINFER, Backend::LITE};
  valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
}

Expand Down
6 changes: 3 additions & 3 deletions fastdeploy/vision/visualize/segmentation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ static cv::Mat FastVisSegmentationNEON(
const uint8_t *im_ptr = static_cast<const uint8_t*>(im.data);

if (!quantize_weight) {
#pragma omp parallel for num_threads(2) schedule(static)
#pragma omp parallel for proc_bind(close) num_threads(2) schedule(static)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里不需要WITH_OMP判断吗?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里不需要,编译器会自动判断,因为没有用到omp.h头文件,所以暂时不需要WITH_OPENMP来判断

for (int i = 0; i < size - 15; i += 16) {
uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes
// e.g 0b00000001 << 7 -> 0b10000000 128;
Expand Down Expand Up @@ -87,7 +87,7 @@ static cv::Mat FastVisSegmentationNEON(

if (new_multi_factor == 8) {
// Only keep mask, no need to blending with origin image.
#pragma omp parallel for num_threads(2) schedule(static)
#pragma omp parallel for proc_bind(close) num_threads(2) schedule(static)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

num_thread = 2这个值可以写成一个常量,如
constexpr int VISUALIZATION_NUM_THREADS=2。或者是一个变量,用户可以调用接口进行设置。

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里佳军之前的建议是暂时先不增加额外的接口给用户用,避免增加用户的学习成本。设置成2为默认值,是根据手机芯片大小核的设计来的,一般为1超+3大+4小或2超+2大+4小,跑在小核上会慢很多;在不同的机子上实验过,发现2线程加速比最优。

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

for (int i = 0; i < size - 15; i += 16) {
uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes
// e.g 0b00000001 << 7 -> 0b10000000 128;
Expand All @@ -112,7 +112,7 @@ static cv::Mat FastVisSegmentationNEON(
uint8x16_t old_mulx16 = vdupq_n_u8(old_multi_factor);
uint8x16_t new_mulx16 = vdupq_n_u8(new_multi_factor);
// Blend the two colors together with quantize 'weight'.
#pragma omp parallel for num_threads(2) schedule(static)
#pragma omp parallel for proc_bind(close) num_threads(2) schedule(static)
for (int i = 0; i < size - 15; i += 16) {
uint8x16x3_t bgrx16x3 = vld3q_u8(im_ptr + i * 3); // 48 bytes
uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes
Expand Down