diff --git a/.gitmodules b/.gitmodules
index 8472d78404e..335e1dbd9c8 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -82,3 +82,6 @@
 [submodule "contrib/cpu_features"]
 	path = contrib/cpu_features
 	url = https://github.com/google/cpu_features
+[submodule "contrib/arm-optimized-routines"]
+	path = contrib/arm-optimized-routines
+	url = https://github.com/ARM-software/optimized-routines
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index 71f81ae3ee5..4520d1cb176 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -165,3 +165,7 @@ add_subdirectory(benchmark)
 
 set (BUILD_TESTING OFF CACHE BOOL "Disable cpu-features testing" FORCE)
 add_subdirectory(cpu_features)
+
+if (ARCH_AARCH64 AND ARCH_LINUX)
+    add_subdirectory(arm-optimized-routines-cmake)
+endif ()
diff --git a/contrib/arm-optimized-routines b/contrib/arm-optimized-routines
new file mode 160000
index 00000000000..e373f659523
--- /dev/null
+++ b/contrib/arm-optimized-routines
@@ -0,0 +1 @@
+Subproject commit e373f6595230087a8ddea449bfb14b47150b4059
diff --git a/contrib/arm-optimized-routines-cmake/CMakeLists.txt b/contrib/arm-optimized-routines-cmake/CMakeLists.txt
new file mode 100644
index 00000000000..b08a761fe62
--- /dev/null
+++ b/contrib/arm-optimized-routines-cmake/CMakeLists.txt
@@ -0,0 +1,56 @@
+# Copyright 2022 PingCAP, Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This library overrides performance-critical routines for aarch64 targets.
+# The implementations are imported from the official ARM repository.
+# To reduce dispatching cost, the indirect function (ifunc) technique is used. Therefore,
+# this library should only be enabled for ELF targets.
+
+# Considerations:
+# - As of June 2022, most enterprise OSs (CentOS 7, CentOS Stream 8 and RHEL 8) still
+#   use a relatively old glibc on ARM64, where ASIMD, MTE, DC ZVA and SVE are not
+#   fully utilized. However, it is becoming increasingly common to use ARM64 instances
+#   in cloud-native situations.
+# - The `optimized-routines` repo is actively maintained by ARM. Therefore, its quality
+#   can be relied on, and using it also lets us keep in sync with the latest
+#   acceleration techniques.
+
+enable_language(C)
+enable_language(ASM)
+
+# Anchor the submodule path to this directory so that every command below resolves
+# it to the same location, independent of how that command treats relative paths.
+set(TIFLASH_AOR_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../arm-optimized-routines")
+
+file(GLOB TIFLASH_AARCH64_STRING_FILES "${TIFLASH_AOR_DIR}/string/aarch64/*.S")
+file(GLOB TIFLASH_AARCH64_MATH_FILES "${TIFLASH_AOR_DIR}/math/*.c")
+
+# An empty glob result means the submodule sources are not present. Stop here with an
+# actionable message instead of failing later with a less obvious error.
+if (NOT TIFLASH_AARCH64_STRING_FILES OR NOT TIFLASH_AARCH64_MATH_FILES)
+    message(FATAL_ERROR
+        "No sources found under ${TIFLASH_AOR_DIR}; "
+        "run `git submodule update --init contrib/arm-optimized-routines`.")
+endif ()
+
+# String routines: every aarch64 assembly implementation plus the dispatcher in src/aor.c.
+add_library(tiflash-aarch64-string STATIC ${TIFLASH_AARCH64_STRING_FILES} src/aor.c)
+# NOTE(review): this flag applies to every source of the target, src/aor.c included, not
+# only to the SVE assembly files; presumably intentional - confirm before narrowing it.
+target_compile_options(tiflash-aarch64-string PRIVATE -march=armv8-a+sve)
+target_include_directories(tiflash-aarch64-string PRIVATE "${TIFLASH_AOR_DIR}/string/include")
+
+# Math routines: every C source located directly under math/.
+add_library(tiflash-aarch64-math STATIC ${TIFLASH_AARCH64_MATH_FILES})
+target_include_directories(tiflash-aarch64-math PRIVATE "${TIFLASH_AOR_DIR}/math/include")
diff --git a/contrib/arm-optimized-routines-cmake/src/aor.c b/contrib/arm-optimized-routines-cmake/src/aor.c
new file mode 100644
index 00000000000..daff1df3c4b
--- /dev/null
+++ b/contrib/arm-optimized-routines-cmake/src/aor.c
@@ -0,0 +1,115 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include // NOTE(review): the header names of these six directives are absent in the text under review - confirm they are intact in the real file
+#include
+#include
+#include
+#include
+#include
+
+// Provide fallback definitions for constants that the headers of the build host may not define.
+// For example, a TiFlash binary built against older Linux headers may still be run on a newer kernel.
+// These values should be stable for Linux: only false negatives are expected when running on
+// older kernels, which is acceptable since `google/cpu_features` does the same.
+#ifndef HWCAP2_MTE
+#define HWCAP2_MTE (1 << 18) // bit tested by mte_supported() in the AT_HWCAP2 value
+#endif
+
+#ifndef HWCAP_SVE
+#define HWCAP_SVE (1 << 22) // bit tested by sve_supported() in the AT_HWCAP value
+#endif
+
+#ifndef AT_HWCAP2
+#define AT_HWCAP2 26 // getauxval() key queried by mte_supported()
+#endif
+
+#ifndef AT_HWCAP
+#define AT_HWCAP 16 // getauxval() key queried by sve_supported()
+#endif
+
+/// Returns true when the HWCAP2_MTE bit is set in the value reported by getauxval(AT_HWCAP2).
+static inline bool mte_supported(void)
+{
+    return (getauxval(AT_HWCAP2) & HWCAP2_MTE) != 0;
+}
+
+/// Returns true when the HWCAP_SVE bit is set in the value reported by getauxval(AT_HWCAP).
+static inline bool sve_supported(void)
+{
+    return (getauxval(AT_HWCAP) & HWCAP_SVE) != 0;
+}
+
+#define STRINGIFY_IMPL(X) #X // stringizes the argument exactly as written
+#define STRINGIFY(X) STRINGIFY_IMPL(X) // extra level so that X is macro-expanded before being stringized
+/**
+ * \brief
+ * Each symbol is defined with hidden visibility, so the implementations here only override routines within the
+ * TiFlash binary itself. Dependencies such as `ld.so` and `libgcc_s.so` need essential routines like `memcpy`
+ * to finish the early loading procedure; declaring such symbols as visible indirect functions would therefore
+ * create a cyclic dependency. Overriding symbols within TiFlash alone should be good enough, as most heavy
+ * computation happens in the main binary.
+ * \param NAME: exported symbol name; it is declared as a hidden alias of the ifunc symbol `__tiflash_NAME`
+ * \param SVE: implementation returned by the resolver when sve_supported() is true (checked first)
+ * \param MTE: implementation returned when sve_supported() is false and mte_supported() is true
+ * \param ASIMD: fallback for generic aarch64 targets (ASIMD is required by default for Armv8 and above); its type is used for all declarations
+ */
+#define DISPATCH(NAME, SVE, MTE, ASIMD) \
+    extern typeof(ASIMD) __tiflash_##NAME __attribute__((ifunc(STRINGIFY(__tiflash_##NAME##_resolver)))); \
+    extern typeof(ASIMD) NAME __attribute__((visibility("hidden"), alias(STRINGIFY(__tiflash_##NAME)))); \
+    _Pragma("GCC diagnostic push") _Pragma("GCC diagnostic ignored \"-Wunused-function\"") \
+    static typeof(ASIMD) * __tiflash_##NAME##_resolver(void) \
+    { \
+        if (sve_supported()) \
+        { \
+            return SVE; \
+        } \
+        if (mte_supported()) \
+        { \
+            return MTE; \
+        } \
+        return ASIMD; \
+    } \
+    _Pragma("GCC diagnostic pop")
+#undef memcpy
+#undef memmove
+#undef memset
+#undef memchr
+#undef memrchr
+#undef memcmp
+#undef strcpy
+#undef stpcpy
+#undef strcmp
+#undef strchr
+#undef strrchr
+#undef strchrnul
+#undef strlen
+#undef strnlen
+#undef strncmp
+
+DISPATCH(memcpy, __memcpy_aarch64_sve, __memcpy_aarch64_simd, __memcpy_aarch64_simd)
+DISPATCH(memmove, __memmove_aarch64_sve, __memmove_aarch64_simd, __memmove_aarch64_simd)
+DISPATCH(memset, __memset_aarch64, __memset_aarch64, __memset_aarch64)
+DISPATCH(memchr, __memchr_aarch64_sve, __memchr_aarch64_mte, __memchr_aarch64)
+DISPATCH(memrchr, __memrchr_aarch64, __memrchr_aarch64, __memrchr_aarch64)
+DISPATCH(memcmp, __memcmp_aarch64_sve, __memcmp_aarch64, __memcmp_aarch64)
+DISPATCH(strcpy, __strcpy_aarch64_sve, __strcpy_aarch64, __strcpy_aarch64)
+DISPATCH(stpcpy, __stpcpy_aarch64_sve, __stpcpy_aarch64, __stpcpy_aarch64)
+DISPATCH(strcmp, __strcmp_aarch64_sve, __strcmp_aarch64, __strcmp_aarch64)
+DISPATCH(strchr, __strchr_aarch64_sve, __strchr_aarch64_mte, __strchr_aarch64)
+DISPATCH(strrchr, __strrchr_aarch64_sve, __strrchr_aarch64_mte, __strrchr_aarch64)
+DISPATCH(strchrnul, __strchrnul_aarch64_sve, __strchrnul_aarch64_mte, __strchrnul_aarch64)
+DISPATCH(strlen, __strlen_aarch64_sve, __strlen_aarch64_mte, __strlen_aarch64)
+DISPATCH(strnlen, __strnlen_aarch64_sve, __strnlen_aarch64, __strnlen_aarch64)
+DISPATCH(strncmp, __strncmp_aarch64_sve, __strncmp_aarch64, __strncmp_aarch64)
diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp
index 477844cd509..3358ae2a60e 100644
--- a/dbms/src/Server/Server.cpp
+++ b/dbms/src/Server/Server.cpp
@@ -152,6 +152,7 @@ void loadMiConfig(Logger * log)
 }
 #undef TRY_LOAD_CONF
 #endif
+
 namespace
 {
 [[maybe_unused]] void tryLoadBoolConfigFromEnv(Poco::Logger * log, bool & target, const char * name)
@@ -967,7 +968,10 @@ class Server::TcpHttpServersHolder
         LOG_DEBUG(log, debug_msg);
     }
 
-    const std::vector> & getServers() const { return servers; }
+    const std::vector> & getServers() const
+    {
+        return servers;
+    }
 
 private:
     Server & server;
@@ -1003,7 +1007,6 @@ int Server::main(const std::vector & /*args*/)
 #ifdef TIFLASH_ENABLE_SVE_SUPPORT
     tryLoadBoolConfigFromEnv(log, simd_option::ENABLE_SVE, "TIFLASH_ENABLE_SVE");
 #endif
-
     registerFunctions();
     registerAggregateFunctions();
     registerWindowFunctions();
diff --git a/libs/libcommon/CMakeLists.txt b/libs/libcommon/CMakeLists.txt
index 5fd25c5d238..2bedb312d07 100644
--- a/libs/libcommon/CMakeLists.txt
+++ b/libs/libcommon/CMakeLists.txt
@@ -198,3 +198,7 @@ if (ARCH_AMD64)
         src/crc64_sse2_asimd.cpp
         APPEND COMPILE_FLAGS "-mpclmul")
 endif()
+
+if (ARCH_AARCH64 AND ARCH_LINUX)
+    target_link_libraries (common PUBLIC tiflash-aarch64-string tiflash-aarch64-math)
+endif()