diff --git a/ci/conda_env_cpp.yml b/ci/conda_env_cpp.yml
index fd21ed8d3fa..e5e4bace1ae 100644
--- a/ci/conda_env_cpp.yml
+++ b/ci/conda_env_cpp.yml
@@ -21,6 +21,7 @@ brotli
 bzip2
 c-ares
 cmake
+cpu_features
 double-conversion
 flatbuffers
 gflags
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 64e2e84aa22..4f20e33564c 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -586,6 +586,9 @@ set(ARROW_LINK_LIBS ${double-conversion_LIBRARIES})
 set(ARROW_STATIC_LINK_LIBS ${double-conversion_LIBRARIES})
 set(ARROW_STATIC_INSTALL_INTERFACE_LIBS ${double-conversion_LIBRARIES})
 
+list(APPEND ARROW_STATIC_LINK_LIBS cpu_features::cpu_features)
+list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS cpu_features::cpu_features)
+
 if(ARROW_WITH_URIPARSER)
   list(APPEND ARROW_STATIC_LINK_LIBS uriparser::uriparser)
   list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS uriparser::uriparser)
diff --git a/cpp/cmake_modules/Findcpu_features.cmake b/cpp/cmake_modules/Findcpu_features.cmake
new file mode 100644
index 00000000000..542fd241191
--- /dev/null
+++ b/cpp/cmake_modules/Findcpu_features.cmake
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Locate the cpu_features library and the directory holding its public headers.
+# When cpu_features_ROOT is set, only that prefix is searched.
+# NOTE(review): assumes the headers are included without a directory prefix
+# (e.g. <cpuinfo_x86.h>) -- confirm against the #include lines in cpu-info.h.
+if(cpu_features_ROOT)
+  find_library(cpu_features_LIB
+               NAMES cpu_features
+               PATHS ${cpu_features_ROOT}
+               NO_DEFAULT_PATH
+               PATH_SUFFIXES ${LIB_PATH_SUFFIXES})
+  find_path(cpu_features_INCLUDE_DIR
+            NAMES cpu_features_macros.h
+            PATHS ${cpu_features_ROOT}
+            NO_DEFAULT_PATH
+            PATH_SUFFIXES include include/cpu_features)
+else()
+  find_library(cpu_features_LIB NAMES cpu_features)
+  find_path(cpu_features_INCLUDE_DIR
+            NAMES cpu_features_macros.h
+            PATH_SUFFIXES cpu_features)
+endif()
+
+# Both variables are required: without cpu_features_INCLUDE_DIR the imported
+# target below would export an empty include path to its consumers.
+find_package_handle_standard_args(cpu_features REQUIRED_VARS cpu_features_LIB
+                                  cpu_features_INCLUDE_DIR)
+
+if(cpu_features_FOUND AND NOT TARGET cpu_features::cpu_features)
+  add_library(cpu_features::cpu_features UNKNOWN IMPORTED)
+  set_target_properties(cpu_features::cpu_features
+                        PROPERTIES IMPORTED_LOCATION "${cpu_features_LIB}"
+                                   INTERFACE_INCLUDE_DIRECTORIES
+                                   "${cpu_features_INCLUDE_DIR}")
+endif()
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 55153381fe5..51b72244b53 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -72,7 +72,8 @@ set(ARROW_THIRDPARTY_DEPENDENCIES
     Thrift
     uriparser
     ZLIB
-    ZSTD)
+    ZSTD
+    cpu_features)
 
 # TODO(wesm): External GTest shared libraries are not currently
 # supported when building with MSVC because of the way that
@@ -161,6 +162,8 @@ macro(build_dependency DEPENDENCY_NAME)
     build_zlib()
   elseif("${DEPENDENCY_NAME}" STREQUAL "ZSTD")
     build_zstd()
+  elseif("${DEPENDENCY_NAME}" STREQUAL "cpu_features")
+    build_cpu_features()
   else()
     message(FATAL_ERROR "Unknown thirdparty dependency to build: ${DEPENDENCY_NAME}")
   endif()
@@ -414,6 +417,12 @@ else()
   set(BZIP2_SOURCE_URL "https://fossies.org/linux/misc/bzip2-${BZIP2_VERSION}.tar.gz")
 endif()
 
+if(DEFINED ENV{ARROW_CPU_FEATURES_URL})
+  set(CPU_FEATURES_SOURCE_URL "$ENV{ARROW_CPU_FEATURES_URL}")
+else()
+  set(CPU_FEATURES_SOURCE_URL "https://github.com/google/cpu_features/archive/${CPU_FEATURES_VERSION}.tar.gz")
+endif()
+
 # ----------------------------------------------------------------------
 # ExternalProject options
 
@@ -2368,6 +2377,56 @@ if(ARROW_ORC)
   message(STATUS "Found ORC headers: ${ORC_INCLUDE_DIR}")
 endif()
 
+#
----------------------------------------------------------------------
+# cpu_features
+macro(build_cpu_features)
+  message(STATUS "Building cpu_features from source")
+  set(CPU_FEATURES_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/cpu_features_ep-install")
+
+  set(CPU_FEATURES_CMAKE_ARGS
+      ${EP_COMMON_TOOLCHAIN}
+      "-DCMAKE_INSTALL_PREFIX=${CPU_FEATURES_PREFIX}"
+      -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+      -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR})
+
+  if(MSVC)
+    set(CPU_FEATURES_STATIC_LIB "${CPU_FEATURES_PREFIX}/${CMAKE_INSTALL_LIBDIR}/cpu_features_static.lib")
+  else()
+    set(CPU_FEATURES_STATIC_LIB "${CPU_FEATURES_PREFIX}/${CMAKE_INSTALL_LIBDIR}/libcpu_features.a")
+  endif()
+
+  externalproject_add(cpu_features_ep
+                      ${EP_LOG_OPTIONS}
+                      CMAKE_ARGS ${CPU_FEATURES_CMAKE_ARGS}
+                      INSTALL_DIR ${CPU_FEATURES_PREFIX}
+                      URL ${CPU_FEATURES_SOURCE_URL}
+                      BUILD_BYPRODUCTS "${CPU_FEATURES_STATIC_LIB}")
+  # Create the include directory now so the path set on the imported target exists
+  file(MAKE_DIRECTORY "${CPU_FEATURES_PREFIX}/include")
+
+  add_library(cpu_features::cpu_features STATIC IMPORTED)
+  set_target_properties(cpu_features::cpu_features
+                        PROPERTIES IMPORTED_LOCATION "${CPU_FEATURES_STATIC_LIB}"
+                                   INTERFACE_INCLUDE_DIRECTORIES "${CPU_FEATURES_PREFIX}/include")
+
+  add_dependencies(toolchain cpu_features_ep)
+  add_dependencies(cpu_features::cpu_features cpu_features_ep)
+endmacro()
+
+set(ARROW_CPU_FEATURES_REQUIRED_VERSION "0.4.0")
+# SYSTEM must find an installed copy; AUTO and every other value fall back to the bundled build
+if(cpu_features_SOURCE STREQUAL "SYSTEM")
+  find_package(cpu_features ${ARROW_CPU_FEATURES_REQUIRED_VERSION} REQUIRED)
+elseif(cpu_features_SOURCE STREQUAL "AUTO")
+  find_package(cpu_features ${ARROW_CPU_FEATURES_REQUIRED_VERSION} QUIET)
+endif()
+if(NOT cpu_features_FOUND)
+  build_cpu_features()
+endif()
+get_target_property(cpu_features_INCLUDE_DIR cpu_features::cpu_features
+                    INTERFACE_INCLUDE_DIRECTORIES)
+include_directories(SYSTEM ${cpu_features_INCLUDE_DIR})
+
 # Write out the package configurations.
configure_file("src/arrow/util/config.h.cmake" "src/arrow/util/config.h") diff --git a/cpp/src/arrow/compute/context.cc b/cpp/src/arrow/compute/context.cc index 82c0c238ae4..10779b7795b 100644 --- a/cpp/src/arrow/compute/context.cc +++ b/cpp/src/arrow/compute/context.cc @@ -26,7 +26,7 @@ namespace arrow { namespace compute { FunctionContext::FunctionContext(MemoryPool* pool) - : pool_(pool), cpu_info_(internal::CpuInfo::GetInstance()) {} + : pool_(pool), cpu_info_(&internal::CpuInfo::GetInstance()) {} MemoryPool* FunctionContext::memory_pool() const { return pool_; } diff --git a/cpp/src/arrow/util/cpu-info.cc b/cpp/src/arrow/util/cpu-info.cc index 9a8cde67eba..a71b722e27f 100644 --- a/cpp/src/arrow/util/cpu-info.cc +++ b/cpp/src/arrow/util/cpu-info.cc @@ -15,8 +15,6 @@ // specific language governing permissions and limitations // under the License. -// From Apache Impala (incubating) as of 2016-01-29. - #include "arrow/util/cpu-info.h" #ifdef __APPLE__ @@ -26,16 +24,8 @@ #include #include -#ifndef _MSC_VER -#include -#endif - #ifdef _WIN32 -#include -#include -#include #include "arrow/util/windows_compatibility.h" - #endif #include @@ -50,285 +40,40 @@ #include "arrow/util/logging.h" -using boost::algorithm::contains; -using boost::algorithm::trim; -using std::max; - -#if defined(__MINGW64_VERSION_MAJOR) && __MINGW64_VERSION_MAJOR < 5 -void __cpuidex(int CPUInfo[4], int function_id, int subfunction_id) { - __asm__ __volatile__("cpuid" - : "=a"(CPUInfo[0]), "=b"(CPUInfo[1]), "=c"(CPUInfo[2]), - "=d"(CPUInfo[3]) - : "a"(function_id), "c"(subfunction_id)); -} -#endif - namespace arrow { namespace internal { - -static struct { - std::string name; - int64_t flag; -} flag_mappings[] = { - {"ssse3", CpuInfo::SSSE3}, - {"sse4_1", CpuInfo::SSE4_1}, - {"sse4_2", CpuInfo::SSE4_2}, - {"popcnt", CpuInfo::POPCNT}, -}; -static const int64_t num_flags = sizeof(flag_mappings) / sizeof(flag_mappings[0]); - -#ifndef _WIN32 -namespace { - -// Helper function to parse for hardware 
flags. -// values contains a list of space-seperated flags. check to see if the flags we -// care about are present. -// Returns a bitmap of flags. -int64_t ParseCPUFlags(const std::string& values) { - int64_t flags = 0; - for (int i = 0; i < num_flags; ++i) { - if (contains(values, flag_mappings[i].name)) { - flags |= flag_mappings[i].flag; - } - } - return flags; -} - -} // namespace -#endif - -#ifdef _WIN32 -bool RetrieveCacheSize(int64_t* cache_sizes) { - if (!cache_sizes) { - return false; - } - PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = nullptr; - PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer_position = nullptr; - DWORD buffer_size = 0; - size_t offset = 0; - typedef BOOL(WINAPI * GetLogicalProcessorInformationFuncPointer)(void*, void*); - GetLogicalProcessorInformationFuncPointer func_pointer = - (GetLogicalProcessorInformationFuncPointer)GetProcAddress( - GetModuleHandle("kernel32"), "GetLogicalProcessorInformation"); - - if (!func_pointer) { - return false; - } - - // Get buffer size - if (func_pointer(buffer, &buffer_size) && GetLastError() != ERROR_INSUFFICIENT_BUFFER) - return false; - - buffer = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(buffer_size); - - if (!buffer || !func_pointer(buffer, &buffer_size)) { - return false; - } - - buffer_position = buffer; - while (offset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= buffer_size) { - if (RelationCache == buffer_position->Relationship) { - PCACHE_DESCRIPTOR cache = &buffer_position->Cache; - if (cache->Level >= 1 && cache->Level <= 3) { - cache_sizes[cache->Level - 1] += cache->Size; - } - } - offset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); - buffer_position++; - } - - if (buffer) { - free(buffer); - } - return true; -} - -bool RetrieveCPUInfo(int64_t* hardware_flags, std::string* model_name) { - if (!hardware_flags || !model_name) { - return false; - } - const int register_ECX_id = 1; - int highest_valid_id = 0; - int highest_extended_valid_id = 0; - std::bitset<32> features_ECX; - 
std::array cpu_info; - - // Get highest valid id - __cpuid(cpu_info.data(), 0); - highest_valid_id = cpu_info[0]; - - if (highest_valid_id <= register_ECX_id) return false; - - __cpuidex(cpu_info.data(), register_ECX_id, 0); - features_ECX = cpu_info[2]; - - // Get highest extended id - __cpuid(cpu_info.data(), 0x80000000); - highest_extended_valid_id = cpu_info[0]; - - // Retrieve CPU model name - if (highest_extended_valid_id >= static_cast(0x80000004)) { - model_name->clear(); - for (int i = 0x80000002; i <= static_cast(0x80000004); ++i) { - __cpuidex(cpu_info.data(), i, 0); - *model_name += - std::string(reinterpret_cast(cpu_info.data()), sizeof(cpu_info)); - } - } - - if (features_ECX[9]) *hardware_flags |= CpuInfo::SSSE3; - if (features_ECX[19]) *hardware_flags |= CpuInfo::SSE4_1; - if (features_ECX[20]) *hardware_flags |= CpuInfo::SSE4_2; - if (features_ECX[23]) *hardware_flags |= CpuInfo::POPCNT; - return true; + +const cpu_features::X86Features CpuInfo::features_ = cpu_features::GetX86Info().features; + +CpuInfo& CpuInfo::GetInstance() { + // A function-local static is sufficient here: CpuInfo has no per-instance state + // and the detected features live in the constant static member features_ + static CpuInfo instance; + return instance; } -#endif - -CpuInfo::CpuInfo() : hardware_flags_(0), num_cores_(1), model_name_("unknown") {} -std::unique_ptr g_cpu_info; -static std::mutex cpuinfo_mutex; - -CpuInfo* CpuInfo::GetInstance() { - std::lock_guard lock(cpuinfo_mutex); - if (!g_cpu_info) { - g_cpu_info.reset(new CpuInfo); - g_cpu_info->Init(); - } - return g_cpu_info.get(); +const cpu_features::X86Features CpuInfo::Features() const { + return features_; } -void CpuInfo::Init() { - std::string line; - std::string name; - std::string value; - - float max_mhz = 0; - int num_cores = 0; - - memset(&cache_sizes_, 0, sizeof(cache_sizes_)); - -#ifdef _WIN32 - SYSTEM_INFO system_info; - GetSystemInfo(&system_info); - num_cores = system_info.dwNumberOfProcessors; - - LARGE_INTEGER performance_frequency; - if 
(QueryPerformanceFrequency(&performance_frequency)) { - max_mhz = static_cast(performance_frequency.QuadPart); - } -#else - // Read from /proc/cpuinfo - std::ifstream cpuinfo("/proc/cpuinfo", std::ios::in); - while (cpuinfo) { - getline(cpuinfo, line); - size_t colon = line.find(':'); - if (colon != std::string::npos) { - name = line.substr(0, colon - 1); - value = line.substr(colon + 1, std::string::npos); - trim(name); - trim(value); - if (name.compare("flags") == 0) { - hardware_flags_ |= ParseCPUFlags(value); - } else if (name.compare("cpu MHz") == 0) { - // Every core will report a different speed. We'll take the max, assuming - // that when impala is running, the core will not be in a lower power state. - // TODO: is there a more robust way to do this, such as - // Window's QueryPerformanceFrequency() - float mhz = static_cast(atof(value.c_str())); - max_mhz = max(mhz, max_mhz); - } else if (name.compare("processor") == 0) { - ++num_cores; - } else if (name.compare("model name") == 0) { - model_name_ = value; - } - } - } - if (cpuinfo.is_open()) cpuinfo.close(); -#endif - -#ifdef __APPLE__ - // On Mac OS X use sysctl() to get the cache sizes - size_t len = 0; - sysctlbyname("hw.cachesize", NULL, &len, NULL, 0); - uint64_t* data = static_cast(malloc(len)); - sysctlbyname("hw.cachesize", data, &len, NULL, 0); - DCHECK_GE(len / sizeof(uint64_t), 3); - for (size_t i = 0; i < 3; ++i) { - cache_sizes_[i] = data[i]; - } -#elif _WIN32 - if (!RetrieveCacheSize(cache_sizes_)) { - SetDefaultCacheSize(); - } - RetrieveCPUInfo(&hardware_flags_, &model_name_); -#else - SetDefaultCacheSize(); -#endif - - if (max_mhz != 0) { - cycles_per_ms_ = static_cast(max_mhz); -#ifndef _WIN32 - cycles_per_ms_ *= 1000; -#endif - } else { - cycles_per_ms_ = 1000000; - } - original_hardware_flags_ = hardware_flags_; - - if (num_cores > 0) { - num_cores_ = num_cores; - } else { - num_cores_ = 1; - } -} - -void CpuInfo::VerifyCpuRequirements() { - if (!IsSupported(CpuInfo::SSSE3)) { +void 
CpuInfo::CheckMinCpuAndHalt() { + if (!features_.ssse3) { DCHECK(false) << "CPU does not support the Supplemental SSE3 instruction set"; } } bool CpuInfo::CanUseSSE4_2() const { #ifdef ARROW_USE_SIMD - return IsSupported(CpuInfo::SSE4_2); + return features_.sse4_2; #else return false; #endif } -void CpuInfo::EnableFeature(int64_t flag, bool enable) { - if (!enable) { - hardware_flags_ &= ~flag; - } else { - // Can't turn something on that can't be supported - DCHECK_NE(original_hardware_flags_ & flag, 0); - hardware_flags_ |= flag; - } -} - -int64_t CpuInfo::hardware_flags() { return hardware_flags_; } - -int64_t CpuInfo::CacheSize(CacheLevel level) { return cache_sizes_[level]; } - -int64_t CpuInfo::cycles_per_ms() { return cycles_per_ms_; } - -int CpuInfo::num_cores() { return num_cores_; } - -std::string CpuInfo::model_name() { return model_name_; } - -void CpuInfo::SetDefaultCacheSize() { -#ifndef _SC_LEVEL1_DCACHE_SIZE - // Provide reasonable default values if no info - cache_sizes_[0] = 32 * 1024; // Level 1: 32k - cache_sizes_[1] = 256 * 1024; // Level 2: 256k - cache_sizes_[2] = 3072 * 1024; // Level 3: 3M -#else - // Call sysconf to query for the cache sizes - cache_sizes_[0] = sysconf(_SC_LEVEL1_DCACHE_SIZE); - cache_sizes_[1] = sysconf(_SC_LEVEL2_CACHE_SIZE); - cache_sizes_[2] = sysconf(_SC_LEVEL3_CACHE_SIZE); -#endif +std::string CpuInfo::ModelName() { + char brand_string[49]; + cpu_features::FillX86BrandString( brand_string ); + return brand_string; } } // namespace internal diff --git a/cpp/src/arrow/util/cpu-info.h b/cpp/src/arrow/util/cpu-info.h index 714d7ac5bc5..fbcfd2752d0 100644 --- a/cpp/src/arrow/util/cpu-info.h +++ b/cpp/src/arrow/util/cpu-info.h @@ -15,9 +15,6 @@ // specific language governing permissions and limitations // under the License. -// From Apache Impala (incubating) as of 2016-01-29. 
Pared down to a minimal
-// set of functions needed for Apache Arrow / Apache parquet-cpp
-
 #ifndef ARROW_UTIL_CPU_INFO_H
 #define ARROW_UTIL_CPU_INFO_H
 
@@ -25,74 +22,49 @@
 #include <string>
 
 #include "arrow/util/visibility.h"
+#include <cpu_features_macros.h>
+#if defined(CPU_FEATURES_ARCH_X86)
+#include <cpuinfo_x86.h>
+#endif
 
 namespace arrow {
 namespace internal {
 
 /// CpuInfo is an interface to query for cpu information at runtime. The caller can
-/// ask for the sizes of the caches and what hardware features are supported.
-/// On Linux, this information is pulled from a couple of sys files (/proc/cpuinfo and
-/// /sys/devices)
+/// query which hardware features are supported, e.g. SSSE3, SSE4.2 or AVX2.
+/// It is cross-platform because Google's cpu_features library is used under the hood
 class ARROW_EXPORT CpuInfo {
  public:
-  static constexpr int64_t SSSE3 = (1 << 1);
-  static constexpr int64_t SSE4_1 = (1 << 2);
-  static constexpr int64_t SSE4_2 = (1 << 3);
-  static constexpr int64_t POPCNT = (1 << 4);
-
-  /// Cache enums for L1 (data), L2 and L3
-  enum CacheLevel {
-    L1_CACHE = 0,
-    L2_CACHE = 1,
-    L3_CACHE = 2,
-  };
-
-  static CpuInfo* GetInstance();
-
-  /// Determine if the CPU meets the minimum CPU requirements and if not, issue an error
-  /// and terminate.
-  void VerifyCpuRequirements();
-
-  /// Returns all the flags for this cpu
-  int64_t hardware_flags();
-
-  /// Returns whether of not the cpu supports this flag
-  bool IsSupported(int64_t flag) const { return (hardware_flags_ & flag) != 0; }
-
-  /// \brief The processor supports SSE4.2 and the Arrow libraries are built
-  /// with support for it
-  bool CanUseSSE4_2() const;
-
-  /// Toggle a hardware feature on and off. It is not valid to turn on a feature
-  /// that the underlying hardware cannot support. This is useful for testing.
-  void EnableFeature(int64_t flag, bool enable);
-
-  /// Returns the size of the cache in KB at this cache level
-  int64_t CacheSize(CacheLevel level);
-
-  /// Returns the number of cpu cycles per millisecond
-  int64_t cycles_per_ms();
 
-  /// Returns the number of cores (including hyper-threaded) on this machine.
-  int num_cores();
+  /// CpuInfo is defined as a singleton without rigorous thread-safety checks,
+  /// as CPUID itself is safe to call
+  static CpuInfo& GetInstance();
 
   /// Returns the model name of the cpu (e.g. Intel i7-2600)
-  std::string model_name();
+  std::string ModelName();
 
- private:
-  CpuInfo();
-
-  void Init();
-
-  /// Inits CPU cache size variables with default values
-  void SetDefaultCacheSize();
+  /// Returns true if the CPU supports SSE4.2 and Arrow was built with ARROW_USE_SIMD
+  bool CanUseSSE4_2() const;
 
-  int64_t hardware_flags_;
-  int64_t original_hardware_flags_;
-  int64_t cache_sizes_[L3_CACHE + 1];
-  int64_t cycles_per_ms_;
-  int num_cores_;
-  std::string model_name_;
+  /// Verifies that the CPU supports Supplemental SSE3 (SSSE3) and raises a
+  /// DCHECK failure if the feature is not available
+  void CheckMinCpuAndHalt();
+
+#if defined(CPU_FEATURES_ARCH_X86)
+  /// Returns the feature flags detected for the current x86 CPU
+  const cpu_features::X86Features Features() const;
+#endif
+
+ private:
+  // Singleton: instances are only created by GetInstance() and never copied
+  CpuInfo() = default;
+  ~CpuInfo() = default;
+  CpuInfo(const CpuInfo&) = delete;
+  CpuInfo& operator=(const CpuInfo&) = delete;
+
+#if defined(CPU_FEATURES_ARCH_X86)
+  static const cpu_features::X86Features features_;
+#endif
 };
 
 }  // namespace internal
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index d960cb0d007..d3d5d4aa79b 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -49,6 +49,7 @@ THRIFT_MD5_CHECKSUM=3deebbb4d1ca77dd9c9e009a1ea02183
 URIPARSER_VERSION=0.9.2
 ZLIB_VERSION=1.2.11
 ZSTD_VERSION=v1.4.0
+CPU_FEATURES_VERSION=v0.4.1
 
 # The first field is the name of the environment variable expected by cmake.
# This _must_ match what is defined. The second field is the name of the diff --git a/dev/tasks/conda-recipes/arrow-cpp/meta.yaml b/dev/tasks/conda-recipes/arrow-cpp/meta.yaml index 4638980f128..6083d5ae4f5 100644 --- a/dev/tasks/conda-recipes/arrow-cpp/meta.yaml +++ b/dev/tasks/conda-recipes/arrow-cpp/meta.yaml @@ -23,6 +23,7 @@ requirements: - boost-cpp - brotli - c-ares + - cpu_features - double-conversion - flatbuffers - gflags @@ -48,6 +49,7 @@ requirements: - boost-cpp - brotli - c-ares + - cpu_features - double-conversion - gflags - glog