From 12be56994b087c71980b8e5d3a962e2d53c79fd2 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Mon, 8 Jul 2024 17:52:58 +0900 Subject: [PATCH] GH-42149: [C++] Use FetchContent for bundled ORC (#43011) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change In addition to switching the bundled ORC build to FetchContent, this includes a workaround for https://issues.apache.org/jira/browse/ORC-1732 . ### What changes are included in this PR? ORC 2.0.1 has a dependency detection problem. We can't override the detection with ExternalProject, but we can with FetchContent, so the bundled ORC is built with FetchContent when CMake is 3.29 or later (older CMake versions keep the ExternalProject build). ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. * GitHub Issue: #42149 Authored-by: Sutou Kouhei Signed-off-by: Raúl Cumplido --- cpp/CMakeLists.txt | 8 + cpp/cmake_modules/ThirdpartyToolchain.cmake | 279 ++++++++++++------ cpp/thirdparty/versions.txt | 4 +- .../linux-packages/apache-arrow/debian/rules | 1 + 4 files changed, 201 insertions(+), 91 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 679842c31e0b1..2e2a4971840a8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -71,6 +71,14 @@ if(POLICY CMP0135) cmake_policy(SET CMP0135 NEW) endif() +# https://cmake.org/cmake/help/latest/policy/CMP0170.html +# +# With CMP0170 set to NEW, FetchContent requires dependencies to already be +# populated by the user when FETCHCONTENT_FULLY_DISCONNECTED=ON. 
+if(POLICY CMP0170) + cmake_policy(SET CMP0170 NEW) +endif() + set(ARROW_VERSION "17.0.0-SNAPSHOT") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}") diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 7dab0a362ff24..8cb3ec83f57db 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2532,6 +2532,7 @@ macro(build_zlib) set_property(TARGET ZLIB::ZLIB PROPERTY IMPORTED_LOCATION "${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a") + target_include_directories(ZLIB::ZLIB INTERFACE "${EMSCRIPTEN_SYSROOT}/include") list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB) else() set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install") @@ -4490,116 +4491,216 @@ target_include_directories(arrow::hadoop INTERFACE "${HADOOP_HOME}/include") # ---------------------------------------------------------------------- # Apache ORC -macro(build_orc) +function(build_orc) message(STATUS "Building Apache ORC from source") - set(ORC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/orc_ep-install") - set(ORC_HOME "${ORC_PREFIX}") - set(ORC_INCLUDE_DIR "${ORC_PREFIX}/include") - set(ORC_STATIC_LIB - "${ORC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}orc${CMAKE_STATIC_LIBRARY_SUFFIX}") + if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.29) + fetchcontent_declare(orc + ${FC_DECLARE_COMMON_OPTIONS} + URL ${ORC_SOURCE_URL} + URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}") + prepare_fetchcontent() + + set(CMAKE_UNITY_BUILD FALSE) + + set(ORC_PREFER_STATIC_LZ4 + OFF + CACHE BOOL "" FORCE) + get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(LZ4_ROOT "${LZ4_INCLUDE_DIR}" DIRECTORY) + set(LZ4_HOME + ${LZ4_ROOT} + CACHE STRING "" FORCE) + set(LZ4_LIBRARY + LZ4::lz4 + CACHE STRING "" FORCE) + + set(ORC_PREFER_STATIC_PROTOBUF + OFF + CACHE BOOL "" FORCE) + get_target_property(PROTOBUF_INCLUDE_DIR 
${ARROW_PROTOBUF_LIBPROTOBUF} + INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(Protobuf_ROOT "${PROTOBUF_INCLUDE_DIR}" DIRECTORY) + set(PROTOBUF_HOME + ${Protobuf_ROOT} + CACHE STRING "" FORCE) + # ORC uses this. + target_include_directories(${ARROW_PROTOBUF_LIBPROTOC} + INTERFACE "${PROTOBUF_INCLUDE_DIR}") + set(PROTOBUF_EXECUTABLE ${ARROW_PROTOBUF_PROTOC}) + set(PROTOBUF_LIBRARY ${ARROW_PROTOBUF_LIBPROTOBUF}) + set(PROTOC_LIBRARY ${ARROW_PROTOBUF_LIBPROTOC}) + + set(ORC_PREFER_STATIC_SNAPPY + OFF + CACHE BOOL "" FORCE) + get_target_property(SNAPPY_INCLUDE_DIR ${Snappy_TARGET} INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(Snappy_ROOT "${SNAPPY_INCLUDE_DIR}" DIRECTORY) + set(SNAPPY_HOME + ${Snappy_ROOT} + CACHE STRING "" FORCE) + set(SNAPPY_LIBRARY + ${Snappy_TARGET} + CACHE STRING "" FORCE) + + set(ORC_PREFER_STATIC_ZLIB + OFF + CACHE BOOL "" FORCE) + get_target_property(ZLIB_INCLUDE_DIR ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ZLIB_ROOT "${ZLIB_INCLUDE_DIR}" DIRECTORY) + set(ZLIB_HOME + ${ZLIB_ROOT} + CACHE STRING "" FORCE) + set(ZLIB_LIBRARY + ZLIB::ZLIB + CACHE STRING "" FORCE) + + set(ORC_PREFER_STATIC_ZSTD + OFF + CACHE BOOL "" FORCE) + get_target_property(ZSTD_INCLUDE_DIR ${ARROW_ZSTD_LIBZSTD} + INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ZSTD_ROOT "${ZSTD_INCLUDE_DIR}" DIRECTORY) + set(ZSTD_HOME + ${ZSTD_ROOT} + CACHE STRING "" FORCE) + set(ZSTD_LIBRARY ${ARROW_ZSTD_LIBZSTD}) + + set(BUILD_CPP_TESTS + OFF + CACHE BOOL "" FORCE) + set(BUILD_JAVA + OFF + CACHE BOOL "" FORCE) + set(BUILD_LIBHDFSPP + OFF + CACHE BOOL "" FORCE) + set(BUILD_TOOLS + OFF + CACHE BOOL "" FORCE) + set(INSTALL_VENDORED_LIBS + OFF + CACHE BOOL "" FORCE) + set(STOP_BUILD_ON_WARNING + OFF + CACHE BOOL "" FORCE) + + # We can remove this with ORC 2.0.2 or later. 
+ list(PREPEND CMAKE_MODULE_PATH + ${CMAKE_CURRENT_BINARY_DIR}/_deps/orc-src/cmake_modules) + + fetchcontent_makeavailable(orc) + + add_library(orc::orc INTERFACE IMPORTED) + target_link_libraries(orc::orc INTERFACE orc) + target_include_directories(orc::orc INTERFACE "${orc_BINARY_DIR}/c++/include" + "${orc_SOURCE_DIR}/c++/include") + + list(APPEND ARROW_BUNDLED_STATIC_LIBS orc) + else() + set(ORC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/orc_ep-install") + set(ORC_HOME "${ORC_PREFIX}") + set(ORC_INCLUDE_DIR "${ORC_PREFIX}/include") + set(ORC_STATIC_LIB + "${ORC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}orc${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) - get_target_property(ORC_PROTOBUF_ROOT ${ARROW_PROTOBUF_LIBPROTOBUF} - INTERFACE_INCLUDE_DIRECTORIES) - get_filename_component(ORC_PROTOBUF_ROOT "${ORC_PROTOBUF_ROOT}" DIRECTORY) + get_target_property(ORC_PROTOBUF_ROOT ${ARROW_PROTOBUF_LIBPROTOBUF} + INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ORC_PROTOBUF_ROOT "${ORC_PROTOBUF_ROOT}" DIRECTORY) - get_target_property(ORC_SNAPPY_INCLUDE_DIR ${Snappy_TARGET} - INTERFACE_INCLUDE_DIRECTORIES) - get_filename_component(ORC_SNAPPY_ROOT "${ORC_SNAPPY_INCLUDE_DIR}" DIRECTORY) + get_target_property(ORC_SNAPPY_INCLUDE_DIR ${Snappy_TARGET} + INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ORC_SNAPPY_ROOT "${ORC_SNAPPY_INCLUDE_DIR}" DIRECTORY) - get_target_property(ORC_LZ4_ROOT LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) - get_filename_component(ORC_LZ4_ROOT "${ORC_LZ4_ROOT}" DIRECTORY) + get_target_property(ORC_LZ4_ROOT LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ORC_LZ4_ROOT "${ORC_LZ4_ROOT}" DIRECTORY) - get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES) - get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY) + get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY) - set(ORC_CMAKE_ARGS - 
${EP_COMMON_CMAKE_ARGS} - "-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}" - -DSTOP_BUILD_ON_WARNING=OFF - -DBUILD_LIBHDFSPP=OFF - -DBUILD_JAVA=OFF - -DBUILD_TOOLS=OFF - -DBUILD_CPP_TESTS=OFF - -DINSTALL_VENDORED_LIBS=OFF - "-DLZ4_HOME=${ORC_LZ4_ROOT}" - "-DPROTOBUF_EXECUTABLE=$<TARGET_FILE:${ARROW_PROTOBUF_PROTOC}>" - "-DPROTOBUF_HOME=${ORC_PROTOBUF_ROOT}" - "-DPROTOBUF_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_PROTOBUF_LIBPROTOBUF},INTERFACE_INCLUDE_DIRECTORIES>" - "-DPROTOBUF_LIBRARY=$<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOBUF}>" - "-DPROTOC_LIBRARY=$<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOC}>" - "-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}" - "-DSNAPPY_LIBRARY=$<TARGET_FILE:${Snappy_TARGET}>" - "-DLZ4_LIBRARY=$<TARGET_FILE:LZ4::lz4>" - "-DLZ4_STATIC_LIB=$<TARGET_FILE:LZ4::lz4>" - "-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include" - "-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}" - "-DZSTD_HOME=${ORC_ZSTD_ROOT}" - "-DZSTD_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_ZSTD_LIBZSTD},INTERFACE_INCLUDE_DIRECTORIES>" - "-DZSTD_LIBRARY=$<TARGET_FILE:${ARROW_ZSTD_LIBZSTD}>") - if(ZLIB_ROOT) - set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS} "-DZLIB_HOME=${ZLIB_ROOT}") - endif() + set(ORC_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}" + -DSTOP_BUILD_ON_WARNING=OFF + -DBUILD_LIBHDFSPP=OFF + -DBUILD_JAVA=OFF + -DBUILD_TOOLS=OFF + -DBUILD_CPP_TESTS=OFF + -DINSTALL_VENDORED_LIBS=OFF + "-DLZ4_HOME=${ORC_LZ4_ROOT}" + "-DPROTOBUF_EXECUTABLE=$<TARGET_FILE:${ARROW_PROTOBUF_PROTOC}>" + "-DPROTOBUF_HOME=${ORC_PROTOBUF_ROOT}" + "-DPROTOBUF_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_PROTOBUF_LIBPROTOBUF},INTERFACE_INCLUDE_DIRECTORIES>" + "-DPROTOBUF_LIBRARY=$<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOBUF}>" + "-DPROTOC_LIBRARY=$<TARGET_FILE:${ARROW_PROTOBUF_LIBPROTOC}>" + "-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}" + "-DSNAPPY_LIBRARY=$<TARGET_FILE:${Snappy_TARGET}>" + "-DLZ4_LIBRARY=$<TARGET_FILE:LZ4::lz4>" + "-DLZ4_STATIC_LIB=$<TARGET_FILE:LZ4::lz4>" + "-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include" + "-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}" + "-DZSTD_HOME=${ORC_ZSTD_ROOT}" + "-DZSTD_INCLUDE_DIR=$<TARGET_PROPERTY:${ARROW_ZSTD_LIBZSTD},INTERFACE_INCLUDE_DIRECTORIES>" + "-DZSTD_LIBRARY=$<TARGET_FILE:${ARROW_ZSTD_LIBZSTD}>") + if(ZLIB_ROOT) + set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS} "-DZLIB_HOME=${ZLIB_ROOT}") + endif() - # Work around CMake bug - file(MAKE_DIRECTORY ${ORC_INCLUDE_DIR}) + # Work around CMake bug + file(MAKE_DIRECTORY ${ORC_INCLUDE_DIR}) - externalproject_add(orc_ep - ${EP_COMMON_OPTIONS} - URL ${ORC_SOURCE_URL} - URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}" - BUILD_BYPRODUCTS ${ORC_STATIC_LIB} - CMAKE_ARGS ${ORC_CMAKE_ARGS} - DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF} - ${ARROW_PROTOBUF_PROTOC} - ${ARROW_ZSTD_LIBZSTD} - ${Snappy_TARGET} - 
LZ4::lz4 - ZLIB::ZLIB) - - set(ORC_VENDORED 1) - - add_library(orc::orc STATIC IMPORTED) - set_target_properties(orc::orc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}") - target_include_directories(orc::orc BEFORE INTERFACE "${ORC_INCLUDE_DIR}") - target_link_libraries(orc::orc INTERFACE LZ4::lz4 ZLIB::ZLIB ${ARROW_ZSTD_LIBZSTD} - ${Snappy_TARGET}) - # Protobuf generated files may use ABSL_DCHECK*() and - # absl::log_internal_check_op is needed for them. - if(TARGET absl::log_internal_check_op) - target_link_libraries(orc::orc INTERFACE absl::log_internal_check_op) - endif() - if(NOT MSVC) - if(NOT APPLE AND ARROW_ENABLE_THREADING) - target_link_libraries(orc::orc INTERFACE Threads::Threads) - endif() - target_link_libraries(orc::orc INTERFACE ${CMAKE_DL_LIBS}) - endif() - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9") - target_link_libraries(orc::orc INTERFACE stdc++fs) + externalproject_add(orc_ep + ${EP_COMMON_OPTIONS} + URL ${ORC_SOURCE_URL} + URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}" + BUILD_BYPRODUCTS ${ORC_STATIC_LIB} + CMAKE_ARGS ${ORC_CMAKE_ARGS} + DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF} + ${ARROW_PROTOBUF_PROTOC} + ${ARROW_ZSTD_LIBZSTD} + ${Snappy_TARGET} + LZ4::lz4 + ZLIB::ZLIB) + add_library(orc::orc STATIC IMPORTED) + set_target_properties(orc::orc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}") + target_include_directories(orc::orc BEFORE INTERFACE "${ORC_INCLUDE_DIR}") + target_link_libraries(orc::orc INTERFACE LZ4::lz4 ZLIB::ZLIB ${ARROW_ZSTD_LIBZSTD} + ${Snappy_TARGET}) + # Protobuf generated files may use ABSL_DCHECK*() and + # absl::log_internal_check_op is needed for them. 
+ if(TARGET absl::log_internal_check_op) + target_link_libraries(orc::orc INTERFACE absl::log_internal_check_op) endif() - elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "8") - target_link_libraries(orc::orc INTERFACE c++fs) + if(NOT MSVC) + if(NOT APPLE AND ARROW_ENABLE_THREADING) + target_link_libraries(orc::orc INTERFACE Threads::Threads) + endif() + target_link_libraries(orc::orc INTERFACE ${CMAKE_DL_LIBS}) endif() + target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF}) + add_dependencies(orc::orc orc_ep) + list(APPEND ARROW_BUNDLED_STATIC_LIBS orc::orc) endif() - add_dependencies(orc::orc orc_ep) - - list(APPEND ARROW_BUNDLED_STATIC_LIBS orc::orc) -endmacro() + set(ORC_VENDORED + TRUE + PARENT_SCOPE) + set(ARROW_BUNDLED_STATIC_LIBS + ${ARROW_BUNDLED_STATIC_LIBS} + PARENT_SCOPE) +endfunction() if(ARROW_ORC) resolve_dependency(orc HAVE_ALT TRUE) - target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF}) if(ORC_VENDORED) set(ARROW_ORC_VERSION ${ARROW_ORC_BUILD_VERSION}) else() + target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF}) set(ARROW_ORC_VERSION ${orcAlt_VERSION}) + message(STATUS "Found ORC static library: ${ORC_STATIC_LIB}") + message(STATUS "Found ORC headers: ${ORC_INCLUDE_DIR}") endif() - message(STATUS "Found ORC static library: ${ORC_STATIC_LIB}") - message(STATUS "Found ORC headers: ${ORC_INCLUDE_DIR}") endif() # ---------------------------------------------------------------------- diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 986ac056b61a6..ab988badec145 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -90,8 +90,8 @@ ARROW_OPENTELEMETRY_BUILD_VERSION=v1.13.0 ARROW_OPENTELEMETRY_BUILD_SHA256_CHECKSUM=7735cc56507149686e6019e06f588317099d4522480be5f38a2a09ec69af1706 ARROW_OPENTELEMETRY_PROTO_BUILD_VERSION=v0.17.0 
ARROW_OPENTELEMETRY_PROTO_BUILD_SHA256_CHECKSUM=f269fbcb30e17b03caa1decd231ce826e59d7651c0f71c3b28eb5140b4bb5412 -ARROW_ORC_BUILD_VERSION=2.0.0 -ARROW_ORC_BUILD_SHA256_CHECKSUM=9107730919c29eb39efaff1b9e36166634d1d4d9477e5fee76bfd6a8fec317df +ARROW_ORC_BUILD_VERSION=2.0.1 +ARROW_ORC_BUILD_SHA256_CHECKSUM=1ffac0228aa83f04a1b1cf2788a3af5953e82587ae3a77c41900e99f2557132d ARROW_PROTOBUF_BUILD_VERSION=v21.3 ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM=2f723218f6cb709ae4cdc4fb5ed56a5951fc5d466f0128ce4c946b8c78c8c49f # Because of https://github.com/Tencent/rapidjson/pull/1323, we require diff --git a/dev/tasks/linux-packages/apache-arrow/debian/rules b/dev/tasks/linux-packages/apache-arrow/debian/rules index 83bcad98a7a6e..6c3074ab234e1 100755 --- a/dev/tasks/linux-packages/apache-arrow/debian/rules +++ b/dev/tasks/linux-packages/apache-arrow/debian/rules @@ -51,6 +51,7 @@ override_dh_auto_configure: -DARROW_WITH_ZSTD=ON \ -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ -DCUDAToolkit_ROOT=/usr \ + -DFETCHCONTENT_FULLY_DISCONNECTED=OFF \ -DPARQUET_BUILD_EXECUTABLES=ON \ -DPARQUET_REQUIRE_ENCRYPTION=ON