diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat index e69c7bf2516d9..850bf6821ab84 100644 --- a/ci/appveyor-cpp-build.bat +++ b/ci/appveyor-cpp-build.bat @@ -60,6 +60,7 @@ pushd cpp\build cmake -G "%GENERATOR%" %ARROW_CMAKE_ARGS% ^ -DARROW_ACERO=ON ^ + -DARROW_AZURE=OFF ^ -DARROW_BOOST_USE_SHARED=ON ^ -DARROW_BUILD_EXAMPLES=ON ^ -DARROW_BUILD_STATIC=OFF ^ diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index 125f1f48d482e..bb487f6a5f7d8 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -99,6 +99,7 @@ RUN apt-get update -y -q && \ libssl-dev \ libthrift-dev \ libutf8proc-dev \ + libxml2-dev \ libzstd-dev \ make \ ninja-build \ diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile index 0840b3fa5c68d..aa313bc0e8a58 100644 --- a/ci/docker/ubuntu-22.04-cpp.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp.dockerfile @@ -98,6 +98,7 @@ RUN apt-get update -y -q && \ libssl-dev \ libthrift-dev \ libutf8proc-dev \ + libxml2-dev \ libzstd-dev \ make \ ninja-build \ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 27d7097a2901f..5fc977cfc4acc 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -795,6 +795,11 @@ if(ARROW_WITH_OPENTELEMETRY) list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS CURL::libcurl) endif() +if(ARROW_AZURE) + list(APPEND ARROW_SHARED_LINK_LIBS ${AZURESDK_LINK_LIBRARIES}) + list(APPEND ARROW_STATIC_LINK_LIBS ${AZURESDK_LINK_LIBRARIES}) +endif() + 
if(ARROW_WITH_UTF8PROC) list(APPEND ARROW_SHARED_LINK_LIBS utf8proc::utf8proc) list(APPEND ARROW_STATIC_LINK_LIBS utf8proc::utf8proc) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 1767c05b5ee3a..c8238221bf0c8 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -548,6 +548,47 @@ else() "${THIRDPARTY_MIRROR_URL}/aws-sdk-cpp-${ARROW_AWSSDK_BUILD_VERSION}.tar.gz") endif() +if(DEFINED ENV{ARROW_AZURE_CORE_URL}) + set(AZURE_CORE_SOURCE_URL "$ENV{ARROW_AZURE_CORE_URL}") +else() + set_urls(AZURE_CORE_SOURCE_URL + "https://github.com/Azure/azure-sdk-for-cpp/archive/azure-core_${ARROW_AZURE_CORE_BUILD_VERSION}.tar.gz" + ) +endif() + +if(DEFINED ENV{ARROW_AZURE_IDENTITY_URL}) + set(AZURE_IDENTITY_SOURCE_URL "$ENV{ARROW_AZURE_IDENTITY_URL}") +else() + set_urls(AZURE_IDENTITY_SOURCE_URL + "https://github.com/Azure/azure-sdk-for-cpp/archive/azure-identity_${ARROW_AZURE_IDENTITY_BUILD_VERSION}.tar.gz" + ) +endif() + +if(DEFINED ENV{ARROW_AZURE_STORAGE_BLOBS_URL}) + set(AZURE_STORAGE_BLOBS_SOURCE_URL "$ENV{ARROW_AZURE_STORAGE_BLOBS_URL}") +else() + set_urls(AZURE_STORAGE_BLOBS_SOURCE_URL + "https://github.com/Azure/azure-sdk-for-cpp/archive/azure-storage-blobs_${ARROW_AZURE_STORAGE_BLOBS_BUILD_VERSION}.tar.gz" + ) +endif() + +if(DEFINED ENV{ARROW_AZURE_STORAGE_COMMON_URL}) + set(AZURE_STORAGE_COMMON_SOURCE_URL "$ENV{ARROW_AZURE_STORAGE_COMMON_URL}") +else() + set_urls(AZURE_STORAGE_COMMON_SOURCE_URL + "https://github.com/Azure/azure-sdk-for-cpp/archive/azure-storage-common_${ARROW_AZURE_STORAGE_COMMON_BUILD_VERSION}.tar.gz" + ) +endif() + +if(DEFINED ENV{ARROW_AZURE_STORAGE_FILES_DATALAKE_URL}) + set(AZURE_STORAGE_FILES_DATALAKE_SOURCE_URL + "$ENV{ARROW_AZURE_STORAGE_FILES_DATALAKE_URL}") +else() + set_urls(AZURE_STORAGE_FILES_DATALAKE_SOURCE_URL + 
"https://github.com/Azure/azure-sdk-for-cpp/archive/azure-storage-files-datalake_${ARROW_AZURE_STORAGE_FILES_DATALAKE_BUILD_VERSION}.tar.gz" + ) +endif() + if(DEFINED ENV{ARROW_BOOST_URL}) set(BOOST_SOURCE_URL "$ENV{ARROW_BOOST_URL}") else() @@ -5049,6 +5090,163 @@ if(ARROW_S3) endif() endif() +macro(build_azuresdk) + message(STATUS "Building Azure C++ SDK from source") + + find_curl() + find_package(LibXml2 REQUIRED) + find_package(OpenSSL ${ARROW_OPENSSL_REQUIRED_VERSION} REQUIRED) + + set(AZURESDK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/azuresdk_ep-install") + set(AZURESDK_INCLUDE_DIR "${AZURESDK_PREFIX}/include") + set(AZURESDK_LIB_DIR "lib") + + # provide hint for Azure SDK to link with the already located openssl + get_filename_component(OPENSSL_ROOT_HINT "${OPENSSL_INCLUDE_DIR}" DIRECTORY) + + set(AZURESDK_COMMON_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${AZURESDK_PREFIX}" + "-DCMAKE_PREFIX_PATH=${AZURESDK_PREFIX}" + -DBUILD_SHARED_LIBS=OFF + -DCMAKE_INSTALL_LIBDIR=${AZURESDK_LIB_DIR} + -DDISABLE_AZURE_CORE_OPENTELEMETRY=ON + -DENABLE_TESTING=OFF + -DENABLE_UNITY_BUILD=ON + -DOPENSSL_ROOT_DIR=${OPENSSL_ROOT_HINT} + -DWARNINGS_AS_ERRORS=OFF) + + file(MAKE_DIRECTORY ${AZURESDK_INCLUDE_DIR}) + + set(AZURE_CORE_STATIC_LIBRARY + "${AZURESDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}azure-core${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + externalproject_add(azure_core_ep + ${EP_LOG_OPTIONS} + INSTALL_DIR ${AZURESDK_PREFIX} + URL ${AZURE_CORE_SOURCE_URL} + URL_HASH "SHA256=${ARROW_AZURE_CORE_BUILD_SHA256_CHECKSUM}" + CMAKE_ARGS ${AZURESDK_COMMON_CMAKE_ARGS} + BUILD_BYPRODUCTS ${AZURE_CORE_STATIC_LIBRARY}) + add_library(Azure::azure-core STATIC IMPORTED) + set_target_properties(Azure::azure-core + PROPERTIES IMPORTED_LOCATION "${AZURE_CORE_STATIC_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES + "${AZURESDK_INCLUDE_DIR}") + target_link_libraries(Azure::azure-core INTERFACE CURL::libcurl LibXml2::LibXml2) + add_dependencies(Azure::azure-core azure_core_ep) + + 
set(AZURE_IDENTITY_STATIC_LIBRARY + "${AZURESDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}azure-identity${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + externalproject_add(azure_identity_ep + ${EP_LOG_OPTIONS} + INSTALL_DIR ${AZURESDK_PREFIX} + URL ${AZURE_IDENTITY_SOURCE_URL} + URL_HASH "SHA256=${ARROW_AZURE_IDENTITY_BUILD_SHA256_CHECKSUM}" + CMAKE_ARGS ${AZURESDK_COMMON_CMAKE_ARGS} + BUILD_BYPRODUCTS ${AZURE_IDENTITY_STATIC_LIBRARY}) + add_library(Azure::azure-identity STATIC IMPORTED) + set_target_properties(Azure::azure-identity + PROPERTIES IMPORTED_LOCATION "${AZURE_IDENTITY_STATIC_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES + "${AZURESDK_INCLUDE_DIR}") + target_link_libraries(Azure::azure-identity INTERFACE CURL::libcurl LibXml2::LibXml2) + add_dependencies(Azure::azure-identity azure_identity_ep) + + set(AZURE_STORAGE_BLOBS_STATIC_LIBRARY + "${AZURESDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}azure-storage-blobs${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + externalproject_add(azure_storage_blobs_ep + ${EP_LOG_OPTIONS} + INSTALL_DIR ${AZURESDK_PREFIX} + URL ${AZURE_STORAGE_BLOBS_SOURCE_URL} + URL_HASH "SHA256=${ARROW_AZURE_STORAGE_BLOBS_BUILD_SHA256_CHECKSUM}" + CMAKE_ARGS ${AZURESDK_COMMON_CMAKE_ARGS} + BUILD_BYPRODUCTS ${AZURE_STORAGE_BLOBS_STATIC_LIBRARY}) + add_library(Azure::azure-storage-blobs STATIC IMPORTED) + set_target_properties(Azure::azure-storage-blobs + PROPERTIES IMPORTED_LOCATION + "${AZURE_STORAGE_BLOBS_STATIC_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES + "${AZURESDK_INCLUDE_DIR}") + target_link_libraries(Azure::azure-storage-blobs + INTERFACE Azure::azure-core CURL::libcurl LibXml2::LibXml2) + add_dependencies(Azure::azure-storage-blobs azure_storage_blobs_ep) + + set(AZURE_STORAGE_COMMON_STATIC_LIBRARY + "${AZURESDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}azure-storage-common${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + externalproject_add(azure_storage_common_ep + ${EP_LOG_OPTIONS} + INSTALL_DIR ${AZURESDK_PREFIX} + URL ${AZURE_STORAGE_COMMON_SOURCE_URL} + 
URL_HASH "SHA256=${ARROW_AZURE_STORAGE_COMMON_BUILD_SHA256_CHECKSUM}" + CMAKE_ARGS ${AZURESDK_COMMON_CMAKE_ARGS} + BUILD_BYPRODUCTS ${AZURE_STORAGE_COMMON_STATIC_LIBRARY}) + add_library(Azure::azure-storage-common STATIC IMPORTED) + set_target_properties(Azure::azure-storage-common + PROPERTIES IMPORTED_LOCATION + "${AZURE_STORAGE_COMMON_STATIC_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES + "${AZURESDK_INCLUDE_DIR}") + target_link_libraries(Azure::azure-storage-common INTERFACE CURL::libcurl + LibXml2::LibXml2) + add_dependencies(Azure::azure-storage-common azure_storage_common_ep) + # APPEND keeps the CURL/LibXml2 entries that target_link_libraries() added above + set_property(TARGET Azure::azure-storage-common APPEND PROPERTY INTERFACE_LINK_LIBRARIES + OpenSSL::Crypto) + + set(AZURE_STORAGE_FILES_DATALAKE_STATIC_LIBRARY + "${AZURESDK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}azure-storage-files-datalake${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + externalproject_add(azure_storage_files_datalake_ep + ${EP_LOG_OPTIONS} + INSTALL_DIR ${AZURESDK_PREFIX} + URL ${AZURE_STORAGE_FILES_DATALAKE_SOURCE_URL} + URL_HASH "SHA256=${ARROW_AZURE_STORAGE_FILES_DATALAKE_BUILD_SHA256_CHECKSUM}" + CMAKE_ARGS ${AZURESDK_COMMON_CMAKE_ARGS} + BUILD_BYPRODUCTS ${AZURE_STORAGE_FILES_DATALAKE_STATIC_LIBRARY}) + add_library(Azure::azure-storage-files-datalake STATIC IMPORTED) + set_target_properties(Azure::azure-storage-files-datalake + PROPERTIES IMPORTED_LOCATION + "${AZURE_STORAGE_FILES_DATALAKE_STATIC_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES + "${AZURESDK_INCLUDE_DIR}") + target_link_libraries(Azure::azure-storage-files-datalake + INTERFACE Azure::azure-core + Azure::azure-identity + Azure::azure-storage-blobs + Azure::azure-storage-common + CURL::libcurl + LibXml2::LibXml2) + add_dependencies(Azure::azure-storage-files-datalake azure_storage_files_datalake_ep) + + set(AZURESDK_LIBRARIES) + list(APPEND + AZURESDK_LIBRARIES + Azure::azure-core + Azure::azure-storage-blobs + Azure::azure-identity + Azure::azure-storage-common + Azure::azure-storage-files-datalake) + list(APPEND + 
ARROW_BUNDLED_STATIC_LIBS + Azure::azure-core + Azure::azure-storage-blobs + Azure::azure-identity + Azure::azure-storage-common + Azure::azure-storage-files-datalake) + + set(AZURESDK_LINK_LIBRARIES ${AZURESDK_LIBRARIES}) +endmacro() + +if(ARROW_AZURE) + build_azuresdk() + message(STATUS "Found Azure SDK headers: ${AZURESDK_INCLUDE_DIR}") + message(STATUS "Found Azure SDK libraries: ${AZURESDK_LINK_LIBRARIES}") +endif() + # ---------------------------------------------------------------------- # ucx - communication framework for modern, high-bandwidth and low-latency networks diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 8edaa422b3dcf..06cf0964fe3c7 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -53,6 +53,16 @@ ARROW_AWS_LC_BUILD_VERSION=v1.3.0 ARROW_AWS_LC_BUILD_SHA256_CHECKSUM=ae96a3567161552744fc0cae8b4d68ed88b1ec0f3d3c98700070115356da5a37 ARROW_AWSSDK_BUILD_VERSION=1.10.55 ARROW_AWSSDK_BUILD_SHA256_CHECKSUM=2d552fb1a84bef4a9b65e34aa7031851ed2aef5319e02cc6e4cb735c48aa30de +ARROW_AZURE_CORE_BUILD_VERSION=1.7.1 +ARROW_AZURE_CORE_BUILD_SHA256_CHECKSUM=ae6f03e65d9773d11cf3b9619d0bc7f567272974cf31b9e1c8ca2fa0ea4fb4c6 +ARROW_AZURE_IDENTITY_BUILD_VERSION=1.3.0 +ARROW_AZURE_IDENTITY_BUILD_SHA256_CHECKSUM=46701acd8000f317d1c4b33263d5d3203924fadcfa5af4860ae9187046a72c45 +ARROW_AZURE_STORAGE_BLOBS_BUILD_VERSION=12.5.0 +ARROW_AZURE_STORAGE_BLOBS_BUILD_SHA256_CHECKSUM=12394d864144ced9fc3562ad48cfe3426604e871b5aa72853ca398e086f0c594 +ARROW_AZURE_STORAGE_COMMON_BUILD_VERSION=12.2.4 +ARROW_AZURE_STORAGE_COMMON_BUILD_SHA256_CHECKSUM=7644b4355b492ba2039236b9fd56c3e7bb80aad983d8bac6a731d74aaf64e03f +ARROW_AZURE_STORAGE_FILES_DATALAKE_BUILD_VERSION=12.3.1 +ARROW_AZURE_STORAGE_FILES_DATALAKE_BUILD_SHA256_CHECKSUM=a5b74076a751d7cfaf7c56674a40ce2792c4fab9add18758fab1fe091d00baff ARROW_BOOST_BUILD_VERSION=1.81.0 ARROW_BOOST_BUILD_SHA256_CHECKSUM=9e0ffae35528c35f90468997bc8d99500bf179cbae355415a89a600c38e13574 
ARROW_BROTLI_BUILD_VERSION=v1.0.9 diff --git a/cpp/vcpkg.json b/cpp/vcpkg.json index 71855dafdea35..37afc531aba81 100644 --- a/cpp/vcpkg.json +++ b/cpp/vcpkg.json @@ -14,6 +14,11 @@ "transfer" ] }, + "azure-core-cpp", + "azure-identity-cpp", + "azure-storage-blobs-cpp", + "azure-storage-common-cpp", + "azure-storage-files-datalake-cpp", "benchmark", "boost-filesystem", "boost-multiprecision",