Skip to content

Commit

Permalink
Shefali pr rebased (#6)
Browse files Browse the repository at this point in the history
* ARROW-2034: [C++] Filesystem implementation for AzureBlobFileSystem

* ARROW-2034: [C++] Fixed formatting issues

* ARROW-2034: [C++] Fixed formatting issues

* Added -DARROW_AZURE in ci

* Added CXX_STANDARD and CXX_STANDARD_REQUIRED

* Added mocked test file

* Turned -DARROW_AZURE=OFF in appveyor-cpp-build

* Changed default C++ version

* Changed LibXml2 target

* Fixing CMake styling issues

* Enabling ARROW_AZURE flag

* Added OpenSSL dependency

* Disabling ARROW_AZURE in windows-mingw

* Fixing lint issues

* Fixing azurefs_test

* Added Azurite

* Added azurefs_objlib

* Reverting azure object library changes

* Added permissions to install_azurite.sh

* chmod +x ci/scripts/install_azurite.sh

* Don't specify CMAKE_CXX_STANDARD by default

* Fix system detection

* Fix syntax

* Fix style

* Fix style

* Running azurite through boost::process

* Fixed naming in azurefs_test.cc

* Fixed naming in azurefs.cc

* Fixed OpenOutputStream

* Added uri.Parse()

* Updated versions.txt

* Fixed ARROW_AZURE_STORAGE_BLOBS_URL

* Added libxml2-dev

* Fixed build errors

---------

Co-authored-by: shefali singh <shefalisingh@microsoft.com>
Co-authored-by: Sutou Kouhei <kou@clear-code.com>
  • Loading branch information
3 people committed Jul 22, 2023
1 parent a2a3b95 commit eca15b9
Show file tree
Hide file tree
Showing 9 changed files with 267 additions and 0 deletions.
1 change: 1 addition & 0 deletions ci/appveyor-cpp-build.bat
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ pushd cpp\build

cmake -G "%GENERATOR%" %CMAKE_ARGS% ^
-DARROW_ACERO=ON ^
-DARROW_AZURE=OFF ^
-DARROW_BOOST_USE_SHARED=ON ^
-DARROW_BUILD_EXAMPLES=ON ^
-DARROW_BUILD_STATIC=OFF ^
Expand Down
5 changes: 5 additions & 0 deletions ci/docker/ubuntu-20.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,12 @@ RUN apt-get update -y -q && \
libssl-dev \
libthrift-dev \
libutf8proc-dev \
libxml2-dev \
libzstd-dev \
make \
ninja-build \
nlohmann-json3-dev \
npm \
pkg-config \
protobuf-compiler \
python3-dev \
Expand All @@ -123,6 +125,9 @@ RUN /arrow/ci/scripts/install_minio.sh latest /usr/local
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

# Copy the Azurite install helper into the image and run it at build time.
COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_azurite.sh

COPY ci/scripts/install_ceph.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_ceph.sh

Expand Down
5 changes: 5 additions & 0 deletions ci/docker/ubuntu-22.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,12 @@ RUN apt-get update -y -q && \
libssl-dev \
libthrift-dev \
libutf8proc-dev \
libxml2-dev \
libzstd-dev \
make \
ninja-build \
nlohmann-json3-dev \
npm \
pkg-config \
protobuf-compiler \
protobuf-compiler-grpc \
Expand Down Expand Up @@ -156,6 +158,9 @@ RUN /arrow/ci/scripts/install_gcs_testbench.sh default
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

# Copy the Azurite install helper into the image and run it at build time.
COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_azurite.sh

# Prioritize system packages and local installation
# The following dependencies will be downloaded due to missing/invalid packages
# provided by the distribution:
Expand Down
37 changes: 37 additions & 0 deletions ci/scripts/install_azurite.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

set -e

# Install Azurite globally through npm, doing whatever platform-specific
# bootstrap is needed first. Platforms not listed below install nothing and
# fall straight through to the version report.
platform="$(uname)"
case "${platform}" in
  MINGW*)
    # MinGW shells install Node.js with choco before npm is used.
    choco install nodejs.install
    npm install -g azurite
    ;;
  Darwin | Linux)
    npm install -g azurite
    # Print where the azurite executable was found; under `set -e` a failed
    # lookup aborts the script.
    which azurite
    ;;
esac

# Report the versions of the tools that are now on PATH.
echo "node version = $(node --version)"
echo "azurite version = $(azurite --version)"
2 changes: 2 additions & 0 deletions cpp/Brewfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ brew "grpc"
brew "llvm@14"
brew "lz4"
brew "ninja"
brew "node"
brew "numpy"
brew "openssl@3"
brew "protobuf"
brew "python"
Expand Down
5 changes: 5 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -795,6 +795,11 @@ if(ARROW_WITH_OPENTELEMETRY)
list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS CURL::libcurl)
endif()

# When ARROW_AZURE is enabled, append the Azure SDK libraries to both the
# static and the shared link-library lists.
if(ARROW_AZURE)
  list(APPEND ARROW_STATIC_LINK_LIBS ${AZURESDK_LINK_LIBRARIES})
  list(APPEND ARROW_SHARED_LINK_LIBS ${AZURESDK_LINK_LIBRARIES})
endif()

if(ARROW_WITH_UTF8PROC)
list(APPEND ARROW_SHARED_LINK_LIBS utf8proc::utf8proc)
list(APPEND ARROW_STATIC_LINK_LIBS utf8proc::utf8proc)
Expand Down
197 changes: 197 additions & 0 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,47 @@ else()
"${THIRDPARTY_MIRROR_URL}/aws-sdk-cpp-${ARROW_AWSSDK_BUILD_VERSION}.tar.gz")
endif()

# Source archive locations for the Azure SDK for C++ components.
# Every component defaults to its tagged archive on GitHub; defining the
# matching ARROW_AZURE_*_URL environment variable at configure time replaces
# that default with the given value.

# azure-core
if(NOT DEFINED ENV{ARROW_AZURE_CORE_URL})
  set_urls(AZURE_CORE_SOURCE_URL
           "https://github.com/Azure/azure-sdk-for-cpp/archive/azure-core_${ARROW_AZURE_CORE_BUILD_VERSION}.tar.gz"
  )
else()
  set(AZURE_CORE_SOURCE_URL "$ENV{ARROW_AZURE_CORE_URL}")
endif()

# azure-identity
if(NOT DEFINED ENV{ARROW_AZURE_IDENTITY_URL})
  set_urls(AZURE_IDENTITY_SOURCE_URL
           "https://github.com/Azure/azure-sdk-for-cpp/archive/azure-identity_${ARROW_AZURE_IDENTITY_BUILD_VERSION}.tar.gz"
  )
else()
  set(AZURE_IDENTITY_SOURCE_URL "$ENV{ARROW_AZURE_IDENTITY_URL}")
endif()

# azure-storage-blobs
if(NOT DEFINED ENV{ARROW_AZURE_STORAGE_BLOBS_URL})
  set_urls(AZURE_STORAGE_BLOBS_SOURCE_URL
           "https://github.com/Azure/azure-sdk-for-cpp/archive/azure-storage-blobs_${ARROW_AZURE_STORAGE_BLOBS_BUILD_VERSION}.tar.gz"
  )
else()
  set(AZURE_STORAGE_BLOBS_SOURCE_URL "$ENV{ARROW_AZURE_STORAGE_BLOBS_URL}")
endif()

# azure-storage-common
if(NOT DEFINED ENV{ARROW_AZURE_STORAGE_COMMON_URL})
  set_urls(AZURE_STORAGE_COMMON_SOURCE_URL
           "https://github.com/Azure/azure-sdk-for-cpp/archive/azure-storage-common_${ARROW_AZURE_STORAGE_COMMON_BUILD_VERSION}.tar.gz"
  )
else()
  set(AZURE_STORAGE_COMMON_SOURCE_URL "$ENV{ARROW_AZURE_STORAGE_COMMON_URL}")
endif()

# azure-storage-files-datalake
if(NOT DEFINED ENV{ARROW_AZURE_STORAGE_FILES_DATALAKE_URL})
  set_urls(AZURE_STORAGE_FILES_DATALAKE_SOURCE_URL
           "https://github.com/Azure/azure-sdk-for-cpp/archive/azure-storage-files-datalake_${ARROW_AZURE_STORAGE_FILES_DATALAKE_BUILD_VERSION}.tar.gz"
  )
else()
  set(AZURE_STORAGE_FILES_DATALAKE_SOURCE_URL
      "$ENV{ARROW_AZURE_STORAGE_FILES_DATALAKE_URL}")
endif()

if(DEFINED ENV{ARROW_BOOST_URL})
set(BOOST_SOURCE_URL "$ENV{ARROW_BOOST_URL}")
else()
Expand Down Expand Up @@ -5042,6 +5083,162 @@ if(ARROW_S3)
endif()
endif()

# Build the Azure SDK for C++ components from source as external projects and
# expose each one as a static imported target.
#
# Because this is a macro, everything it sets lands in the caller's scope:
#   AZURESDK_PREFIX            - install prefix shared by all components
#   AZURESDK_INCLUDE_DIR       - include directory below that prefix
#   AZURESDK_LIBRARIES         - the five Azure:: imported targets
#   AZURESDK_LINK_LIBRARIES    - same list, for consumers to link against
#   ARROW_BUNDLED_STATIC_LIBS  - extended with the five Azure:: targets
macro(build_azuresdk)
  message(STATUS "Building Azure C++ SDK from source")

  find_curl()
  find_package(LibXml2 REQUIRED)
  find_package(OpenSSL ${ARROW_OPENSSL_REQUIRED_VERSION} REQUIRED)

  set(AZURESDK_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/azuresdk_ep-install")
  set(AZURESDK_INCLUDE_DIR "${AZURESDK_PREFIX}/include")
  set(AZURESDK_LIB_DIR "lib")

  # provide hint for Azure SDK to link with the already located openssl
  get_filename_component(OPENSSL_ROOT_HINT "${OPENSSL_INCLUDE_DIR}" DIRECTORY)

  set(AZURESDK_COMMON_CMAKE_ARGS
      ${EP_COMMON_CMAKE_ARGS}
      "-DCMAKE_INSTALL_PREFIX=${AZURESDK_PREFIX}"
      "-DCMAKE_PREFIX_PATH=${AZURESDK_PREFIX}"
      -DBUILD_SHARED_LIBS=OFF
      "-DCMAKE_INSTALL_LIBDIR=${AZURESDK_LIB_DIR}"
      -DDISABLE_AZURE_CORE_OPENTELEMETRY=ON
      -DENABLE_TESTING=OFF
      -DENABLE_UNITY_BUILD=ON
      "-DOPENSSL_ROOT_DIR=${OPENSSL_ROOT_HINT}"
      -DWARNINGS_AS_ERRORS=OFF)

  # The include directory must exist at configure time because it is used as
  # INTERFACE_INCLUDE_DIRECTORIES of imported targets before anything is built.
  file(MAKE_DIRECTORY ${AZURESDK_INCLUDE_DIR})

  # All components install into the same prefix and are configured with
  # CMAKE_PREFIX_PATH pointing at it, so the external projects below are
  # chained with DEPENDS to give them a deterministic build/install order.
  # A DEPENDS entry must name an already declared external project, which is
  # why the components are declared in dependency order.

  # --- azure-core ---------------------------------------------------------
  set(AZURE_CORE_STATIC_LIBRARY
      "${AZURESDK_PREFIX}/${AZURESDK_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}azure-core${CMAKE_STATIC_LIBRARY_SUFFIX}"
  )
  externalproject_add(azure_core_ep
                      ${EP_LOG_OPTIONS}
                      INSTALL_DIR ${AZURESDK_PREFIX}
                      URL ${AZURE_CORE_SOURCE_URL}
                      URL_HASH "SHA256=${ARROW_AZURE_CORE_BUILD_SHA256_CHECKSUM}"
                      CMAKE_ARGS ${AZURESDK_COMMON_CMAKE_ARGS}
                      BUILD_BYPRODUCTS ${AZURE_CORE_STATIC_LIBRARY})
  add_library(Azure::azure-core STATIC IMPORTED)
  set_target_properties(Azure::azure-core
                        PROPERTIES IMPORTED_LOCATION "${AZURE_CORE_STATIC_LIBRARY}"
                                   INTERFACE_INCLUDE_DIRECTORIES
                                   "${AZURESDK_INCLUDE_DIR}")
  target_link_libraries(Azure::azure-core INTERFACE CURL::libcurl LibXml2::LibXml2)
  add_dependencies(Azure::azure-core azure_core_ep)

  # --- azure-identity (uses azure-core) -----------------------------------
  set(AZURE_IDENTITY_STATIC_LIBRARY
      "${AZURESDK_PREFIX}/${AZURESDK_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}azure-identity${CMAKE_STATIC_LIBRARY_SUFFIX}"
  )
  externalproject_add(azure_identity_ep
                      ${EP_LOG_OPTIONS}
                      INSTALL_DIR ${AZURESDK_PREFIX}
                      URL ${AZURE_IDENTITY_SOURCE_URL}
                      URL_HASH "SHA256=${ARROW_AZURE_IDENTITY_BUILD_SHA256_CHECKSUM}"
                      CMAKE_ARGS ${AZURESDK_COMMON_CMAKE_ARGS}
                      BUILD_BYPRODUCTS ${AZURE_IDENTITY_STATIC_LIBRARY}
                      DEPENDS azure_core_ep)
  add_library(Azure::azure-identity STATIC IMPORTED)
  set_target_properties(Azure::azure-identity
                        PROPERTIES IMPORTED_LOCATION "${AZURE_IDENTITY_STATIC_LIBRARY}"
                                   INTERFACE_INCLUDE_DIRECTORIES
                                   "${AZURESDK_INCLUDE_DIR}")
  target_link_libraries(Azure::azure-identity
                        INTERFACE Azure::azure-core CURL::libcurl LibXml2::LibXml2)
  add_dependencies(Azure::azure-identity azure_identity_ep)

  # --- azure-storage-common (uses azure-core and OpenSSL) -----------------
  set(AZURE_STORAGE_COMMON_STATIC_LIBRARY
      "${AZURESDK_PREFIX}/${AZURESDK_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}azure-storage-common${CMAKE_STATIC_LIBRARY_SUFFIX}"
  )
  externalproject_add(azure_storage_common_ep
                      ${EP_LOG_OPTIONS}
                      INSTALL_DIR ${AZURESDK_PREFIX}
                      URL ${AZURE_STORAGE_COMMON_SOURCE_URL}
                      URL_HASH "SHA256=${ARROW_AZURE_STORAGE_COMMON_BUILD_SHA256_CHECKSUM}"
                      CMAKE_ARGS ${AZURESDK_COMMON_CMAKE_ARGS}
                      BUILD_BYPRODUCTS ${AZURE_STORAGE_COMMON_STATIC_LIBRARY}
                      DEPENDS azure_core_ep)
  add_library(Azure::azure-storage-common STATIC IMPORTED)
  set_target_properties(Azure::azure-storage-common
                        PROPERTIES IMPORTED_LOCATION
                                   "${AZURE_STORAGE_COMMON_STATIC_LIBRARY}"
                                   INTERFACE_INCLUDE_DIRECTORIES
                                   "${AZURESDK_INCLUDE_DIR}")
  # OpenSSL::Crypto is listed together with the other interface libraries.
  # Assigning INTERFACE_LINK_LIBRARIES afterwards with a plain set_property()
  # would replace this whole list and drop curl and libxml2.
  target_link_libraries(Azure::azure-storage-common
                        INTERFACE Azure::azure-core
                                  CURL::libcurl
                                  LibXml2::LibXml2
                                  OpenSSL::Crypto)
  add_dependencies(Azure::azure-storage-common azure_storage_common_ep)

  # --- azure-storage-blobs (uses azure-storage-common) --------------------
  set(AZURE_STORAGE_BLOBS_STATIC_LIBRARY
      "${AZURESDK_PREFIX}/${AZURESDK_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}azure-storage-blobs${CMAKE_STATIC_LIBRARY_SUFFIX}"
  )
  externalproject_add(azure_storage_blobs_ep
                      ${EP_LOG_OPTIONS}
                      INSTALL_DIR ${AZURESDK_PREFIX}
                      URL ${AZURE_STORAGE_BLOBS_SOURCE_URL}
                      URL_HASH "SHA256=${ARROW_AZURE_STORAGE_BLOBS_BUILD_SHA256_CHECKSUM}"
                      CMAKE_ARGS ${AZURESDK_COMMON_CMAKE_ARGS}
                      BUILD_BYPRODUCTS ${AZURE_STORAGE_BLOBS_STATIC_LIBRARY}
                      DEPENDS azure_storage_common_ep)
  add_library(Azure::azure-storage-blobs STATIC IMPORTED)
  set_target_properties(Azure::azure-storage-blobs
                        PROPERTIES IMPORTED_LOCATION
                                   "${AZURE_STORAGE_BLOBS_STATIC_LIBRARY}"
                                   INTERFACE_INCLUDE_DIRECTORIES
                                   "${AZURESDK_INCLUDE_DIR}")
  target_link_libraries(Azure::azure-storage-blobs
                        INTERFACE Azure::azure-core
                                  Azure::azure-storage-common
                                  CURL::libcurl
                                  LibXml2::LibXml2)
  add_dependencies(Azure::azure-storage-blobs azure_storage_blobs_ep)

  # --- azure-storage-files-datalake (uses all of the above) ---------------
  set(AZURE_STORAGE_FILES_DATALAKE_STATIC_LIBRARY
      "${AZURESDK_PREFIX}/${AZURESDK_LIB_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}azure-storage-files-datalake${CMAKE_STATIC_LIBRARY_SUFFIX}"
  )
  externalproject_add(azure_storage_files_datalake_ep
                      ${EP_LOG_OPTIONS}
                      INSTALL_DIR ${AZURESDK_PREFIX}
                      URL ${AZURE_STORAGE_FILES_DATALAKE_SOURCE_URL}
                      URL_HASH "SHA256=${ARROW_AZURE_STORAGE_FILES_DATALAKE_BUILD_SHA256_CHECKSUM}"
                      CMAKE_ARGS ${AZURESDK_COMMON_CMAKE_ARGS}
                      BUILD_BYPRODUCTS ${AZURE_STORAGE_FILES_DATALAKE_STATIC_LIBRARY}
                      DEPENDS azure_identity_ep azure_storage_blobs_ep)
  add_library(Azure::azure-storage-files-datalake STATIC IMPORTED)
  set_target_properties(Azure::azure-storage-files-datalake
                        PROPERTIES IMPORTED_LOCATION
                                   "${AZURE_STORAGE_FILES_DATALAKE_STATIC_LIBRARY}"
                                   INTERFACE_INCLUDE_DIRECTORIES
                                   "${AZURESDK_INCLUDE_DIR}")
  target_link_libraries(Azure::azure-storage-files-datalake
                        INTERFACE Azure::azure-core
                                  Azure::azure-identity
                                  Azure::azure-storage-blobs
                                  Azure::azure-storage-common
                                  CURL::libcurl
                                  LibXml2::LibXml2)
  add_dependencies(Azure::azure-storage-files-datalake azure_storage_files_datalake_ep)

  # Publish the targets: once for consumers that link against the SDK and
  # once for the list of bundled static libraries.
  set(AZURESDK_LIBRARIES
      Azure::azure-core
      Azure::azure-storage-blobs
      Azure::azure-identity
      Azure::azure-storage-common
      Azure::azure-storage-files-datalake)
  list(APPEND ARROW_BUNDLED_STATIC_LIBS ${AZURESDK_LIBRARIES})

  set(AZURESDK_LINK_LIBRARIES ${AZURESDK_LIBRARIES})
endmacro()

# With ARROW_AZURE enabled, run build_azuresdk() and report the include
# directory and library list it sets.
if(ARROW_AZURE)
  build_azuresdk()
  message(STATUS "Found Azure SDK headers: ${AZURESDK_INCLUDE_DIR}")
  message(STATUS "Found Azure SDK libraries: ${AZURESDK_LINK_LIBRARIES}")
endif()

# ----------------------------------------------------------------------
# ucx - communication framework for modern, high-bandwidth and low-latency networks

Expand Down
10 changes: 10 additions & 0 deletions cpp/thirdparty/versions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,16 @@ ARROW_AWS_LC_BUILD_VERSION=v1.3.0
ARROW_AWS_LC_BUILD_SHA256_CHECKSUM=ae96a3567161552744fc0cae8b4d68ed88b1ec0f3d3c98700070115356da5a37
ARROW_AWSSDK_BUILD_VERSION=1.10.55
ARROW_AWSSDK_BUILD_SHA256_CHECKSUM=2d552fb1a84bef4a9b65e34aa7031851ed2aef5319e02cc6e4cb735c48aa30de
ARROW_AZURE_CORE_BUILD_VERSION=1.7.1
ARROW_AZURE_CORE_BUILD_SHA256_CHECKSUM=ae6f03e65d9773d11cf3b9619d0bc7f567272974cf31b9e1c8ca2fa0ea4fb4c6
ARROW_AZURE_IDENTITY_BUILD_VERSION=1.3.0
ARROW_AZURE_IDENTITY_BUILD_SHA256_CHECKSUM=46701acd8000f317d1c4b33263d5d3203924fadcfa5af4860ae9187046a72c45
ARROW_AZURE_STORAGE_BLOBS_BUILD_VERSION=12.5.0
ARROW_AZURE_STORAGE_BLOBS_BUILD_SHA256_CHECKSUM=12394d864144ced9fc3562ad48cfe3426604e871b5aa72853ca398e086f0c594
ARROW_AZURE_STORAGE_COMMON_BUILD_VERSION=12.2.4
ARROW_AZURE_STORAGE_COMMON_BUILD_SHA256_CHECKSUM=7644b4355b492ba2039236b9fd56c3e7bb80aad983d8bac6a731d74aaf64e03f
ARROW_AZURE_STORAGE_FILES_DATALAKE_BUILD_VERSION=12.3.1
ARROW_AZURE_STORAGE_FILES_DATALAKE_BUILD_SHA256_CHECKSUM=a5b74076a751d7cfaf7c56674a40ce2792c4fab9add18758fab1fe091d00baff
ARROW_BOOST_BUILD_VERSION=1.81.0
ARROW_BOOST_BUILD_SHA256_CHECKSUM=9e0ffae35528c35f90468997bc8d99500bf179cbae355415a89a600c38e13574
ARROW_BROTLI_BUILD_VERSION=v1.0.9
Expand Down
5 changes: 5 additions & 0 deletions cpp/vcpkg.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
"transfer"
]
},
"azure-core-cpp",
"azure-identity-cpp",
"azure-storage-blobs-cpp",
"azure-storage-common-cpp",
"azure-storage-files-datalake-cpp",
"benchmark",
"boost-filesystem",
"boost-multiprecision",
Expand Down

0 comments on commit eca15b9

Please sign in to comment.