Skip to content

Commit

Permalink
ARROW-17692: [R] Add support for building with system AWS SDK C++ (#1…
Browse files Browse the repository at this point in the history
…4235)

This PR uses "pkg-config --static ... arrow" to collect build flags. That command reports build flags that reflect the build options and the libraries Apache Arrow C++ was built with, so it also works with the system AWS SDK C++.

Lead-authored-by: Sutou Kouhei <kou@clear-code.com>
Co-authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
  • Loading branch information
thisisnic authored Dec 17, 2022
1 parent 89d9fa3 commit dca8c07
Show file tree
Hide file tree
Showing 17 changed files with 329 additions and 110 deletions.
5 changes: 1 addition & 4 deletions ci/docker/linux-r.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ ENV PATH "${RPREFIX}/bin:${PATH}"
# Patch up some of the docker images
COPY ci/scripts/r_docker_configure.sh /arrow/ci/scripts/
COPY ci/etc/rprofile /arrow/ci/etc/
COPY ci/scripts/r_install_system_dependencies.sh /arrow/ci/scripts/
COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/r_docker_configure.sh
Expand All @@ -53,10 +54,6 @@ RUN /arrow/ci/scripts/r_docker_configure.sh
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

# Set up Python 3 and its dependencies
RUN ln -s /usr/bin/python3 /usr/local/bin/python && \
ln -s /usr/bin/pip3 /usr/local/bin/pip

COPY ci/scripts/r_deps.sh /arrow/ci/scripts/
COPY r/DESCRIPTION /arrow/r/
RUN /arrow/ci/scripts/r_deps.sh /arrow
8 changes: 8 additions & 0 deletions ci/docker/ubuntu-18.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,19 @@ RUN apt-get update -y -q && \
libcurl4-openssl-dev \
libgflags-dev \
libgoogle-glog-dev \
libidn2-dev \
libkrb5-dev \
libldap-dev \
liblz4-dev \
libnghttp2-dev \
libprotobuf-dev \
libprotoc-dev \
libpsl-dev \
libre2-dev \
librtmp-dev \
libsnappy-dev \
libssh-dev \
libssh2-1-dev \
libssl-dev \
ninja-build \
pkg-config \
Expand Down
8 changes: 8 additions & 0 deletions ci/docker/ubuntu-20.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,20 @@ RUN apt-get update -y -q && \
libcurl4-openssl-dev \
libgflags-dev \
libgoogle-glog-dev \
libidn2-dev \
libkrb5-dev \
libldap-dev \
liblz4-dev \
libnghttp2-dev \
libprotobuf-dev \
libprotoc-dev \
libpsl-dev \
libradospp-dev \
libre2-dev \
librtmp-dev \
libsnappy-dev \
libssh-dev \
libssh2-1-dev \
libssl-dev \
libthrift-dev \
libutf8proc-dev \
Expand Down
10 changes: 9 additions & 1 deletion ci/docker/ubuntu-22.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,21 @@ RUN apt-get update -y -q && \
libgflags-dev \
libgoogle-glog-dev \
libgrpc++-dev \
libidn2-dev \
libkrb5-dev \
libldap-dev \
liblz4-dev \
libnghttp2-dev \
libprotobuf-dev \
libprotoc-dev \
libpsl-dev \
libre2-dev \
librtmp-dev \
libsnappy-dev \
libssl-dev \
libsqlite3-dev \
libssh-dev \
libssh2-1-dev \
libssl-dev \
libthrift-dev \
libutf8proc-dev \
libzstd-dev \
Expand Down
18 changes: 2 additions & 16 deletions ci/scripts/r_docker_configure.sh
Original file line number Diff line number Diff line change
Expand Up @@ -87,22 +87,8 @@ if [[ -n "$DEVTOOLSET_VERSION" ]]; then
fi
fi

if [ "$ARROW_S3" == "ON" ] || [ "$ARROW_GCS" == "ON" ] || [ "$ARROW_R_DEV" == "TRUE" ]; then
# Install curl and openssl for S3/GCS support
if [ "$PACKAGE_MANAGER" = "apt-get" ]; then
apt-get install -y libcurl4-openssl-dev libssl-dev
else
$PACKAGE_MANAGER install -y libcurl-devel openssl-devel
fi

# The Dockerfile should have put this file here
if [ -f "${ARROW_SOURCE_HOME}/ci/scripts/install_minio.sh" ] && [ "`which wget`" ]; then
${ARROW_SOURCE_HOME}/ci/scripts/install_minio.sh latest /usr/local
fi

if [ -f "${ARROW_SOURCE_HOME}/ci/scripts/install_gcs_testbench.sh" ] && [ "`which pip`" ]; then
${ARROW_SOURCE_HOME}/ci/scripts/install_gcs_testbench.sh default
fi
if [ -f "${ARROW_SOURCE_HOME}/ci/scripts/r_install_system_dependencies.sh" ]; then
"${ARROW_SOURCE_HOME}/ci/scripts/r_install_system_dependencies.sh"
fi

# Install rsync for bundling cpp source and curl to make sure it is installed on all images
Expand Down
141 changes: 141 additions & 0 deletions ci/scripts/r_install_system_dependencies.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Abort on the first failing command and trace every command that is run.
set -ex

# Root of the Arrow source tree; defaults to /arrow when the caller does not
# provide it through the environment.
: "${ARROW_SOURCE_HOME:=/arrow}"

# Succeeds when the command named by $1 can be resolved by the shell.
# Relies on the "command -v" builtin so that detection does not depend on the
# external "which" program being installed.
has_command() {
  command -v "$1" > /dev/null 2>&1
}

# Nothing is installed unless S3 support, GCS support or R development mode
# has been requested through the environment.
if [[ "${ARROW_S3}" == "ON" || "${ARROW_GCS}" == "ON" || "${ARROW_R_DEV}" == "TRUE" ]]; then
  # Figure out what package manager we have: the first of dnf, yum and zypper
  # that is available wins, otherwise apt-get is assumed.
  PACKAGE_MANAGER=apt-get
  for candidate in dnf yum zypper; do
    if has_command "${candidate}"; then
      PACKAGE_MANAGER="${candidate}"
      break
    fi
  done
  if [[ "${PACKAGE_MANAGER}" == "apt-get" ]]; then
    # Refresh the package index before installing anything with apt-get.
    apt-get update
  fi

  # Install curl and OpenSSL for S3/GCS support
  #
  # All dependencies are installed explicitly so that the result of
  # "pkg-config --static --libs libcurl" can be used: libcurl-dev/libcurl-devel
  # don't depend on packages that are only needed for "pkg-config --static".
  case "${PACKAGE_MANAGER}" in
    apt-get)
      # "pkg-config --static --libs libcurl" has
      #   * -lnghttp2
      #   * -lidn2
      #   * -lrtmp
      #   * -lssh or -lssh2
      #   * -lpsl
      #   * -lssl
      #   * -lcrypto
      #   * -lgssapi_krb5
      #   * -lkrb5
      #   * -lk5crypto
      #   * -lcom_err
      #   * -lldap
      #   * -llber
      #   * -lzstd
      #   * -lbrotlidec
      #   * -lz
      apt-get install -y \
        libbrotli-dev \
        libcurl4-openssl-dev \
        libidn2-dev \
        libkrb5-dev \
        libldap-dev \
        libnghttp2-dev \
        libpsl-dev \
        librtmp-dev \
        libssh-dev \
        libssh2-1-dev \
        libssl-dev \
        libzstd-dev
      ;;
    dnf|yum)
      # "pkg-config --static --libs libcurl" has -lidn, -lssh2 and -lldap
      "${PACKAGE_MANAGER}" install -y \
        libcurl-devel \
        libidn-devel \
        libssh2-devel \
        openldap-devel \
        openssl-devel
      ;;
    zypper)
      # "pkg-config --static --libs libcurl" has
      #   * -lnghttp2
      #   * -lidn2
      #   * -lssh
      #   * -lpsl
      #   * -lssl
      #   * -lcrypto
      #   * -lgssapi_krb5
      #   * -lkrb5
      #   * -lk5crypto
      #   * -lcom_err
      #   * -lldap
      #   * -llber
      #   * -lzstd
      #   * -lbrotlidec
      #   * -lz
      "${PACKAGE_MANAGER}" install -y \
        krb5-devel \
        libbrotli-devel \
        libcurl-devel \
        libidn2-devel \
        libnghttp2-devel \
        libpsl-devel \
        libssh-devel \
        libzstd-devel \
        openldap2-devel \
        openssl-devel
      ;;
  esac

  # The Dockerfile should have put this file here.
  # MinIO is only installed for S3 builds and only when wget is available.
  if [[ "${ARROW_S3}" == "ON" && -f "${ARROW_SOURCE_HOME}/ci/scripts/install_minio.sh" ]] \
    && has_command wget; then
    "${ARROW_SOURCE_HOME}/ci/scripts/install_minio.sh" latest /usr/local
  fi

  # The GCS testbench is only installed for GCS builds. pip is installed
  # first, and "python"/"pip" links are created in /usr/local/bin before the
  # installer script is run.
  if [[ "${ARROW_GCS}" == "ON" && -f "${ARROW_SOURCE_HOME}/ci/scripts/install_gcs_testbench.sh" ]]; then
    case "${PACKAGE_MANAGER}" in
      zypper)
        # python3 is Python 3.6 on OpenSUSE 15.3.
        # PyArrow supports Python 3.7 or later.
        "${PACKAGE_MANAGER}" install -y python39-pip
        ln -s /usr/bin/python3.9 /usr/local/bin/python
        ln -s /usr/bin/pip3.9 /usr/local/bin/pip
        ;;
      *)
        "${PACKAGE_MANAGER}" install -y python3-pip
        ln -s /usr/bin/python3 /usr/local/bin/python
        ln -s /usr/bin/pip3 /usr/local/bin/pip
        ;;
    esac
    "${ARROW_SOURCE_HOME}/ci/scripts/install_gcs_testbench.sh" default
  fi
fi
6 changes: 5 additions & 1 deletion cpp/cmake_modules/BuildUtils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -926,7 +926,11 @@ function(ARROW_INSTALL_ALL_HEADERS PATH)
endfunction()

# Generate the pkg-config file for MODULE and install it.
#
# MODULE is the base name of the pkg-config file: "${MODULE}.pc.in" is the
# template, and "${MODULE}.pc" is written to CMAKE_CURRENT_BINARY_DIR and
# installed into "${CMAKE_INSTALL_LIBDIR}/pkgconfig/".
function(ARROW_ADD_PKG_CONFIG MODULE)
# NOTE(review): this page interleaves removed and added diff lines (the hunk
# is listed as 5 additions & 1 deletion); the single-step configure_file()
# directly below looks like the removed line that the two-step sequence
# after it replaces - confirm against the actual file.
configure_file(${MODULE}.pc.in "${CMAKE_CURRENT_BINARY_DIR}/${MODULE}.pc" @ONLY)
# Step 1: substitute @VAR@ references (@ONLY leaves ${VAR} untouched) and write
# the result to an intermediate "${MODULE}.pc.generate.in" file.
configure_file(${MODULE}.pc.in "${CMAKE_CURRENT_BINARY_DIR}/${MODULE}.pc.generate.in"
@ONLY)
# Step 2: pass the intermediate file through file(GENERATE), which evaluates
# generator expressions, to produce the final "${MODULE}.pc".
file(GENERATE
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${MODULE}.pc"
INPUT "${CMAKE_CURRENT_BINARY_DIR}/${MODULE}.pc.generate.in")
# Install the generated file into the pkgconfig directory under the library
# install dir.
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${MODULE}.pc"
DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/")
endfunction()
Expand Down
30 changes: 29 additions & 1 deletion cpp/cmake_modules/FindzstdAlt.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ else()
find_package(PkgConfig QUIET)
pkg_check_modules(ZSTD_PC libzstd)
if(ZSTD_PC_FOUND)
set(zstdAlt_VERSION "${ZSTD_PC_VERSION}")
set(ZSTD_INCLUDE_DIR "${ZSTD_PC_INCLUDEDIR}")

list(APPEND ZSTD_PC_LIBRARY_DIRS "${ZSTD_PC_LIBDIR}")
Expand All @@ -96,7 +97,34 @@ else()
endif()
endif()

find_package_handle_standard_args(zstdAlt REQUIRED_VARS ZSTD_LIB ZSTD_INCLUDE_DIR)
if("${zstdAlt_VERSION}" STREQUAL "" AND ZSTD_INCLUDE_DIR)
file(READ "${ZSTD_INCLUDE_DIR}/zstd.h" ZSTD_H_CONTENT)
string(REGEX MATCH "#define ZSTD_VERSION_MAJOR +([0-9]+)" ZSTD_VERSION_MAJOR_DEFINITION
"${ZSTD_H_CONTENT}")
string(REGEX REPLACE "^.+ ([0-9]+)$" "\\1" ZSTD_VERSION_MAJOR
"${ZSTD_VERSION_MAJOR_DEFINITION}")
string(REGEX MATCH "#define ZSTD_VERSION_MINOR +([0-9]+)" ZSTD_VERSION_MINOR_DEFINITION
"${ZSTD_H_CONTENT}")
string(REGEX REPLACE "^.+ ([0-9]+)$" "\\1" ZSTD_VERSION_MINOR
"${ZSTD_VERSION_MINOR_DEFINITION}")
string(REGEX MATCH "#define ZSTD_VERSION_RELEASE +([0-9]+)"
ZSTD_VERSION_RELEASE_DEFINITION "${ZSTD_H_CONTENT}")
string(REGEX REPLACE "^.+ ([0-9]+)$" "\\1" ZSTD_VERSION_RELEASE
"${ZSTD_VERSION_RELEASE_DEFINITION}")
if("${ZSTD_VERSION_MAJOR}" STREQUAL ""
OR "${ZSTD_VERSION_MINOR}" STREQUAL ""
OR "${ZSTD_VERSION_RELEASE}" STREQUAL "")
set(zstdAlt_VERSION "0.0.0")
else()
set(zstdAlt_VERSION
"${ZSTD_VERSION_MAJOR}.${ZSTD_VERSION_MINOR}.${ZSTD_VERSION_RELEASE}")
endif()
endif()

find_package_handle_standard_args(
zstdAlt
REQUIRED_VARS ZSTD_LIB ZSTD_INCLUDE_DIR
VERSION_VAR zstdAlt_VERSION)

if(zstdAlt_FOUND)
if(ARROW_ZSTD_USE_SHARED)
Expand Down
12 changes: 11 additions & 1 deletion cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4225,7 +4225,7 @@ macro(build_google_cloud_cpp_storage)
endmacro()

if(ARROW_WITH_GOOGLE_CLOUD_CPP)
resolve_dependency(google_cloud_cpp_storage)
resolve_dependency(google_cloud_cpp_storage PC_PACKAGE_NAMES google_cloud_cpp_storage)
get_target_property(google_cloud_cpp_storage_INCLUDE_DIR google-cloud-cpp::storage
INTERFACE_INCLUDE_DIRECTORIES)
message(STATUS "Found google-cloud-cpp::storage headers: ${google_cloud_cpp_storage_INCLUDE_DIR}"
Expand Down Expand Up @@ -4767,6 +4767,16 @@ if(ARROW_S3)
message(STATUS "Found AWS SDK headers: ${AWSSDK_INCLUDE_DIR}")
message(STATUS "Found AWS SDK libraries: ${AWSSDK_LINK_LIBRARIES}")

if(${AWSSDK_SOURCE} STREQUAL "SYSTEM")
foreach(AWSSDK_LINK_LIBRARY ${AWSSDK_LINK_LIBRARIES})
string(APPEND ARROW_PC_LIBS_PRIVATE " $<TARGET_FILE:${AWSSDK_LINK_LIBRARY}>")
endforeach()
endif()
if(UNIX)
string(APPEND ARROW_PC_REQUIRES_PRIVATE " libcurl")
endif()
string(APPEND ARROW_PC_REQUIRES_PRIVATE " openssl")

if(APPLE)
# CoreFoundation's path is hardcoded in the CMake files provided by
# aws-sdk-cpp to use the MacOSX SDK provided by XCode which makes
Expand Down
2 changes: 2 additions & 0 deletions dev/tasks/conda-recipes/r-arrow/build.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,7 @@ if [[ "${target_platform}" == osx-* ]]; then
export ARROW_R_CXXFLAGS="${ARROW_R_CXXFLAGS} -D_LIBCPP_DISABLE_AVAILABILITY"
fi

export PKG_CONFIG_PATH="${PREFIX}/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}"

# ${R_ARGS} necessary to support cross-compilation
${R} CMD INSTALL --build r/. ${R_ARGS}
14 changes: 6 additions & 8 deletions dev/tasks/r/github.linux.offline.build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,12 @@ jobs:
path: arrow/r/
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt install libcurl4-openssl-dev libssl-dev
sudo arrow/ci/scripts/r_install_system_dependencies.sh
arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin
- name: Prepare PKG_CONFIG_PATH for Homebrew
run: |
# zstd is installed by Homebrew on GitHub Actions.
echo "PKG_CONFIG_PATH=$(brew --prefix)/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}" \
>> "$GITHUB_ENV"
env:
ARROW_GCS: "ON"
ARROW_S3: "ON"
ARROW_SOURCE_HOME: arrow
- name: Install dependencies
run: |
install.packages(c("remotes", "glue", "sys"))
Expand All @@ -83,7 +81,7 @@ jobs:
env:
TEST_OFFLINE_BUILD: true
LIBARROW_MINIMAL: false
{{ macros.github_set_sccache_envvars()|indent(8)}}
{{ macros.github_set_sccache_envvars()|indent(8)}}
run: |
cd arrow/r
R CMD INSTALL --install-tests --no-test-load --no-docs --no-help --no-byte-compile arrow_with_deps.tar.gz
Expand Down
Loading

0 comments on commit dca8c07

Please sign in to comment.