Skip to content

Commit

Permalink
Enable AWSSDK on Linux by statically linking OpenSSL and cURL (#421)
Browse files Browse the repository at this point in the history
Summary:
This PR adds everything needed to enable AWSSDK for Linux releases:
- The Linux release workflow now uses the `pytorch/manylinux-cpu` Docker image to align the compiler version with PyTorch Core
- Add shell script to install OpenSSL and cURL on docker image (This step can be improved if TorchData wants to own a new docker image with these two libraries pre-built)
- Update `torchdata/csrc/CMakeLists.txt` to respect the static OpenSSL and cURL
- Correct `third_party/CMakeLists.txt` so that it no longer adds redundant dependencies for `_torchdata`
- Add `auditwheel show` to validate the binaries are `manylinux_2_17` (alias of `manylinux2014`)
- Update Readme for release
- Run our testing CI with AWSSDK enabled by default, which saves time and money on CI runs
- Remove redundant `numpy` dependency introduced by `tf_record`. And, fix tests in this file

See the workflow for the nightly release: https://github.com/pytorch/data/actions/runs/2391843245
See the `manylinux` version: https://github.com/pytorch/data/runs/6612861049?check_suite_focus=true#step:9:67

`manylinux_2_17_x86_64` is an alias of `manylinux2014_x86_64`

Pull Request resolved: #421

Reviewed By: NivekT

Differential Revision: D36641092

Pulled By: ejguan

fbshipit-source-id: 349bfd896ee0db01eea849580984f4000ca2bc3f
  • Loading branch information
ejguan authored and facebook-github-bot committed May 31, 2022
1 parent 3d967c6 commit 7fde5a7
Show file tree
Hide file tree
Showing 8 changed files with 138 additions and 45 deletions.
67 changes: 39 additions & 28 deletions .github/workflows/_build_test_upload.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
wheel_build_test:
needs: get_release_type
runs-on: ${{ matrix.os }}
# container: ${{ startsWith( matrix.os, 'ubuntu' ) && 'quay.io/pypa/manylinux2014_x86_64' || null }}
container: ${{ startsWith( matrix.os, 'ubuntu' ) && 'pytorch/manylinux-cpu' || null }}
strategy:
fail-fast: false
matrix:
Expand All @@ -69,7 +69,7 @@ jobs:
- "3.10"
steps:
- name: Setup Python ${{ matrix.python-version }}
# if: ${{ ! startsWith( matrix.os, 'ubuntu' ) }}
if: ${{ ! startsWith( matrix.os, 'ubuntu' ) }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
Expand All @@ -88,61 +88,72 @@ jobs:
arch: x64
- name: Install Build Dependency
shell: bash
env:
PYTHON_VERSION: ${{ matrix.python-version }}
run: |
# if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
# yum -y install ninja-build
# yum -y install openssl-devel openssl-static curl-devel zlib-devel
# else
pip install cmake ninja
echo "/home/runner/.local/bin" >> $GITHUB_PATH
# fi
if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
source packaging/manylinux/python_helper.sh
yum -y install ninja-build zlib-static
# Docker path is /__w by default
export WORKSPACE="/__w"
# Install static OpenSSL/libcrypto library
./packaging/manylinux/install_openssl_curl.sh
else
pip install cmake ninja
echo "/home/runner/.local/bin" >> $GITHUB_PATH
fi
- name: Install PyTorch and Build TorchData Wheel
shell: bash
env:
PYTHON_VERSION: ${{ matrix.python-version }}
PYTORCH_VERSION: ${{ inputs.pytorch_version }}
BUILD_S3: ${{ startsWith( matrix.os, 'ubuntu' ) && 'OFF' || 'ON' }}
BUILD_S3: 1
run: |
# if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
# # See: https://github.com/actions/checkout/issues/760
# git config --global --add safe.directory /__w/data/data
# source packaging/manylinux_wheel_helper.sh
# fi
if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
source packaging/manylinux/python_helper.sh
# See: https://github.com/actions/checkout/issues/760
git config --global --add safe.directory "$WORKSPACE/data/data"
# AWSSDK uses $CMAKE_PREFIX_PATH to find openssl
export OPENSSL_ROOT_DIR="/__w/ssl"
export CURL_ROOT_DIR="/__w/curl"
export CMAKE_PREFIX_PATH="$OPENSSL_ROOT_DIR:$CURL_ROOT_DIR:$CMAKE_PREFIX_PATH"
export STATIC_DEPS=TRUE
fi
packaging/build_wheel.sh
- name: Validate TorchData Wheel
shell: bash
env:
PYTHON_VERSION: ${{ matrix.python-version }}
run: |
# if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
# source packaging/manylinux_wheel_helper.sh
# pip3 install auditwheel
# fi
if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
source packaging/manylinux/python_helper.sh
pip3 install auditwheel
fi
pip3 install pkginfo
for pkg in dist/torchdata*.whl; do
echo "PkgInfo of $pkg:"
pkginfo $pkg
# if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
# auditwheel show $pkg
# fi
if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
auditwheel show $pkg
fi
done
- name: Install TorchData Wheel
shell: bash
env:
PYTHON_VERSION: ${{ matrix.python-version }}
run: |
# if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
# source packaging/manylinux_wheel_helper.sh
# fi
if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
source packaging/manylinux/python_helper.sh
fi
pip3 install dist/torchdata*.whl
- name: Run DataPipes Tests with pytest
shell: bash
env:
PYTHON_VERSION: ${{ matrix.python-version }}
run: |
# if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
# source packaging/manylinux_wheel_helper.sh
# fi
if ${{ startsWith( matrix.os, 'ubuntu' ) }}; then
source packaging/manylinux/python_helper.sh
fi
pip3 install -r test/requirements.txt
pytest --no-header -v test --ignore=test/test_period.py --ignore=test/test_text_examples.py --ignore=test/test_audio_examples.py
- name: Upload Wheels to Github
Expand Down
9 changes: 3 additions & 6 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,6 @@ jobs:
- 3.7
- 3.8
- 3.9
with-s3:
- 1
- 0
steps:
- name: Setup additional system libraries
if: startsWith( matrix.os, 'ubuntu' )
Expand All @@ -44,10 +41,10 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
- name: Setup msbuild on Windows
if: matrix.with-s3 == 1 && matrix.os == 'windows-latest'
if: matrix.os == 'windows-latest'
uses: microsoft/setup-msbuild@v1.1
- name: Set up Visual Studio shell
if: matrix.with-s3 == 1 && matrix.os == 'windows-latest'
if: matrix.os == 'windows-latest'
uses: egor-tensin/vs-shell@v2
with:
arch: x64
Expand All @@ -65,7 +62,7 @@ jobs:
run: |
python setup.py install
env:
BUILD_S3: ${{ matrix.with-s3 }}
BUILD_S3: 1
- name: Install test requirements
run: pip3 install -r test/requirements.txt
- name: Run DataPipes tests with pytest
Expand Down
13 changes: 13 additions & 0 deletions packaging/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,16 @@ PYTHON_VERSION=3.9 PYTORCH_VERSION=1.11.0 packaging/build_wheel.sh
```bash
PYTHON_VERSION=3.9 packaging/build_wheel.sh
```
## [`AWSSDK`](https://github.com/aws/aws-sdk-cpp)

The following table shows which `torchdata` binaries ship with a pre-compiled `AWSSDK` extension on each operating system.

| `torchdata` | `Wheel` | `Conda` |
| ------------------ | ------------------ | ------------------ |
| Linux | :heavy_check_mark: | :heavy_check_mark: |
| Windows | :heavy_check_mark: | :x: |
| MacOS | :heavy_check_mark: | :heavy_check_mark: |

### Manylinux

`AWSSDK` requires OpenSSL and cURL. In order to provide `manylinux2014_x86_64` wheels with `AWSSDK` enabled, `torchdata` distributions are bundled with OpenSSL (1.1.1o) and cURL (7.83.1). If either of them is out of date, please open an issue to request an upgrade.
58 changes: 58 additions & 0 deletions packaging/manylinux/install_openssl_curl.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# OpenSSL release to build. The tarball is fetched from
# ${OPENSSL_URL}/${OPENSSL_NAME}.tar.gz and checked against OPENSSL_SHA256
# before it is unpacked.
OPENSSL_URL="https://www.openssl.org/source/"
OPENSSL_NAME="openssl-1.1.1o"
OPENSSL_SHA256="9384a2b0570dd80358841464677115df785edb941c71211f75076d72fe6b438f"
# Options handed verbatim to OpenSSL's ./config (expanded unquoted, so each
# word becomes a separate argument).
OPENSSL_BUILD_FLAGS="no-ssl2 no-zlib no-shared no-comp no-dynamic-engine enable-ec_nistp_64_gcc_128"

# cURL release to build. The release directory on GitHub is CURL_NAME with the
# dots replaced by underscores (e.g. curl-7_83_1).
CURL_URL="https://github.com/curl/curl/releases/download"
CURL_NAME="curl-7.83.1"
# Options handed verbatim to cURL's ./configure.
CURL_BUILD_FLAGS="--disable-shared"

# Verify that a file matches an expected SHA-256 digest.
#   $1 - path of the file to check
#   $2 - expected digest as a hex string
# Returns the exit status of `sha256sum -c` (0 on match, non-zero otherwise),
# so callers can stop when the download is corrupt or has been tampered with.
function check_sha256sum {
  local fname=$1
  local sha256=$2
  local status=0
  # Checksum-file line format: digest, two separator characters, file name.
  echo "${sha256}  ${fname}" > "${fname}.sha256"
  # Keep the verification result; otherwise the cleanup below would become the
  # function's exit status and a mismatch would be reported as success.
  sha256sum -c "${fname}.sha256" || status=$?
  rm -f "${fname}.sha256"
  return "${status}"
}

# Stop at the first failing command so that a failed download, a checksum
# mismatch or a compile error cannot go unnoticed and produce a wheel linked
# against a half-installed toolchain.
set -e

# Both install prefixes live under WORKSPACE; refuse to run without it instead
# of silently configuring with --prefix=/ssl and --prefix=/curl.
: "${WORKSPACE:?WORKSPACE must be set to the directory that will contain ssl/ and curl/}"

# Best effort: the packages may not be installed on the image at all.
# NOTE(review): presumably removed so the distro headers/libraries cannot
# shadow the static builds produced below - confirm.
yum erase -y openssl-devel curl-devel || true

pushd "${WORKSPACE}"

# OpenSSL
# OPENSSL_URL already ends with a slash; strip it to avoid a `//` in the URL.
curl -fsSL -o "${OPENSSL_NAME}.tar.gz" "${OPENSSL_URL%/}/${OPENSSL_NAME}.tar.gz"
# Invoked as a plain command (not inside `if`/`||`) so `set -e` aborts the
# script when the digest does not match.
check_sha256sum "${OPENSSL_NAME}.tar.gz" "${OPENSSL_SHA256}"
tar zxf "${OPENSSL_NAME}.tar.gz"

pushd "${OPENSSL_NAME}"

# OPENSSL_BUILD_FLAGS is intentionally unquoted: it holds several
# space-separated options that must reach ./config as separate arguments.
./config $OPENSSL_BUILD_FLAGS --prefix="${WORKSPACE}/ssl" --openssldir="${WORKSPACE}/ssl"
make -j4 > /dev/null
# avoid installing the docs
# https://github.com/openssl/openssl/issues/6685#issuecomment-403838728
make install_sw > /dev/null

popd
rm -rf "${OPENSSL_NAME}" "${OPENSSL_NAME}.tar.gz"

# cURL
# GitHub release directories use underscores instead of dots (curl-7_83_1).
curl -fsSL -o "${CURL_NAME}.tar.gz" "${CURL_URL}/${CURL_NAME//./_}/${CURL_NAME}.tar.gz"
tar zxf "${CURL_NAME}.tar.gz"

pushd "${CURL_NAME}"

# Build cURL against the OpenSSL that was just installed under WORKSPACE.
# CURL_BUILD_FLAGS is intentionally unquoted for the same word-splitting reason.
./configure ${CURL_BUILD_FLAGS} --with-openssl="${WORKSPACE}/ssl" --prefix="${WORKSPACE}/curl"
make -j4 > /dev/null
make install > /dev/null

popd
rm -rf "${CURL_NAME}" "${CURL_NAME}.tar.gz"

popd
File renamed without changes.
5 changes: 2 additions & 3 deletions packaging/torchdata/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,8 @@ test:
# The following packages are not on the default conda channel
# - iopath
# - rarfile
# TODO: Re-enable it after #386 is landed
# commands:
# - pytest --no-header -v test --ignore=test/test_period.py --ignore=test/test_text_examples.py --ignore=test/test_audio_examples.py
commands:
- pytest --no-header -v test --ignore=test/test_period.py --ignore=test/test_text_examples.py --ignore=test/test_audio_examples.py

about:
home: https://github.com/pytorch/data
Expand Down
7 changes: 1 addition & 6 deletions third_party/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,8 @@ else()
endif()
endforeach()

if(WIN32)
list(APPEND AWSSDK_LIBRARIES "Userenv;version;ws2_32;Bcrypt;Wininet;winhttp;Crypt32;Secur32;NCrypt;Shlwapi")
elseif(APPLE)
list(APPEND AWSSDK_LIBRARIES "pthread;curl")
else()
if(UNIX AND NOT APPLE)
list(APPEND AWSSDK_LIBRARIES "${aws_cpp_sdk_INSTALL}/lib/libs2n.a")
list(APPEND AWSSDK_LIBRARIES "pthread;crypto;ssl;z;curl")
endif()

include(ExternalProject)
Expand Down
24 changes: 22 additions & 2 deletions torchdata/csrc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,32 @@ if(BUILD_S3)
# see https://github.com/actions/setup-python/issues/121#issuecomment-1014500503
set(Python_FIND_FRAMEWORK "LAST")

if (WIN32)
if(WIN32)
find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Interpreter Development)
set(ADDITIONAL_ITEMS Python3::Python)
else()
find_package(Python3 COMPONENTS Interpreter Development)
endif()

# AWSSDK Dependencies
#
# Collects the system libraries that have to be linked together with
# ${AWSSDK_LIBRARIES} into AWSSDK_DEP_LIBRARIES:
#   * Windows: a fixed list of Win32 system libraries.
#   * macOS:   pthread and the system curl.
#   * other UNIX: zlib, cURL and OpenSSL located through find_package().
#     When STATIC_DEPS is true, FindOpenSSL is asked for static libraries.
if(WIN32)
  set(AWSSDK_DEP_LIBRARIES "Userenv;version;ws2_32;Bcrypt;Wininet;winhttp;Crypt32;Secur32;NCrypt;Shlwapi")
elseif(APPLE)
  set(AWSSDK_DEP_LIBRARIES pthread curl)
elseif(UNIX)
  if(STATIC_DEPS)
    # Must be set before OpenSSL is searched for; FindOpenSSL reads it.
    set(OPENSSL_USE_STATIC_LIBS TRUE)
  endif()
  # Find modules are meant to be loaded through find_package(); REQUIRED turns
  # a missing library into a configure-time error instead of a link failure.
  find_package(ZLIB REQUIRED)
  find_package(CURL REQUIRED)
  find_package(OpenSSL REQUIRED)
  # Listed from most to least dependent (curl -> ssl -> crypto -> z -> pthread)
  # because static archives only resolve symbols requested by objects that
  # appear earlier on the link line.
  set(AWSSDK_DEP_LIBRARIES
    ${CURL_LIBRARIES}
    ${OPENSSL_SSL_LIBRARIES}
    ${OPENSSL_CRYPTO_LIBRARIES}
    ${ZLIB_LIBRARIES}
    pthread
  )
endif()
message(STATUS "AWSSDK DEPENDENCIES AWSSDK_DEP_LIBRARIES: ${AWSSDK_DEP_LIBRARIES}")

set(CMAKE_POSITION_INDEPENDENT_CODE ON)

set(
Expand All @@ -44,9 +63,10 @@ if(BUILD_S3)
target_link_libraries(
_torchdata
PRIVATE
${AWSSDK_LIBRARIES}
${Python_LIBRARIES}
${ADDITIONAL_ITEMS}
${AWSSDK_LIBRARIES}
${AWSSDK_DEP_LIBRARIES}
)

set_target_properties(_torchdata PROPERTIES PREFIX "")
Expand Down

0 comments on commit 7fde5a7

Please sign in to comment.