diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml index dcfd760f3..c07973ae1 100644 --- a/.github/workflows/build-manywheel-images.yml +++ b/.github/workflows/build-manywheel-images.yml @@ -13,6 +13,7 @@ on: - .github/workflows/build-manywheel-images.yml - manywheel/Dockerfile - manywheel/Dockerfile_aarch64 + - manywheel/Dockerfile_cuda_aarch64 - manywheel/Dockerfile_cxx11-abi - manywheel/build_docker.sh - 'common/*' @@ -21,6 +22,7 @@ on: - .github/workflows/build-manywheel-images.yml - manywheel/Dockerfile - manywheel/Dockerfile_aarch64 + - manywheel/Dockerfile_cuda_aarch64 - manywheel/Dockerfile_cxx11-abi - 'common/*' - manywheel/build_docker.sh @@ -54,6 +56,25 @@ jobs: - name: Build Docker Image run: | manywheel/build_docker.sh + build-docker-cuda-aarch64: + runs-on: linux.arm64.2xlarge + strategy: + matrix: + cuda_version: ["12.4"] + env: + GPU_ARCH_TYPE: cuda-aarch64 + GPU_ARCH_VERSION: ${{ matrix.cuda_version }} + steps: + - name: Checkout PyTorch + uses: actions/checkout@v3 + - name: Authenticate if WITH_PUSH + run: | + if [[ "${WITH_PUSH}" == true ]]; then + echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin + fi + - name: Build Docker Image + run: | + manywheel/build_docker.sh build-docker-rocm: runs-on: linux.12xlarge strategy: diff --git a/aarch64_linux/aarch64_ci_build.sh b/aarch64_linux/aarch64_ci_build.sh index 321287ff5..6d9a2f6b0 100644 --- a/aarch64_linux/aarch64_ci_build.sh +++ b/aarch64_linux/aarch64_ci_build.sh @@ -26,4 +26,10 @@ cd / git config --global --add safe.directory /pytorch pip install -r /pytorch/requirements.txt pip install auditwheel -python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn +if [ -n "$GPU_ARCH_VERSION" ]; then + echo "GPU_ARCH_VERSION is set to: $GPU_ARCH_VERSION" + python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda +else + echo "GPU_ARCH_VERSION is not set." 
+ python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn +fi \ No newline at end of file diff --git a/aarch64_linux/aarch64_wheel_ci_build.py b/aarch64_linux/aarch64_wheel_ci_build.py index c474184ed..a7f513b26 100755 --- a/aarch64_linux/aarch64_wheel_ci_build.py +++ b/aarch64_linux/aarch64_wheel_ci_build.py @@ -9,88 +9,179 @@ def list_dir(path: str) -> List[str]: - '''' + """ Helper for getting paths for Python - ''' + """ return check_output(["ls", "-1", path]).decode().split("\n") def build_ArmComputeLibrary() -> None: - ''' + """ Using ArmComputeLibrary for aarch64 PyTorch - ''' - print('Building Arm Compute Library') - acl_build_flags=["debug=0", "neon=1", "opencl=0", "os=linux", "openmp=1", "cppthreads=0", - "arch=armv8a", "multi_isa=1", "fixed_format_kernels=1", "build=native"] - acl_install_dir="/acl" - acl_checkout_dir="ComputeLibrary" + """ + print("Building Arm Compute Library") + acl_build_flags = [ + "debug=0", + "neon=1", + "opencl=0", + "os=linux", + "openmp=1", + "cppthreads=0", + "arch=armv8a", + "multi_isa=1", + "fixed_format_kernels=1", + "build=native", + ] + acl_install_dir = "/acl" + acl_checkout_dir = "ComputeLibrary" os.makedirs(acl_install_dir) - check_call(["git", "clone", "https://github.com/ARM-software/ComputeLibrary.git", "-b", "v23.08", - "--depth", "1", "--shallow-submodules"]) - check_call(["scons", "Werror=1", "-j8", f"build_dir=/{acl_install_dir}/build"] + acl_build_flags, - cwd=acl_checkout_dir) + check_call( + [ + "git", + "clone", + "https://github.com/ARM-software/ComputeLibrary.git", + "-b", + "v23.08", + "--depth", + "1", + "--shallow-submodules", + ] + ) + check_call( + ["scons", "Werror=1", "-j8", f"build_dir=/{acl_install_dir}/build"] + + acl_build_flags, + cwd=acl_checkout_dir, + ) for d in ["arm_compute", "include", "utils", "support", "src"]: shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}") +def update_wheel(wheel_path) -> None: + """ + Update the cuda wheel libraries + """ + 
folder = os.path.dirname(wheel_path) + wheelname = os.path.basename(wheel_path) + os.mkdir(f"{folder}/tmp") + os.system(f"unzip {wheel_path} -d {folder}/tmp") + libs_to_copy = [ + "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12", + "/usr/local/cuda/lib64/libcudnn.so.8", + "/usr/local/cuda/lib64/libcublas.so.12", + "/usr/local/cuda/lib64/libcublasLt.so.12", + "/usr/local/cuda/lib64/libcudart.so.12", + "/usr/local/cuda/lib64/libcufft.so.11", + "/usr/local/cuda/lib64/libcusparse.so.12", + "/usr/local/cuda/lib64/libcusparseLt.so.0", + "/usr/local/cuda/lib64/libcusolver.so.11", + "/usr/local/cuda/lib64/libcurand.so.10", + "/usr/local/cuda/lib64/libnvToolsExt.so.1", + "/usr/local/cuda/lib64/libnvJitLink.so.12", + "/usr/local/cuda/lib64/libnvrtc.so.12", + "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.4", + "/usr/local/cuda/lib64/libcudnn_adv_infer.so.8", + "/usr/local/cuda/lib64/libcudnn_adv_train.so.8", + "/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8", + "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8", + "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8", + "/usr/local/cuda/lib64/libcudnn_ops_train.so.8", + "/opt/conda/envs/aarch64_env/lib/libopenblas.so.0", + "/opt/conda/envs/aarch64_env/lib/libgfortran.so.5", + "/opt/conda/envs/aarch64_env/lib/libgomp.so.1", + "/acl/build/libarm_compute.so", + "/acl/build/libarm_compute_graph.so", + "/acl/build/libarm_compute_core.so", + ] + # Copy libraries into the unzipped wheel's torch/lib directory + for lib_path in libs_to_copy: + lib_name = os.path.basename(lib_path) + shutil.copy2(lib_path, f"{folder}/tmp/torch/lib/{lib_name}") + os.system( + f"cd {folder}/tmp/torch/lib/; patchelf --set-rpath '$ORIGIN' {folder}/tmp/torch/lib/libtorch_cuda.so" + ) + os.mkdir(f"{folder}/cuda_wheel") + os.system(f"cd {folder}/tmp/; zip -r {folder}/cuda_wheel/{wheelname} *") + shutil.move( + f"{folder}/cuda_wheel/{wheelname}", + f"/dist/{wheelname}", + copy_function=shutil.copy2, + ) + os.system(f"rm -rf {folder}/tmp {folder}/cuda_wheel/") + + def 
complete_wheel(folder: str) -> str: - ''' + """ Complete wheel build and put in artifact location - ''' + """ wheel_name = list_dir(f"/{folder}/dist")[0] - if "pytorch" in folder: + if "pytorch" in folder and not enable_cuda: print("Repairing Wheel with AuditWheel") - check_call(["auditwheel","repair", f"dist/{wheel_name}"], cwd=folder) + check_call(["auditwheel", "repair", f"dist/{wheel_name}"], cwd=folder) repaired_wheel_name = list_dir(f"/{folder}/wheelhouse")[0] print(f"Moving {repaired_wheel_name} wheel to /{folder}/dist") - os.rename(f"/{folder}/wheelhouse/{repaired_wheel_name}", f"/{folder}/dist/{repaired_wheel_name}") + os.rename( + f"/{folder}/wheelhouse/{repaired_wheel_name}", + f"/{folder}/dist/{repaired_wheel_name}", + ) else: repaired_wheel_name = wheel_name - print(f"Copying {repaired_wheel_name} to artfacts") - shutil.copy2(f"/{folder}/dist/{repaired_wheel_name}", f"/artifacts/{repaired_wheel_name}") + print(f"Copying {repaired_wheel_name} to artifacts") + shutil.copy2( + f"/{folder}/dist/{repaired_wheel_name}", f"/artifacts/{repaired_wheel_name}" + ) return repaired_wheel_name def parse_arguments(): - ''' + """ Parse inline arguments - ''' + """ from argparse import ArgumentParser + parser = ArgumentParser("AARCH64 wheels python CD") parser.add_argument("--debug", action="store_true") parser.add_argument("--build-only", action="store_true") parser.add_argument("--test-only", type=str) parser.add_argument("--enable-mkldnn", action="store_true") + parser.add_argument("--enable-cuda", action="store_true") return parser.parse_args() -if __name__ == '__main__': - ''' +if __name__ == "__main__": + """ Entry Point - ''' + """ args = parse_arguments() enable_mkldnn = args.enable_mkldnn - repo = Repository('/pytorch') + enable_cuda = args.enable_cuda + repo = Repository("/pytorch") branch = repo.head.name - if branch == 'HEAD': - branch = 'master' - + if branch == "HEAD": + branch = "master" - print('Building PyTorch wheel') + print("Building PyTorch wheel") 
build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 " os.system("python setup.py clean") override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION") if override_package_version is not None: version = override_package_version - build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 " - elif branch in ['nightly', 'master']: - build_date = check_output(['git', 'log', '--pretty=format:%cs', '-1'], cwd='/pytorch').decode().replace('-', '') - version = check_output(['cat', 'version.txt'], cwd='/pytorch').decode().strip()[:-2] + build_vars += ( + f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 " + ) + elif branch in ["nightly", "master"]: + build_date = ( + check_output(["git", "log", "--pretty=format:%cs", "-1"], cwd="/pytorch") + .decode() + .replace("-", "") + ) + version = ( + check_output(["cat", "version.txt"], cwd="/pytorch").decode().strip()[:-2] + ) build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 " elif branch.startswith(("v1.", "v2.")): build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 " @@ -98,14 +189,21 @@ def parse_arguments(): if enable_mkldnn: build_ArmComputeLibrary() print("build pytorch with mkldnn+acl backend") - build_vars += "USE_MKLDNN=ON USE_MKLDNN_ACL=ON " \ - "ACL_ROOT_DIR=/acl " \ - "LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH " \ - "ACL_INCLUDE_DIR=/acl/build " \ - "ACL_LIBRARY=/acl/build " + build_vars += ( + "USE_MKLDNN=ON USE_MKLDNN_ACL=ON " + "ACL_ROOT_DIR=/acl " + "LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH " + "ACL_INCLUDE_DIR=/acl/build " + "ACL_LIBRARY=/acl/build " + ) else: print("build pytorch without mkldnn backend") os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel") - pytorch_wheel_name = complete_wheel("pytorch") - print(f"Build Compelete. 
Created {pytorch_wheel_name}..") + if enable_cuda: + print("Updating Cuda Dependency") + filename = os.listdir("/pytorch/dist/") + wheel_path = f"/pytorch/dist/{filename[0]}" + update_wheel(wheel_path) + pytorch_wheel_name = complete_wheel("/pytorch/") + print(f"Build Complete. Created {pytorch_wheel_name}..") diff --git a/common/install_cuda_aarch64.sh b/common/install_cuda_aarch64.sh new file mode 100644 index 000000000..ba97385bd --- /dev/null +++ b/common/install_cuda_aarch64.sh @@ -0,0 +1,90 @@ +#!/bin/bash + +set -ex + +function install_cusparselt_052 { + # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html + mkdir tmp_cusparselt && pushd tmp_cusparselt + wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.5.2.1-archive.tar.xz + tar xf libcusparse_lt-linux-sbsa-0.5.2.1-archive.tar.xz + cp -a libcusparse_lt-linux-sbsa-0.5.2.1-archive/include/* /usr/local/cuda/include/ + cp -a libcusparse_lt-linux-sbsa-0.5.2.1-archive/lib/* /usr/local/cuda/lib64/ + popd + rm -rf tmp_cusparselt +} + +function install_124 { + echo "Installing CUDA 12.4 and cuDNN 8.9 and NCCL 2.20.5 and cuSparseLt-0.5.2" + rm -rf /usr/local/cuda-12.4 /usr/local/cuda + # install CUDA 12.4.0 in the same container + wget -q https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux_sbsa.run + chmod +x cuda_12.4.0_550.54.14_linux_sbsa.run + ./cuda_12.4.0_550.54.14_linux_sbsa.run --toolkit --silent + rm -f cuda_12.4.0_550.54.14_linux_sbsa.run + rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.4 /usr/local/cuda + + # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement + mkdir tmp_cudnn && cd tmp_cudnn + wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-sbsa-8.9.2.26_cuda12-archive.tar.xz + tar xf cudnn-linux-sbsa-8.9.2.26_cuda12-archive.tar.xz + cp -a 
cudnn-linux-sbsa-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/ + cp -a cudnn-linux-sbsa-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/ + cd .. + rm -rf tmp_cudnn + + # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses + # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build + git clone -b v2.20.5-1 --depth 1 https://github.com/NVIDIA/nccl.git + cd nccl && make -j src.build + cp -a build/include/* /usr/local/cuda/include/ + cp -a build/lib/* /usr/local/cuda/lib64/ + cd .. + rm -rf nccl + + install_cusparselt_052 + + ldconfig +} + +function prune_124 { + echo "Pruning CUDA 12.4" + ##################################################################################### + # CUDA 12.4 prune static libs + ##################################################################################### + export NVPRUNE="/usr/local/cuda-12.4/bin/nvprune" + export CUDA_LIB_DIR="/usr/local/cuda-12.4/lib64" + + export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" + export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" + + if [[ -n "$OVERRIDE_GENCODE" ]]; then + export GENCODE=$OVERRIDE_GENCODE + fi + + # all CUDA libs except CuDNN and CuBLAS + ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \ + | xargs -I {} bash -c \ + "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" + + # prune CuDNN and CuBLAS + $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o 
$CUDA_LIB_DIR/libcublas_static.a + $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a + + ##################################################################################### + # CUDA 12.4 prune visual tools + ##################################################################################### + export CUDA_BASE="/usr/local/cuda-12.4/" + rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/ +} + +# idiomatic parameter and option handling in sh +while test $# -gt 0 +do + case "$1" in + 12.4) install_124; prune_124 + ;; + *) echo "bad argument $1"; exit 1 + ;; + esac + shift +done diff --git a/manywheel/Dockerfile_cuda_aarch64 b/manywheel/Dockerfile_cuda_aarch64 new file mode 100644 index 000000000..74c60b299 --- /dev/null +++ b/manywheel/Dockerfile_cuda_aarch64 @@ -0,0 +1,83 @@ +FROM quay.io/pypa/manylinux_2_28_aarch64 as base + +# Cuda ARM build needs gcc 11 +ARG DEVTOOLSET_VERSION=11 + +# Language variables +ENV LC_ALL=en_US.UTF-8 +ENV LANG=en_US.UTF-8 +ENV LANGUAGE=en_US.UTF-8 + +# Install needed OS packages. 
This is to support all +# the binary builds (torch, vision, audio, text, data) +RUN yum -y install epel-release +RUN yum -y update +RUN yum install -y \ + autoconf \ + automake \ + bison \ + bzip2 \ + curl \ + diffutils \ + file \ + git \ + make \ + patch \ + perl \ + unzip \ + util-linux \ + wget \ + which \ + xz \ + yasm \ + less \ + zstd \ + libgomp \ + gcc-toolset-${DEVTOOLSET_VERSION}-toolchain + +# Ensure the expected devtoolset is used +ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH +ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH + +# git236+ would refuse to run git commands in repos owned by other users +# Which causes version check to fail, as pytorch repo is bind-mounted into the image +# Override this behaviour by treating every folder as safe +# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327 +RUN git config --global --add safe.directory "*" + + +FROM base as openssl +# Install openssl (this must precede `build python` step) +# (In order to have a proper SSL module, Python is compiled +# against a recent openssl [see env vars above], which is linked +# statically. We delete openssl afterwards.) 
+ADD ./common/install_openssl.sh install_openssl.sh +RUN bash ./install_openssl.sh && rm install_openssl.sh +ENV SSL_CERT_FILE=/opt/_internal/certs.pem + +FROM openssl as final +# remove unnecessary python versions +RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2 +RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4 +RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6 +RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6 + +FROM base as cuda +ARG BASE_CUDA_VERSION +# Install CUDA +ADD ./common/install_cuda_aarch64.sh install_cuda_aarch64.sh +RUN bash ./install_cuda_aarch64.sh ${BASE_CUDA_VERSION} && rm install_cuda_aarch64.sh + +FROM base as magma +ARG BASE_CUDA_VERSION +# Install magma +ADD ./common/install_magma.sh install_magma.sh +RUN bash ./install_magma.sh ${BASE_CUDA_VERSION} && rm install_magma.sh + +FROM final as cuda_final +ARG BASE_CUDA_VERSION +RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION} +COPY --from=cuda /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION} +COPY --from=magma /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION} +RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda +ENV PATH=/usr/local/cuda/bin:$PATH \ No newline at end of file diff --git a/manywheel/build_docker.sh b/manywheel/build_docker.sh index 7ba13b9fd..710690463 100755 --- a/manywheel/build_docker.sh +++ b/manywheel/build_docker.sh @@ -52,6 +52,14 @@ case ${GPU_ARCH_TYPE} in GPU_IMAGE=centos:7 DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=9" ;; + cuda-aarch64) + TARGET=cuda_final + DOCKER_TAG=cuda-aarch64 + LEGACY_DOCKER_IMAGE=${DOCKER_REGISTRY}/pytorch/manylinux-cuda-aarch64 + GPU_IMAGE=arm64v8/centos:7 + DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=11" + MANY_LINUX_VERSION="aarch64" + ;; rocm) TARGET=rocm_final DOCKER_TAG=rocm${GPU_ARCH_VERSION}