From f450b55225e455fdfa32aa365b6ef9383e92a4bc Mon Sep 17 00:00:00 2001 From: Isuru Fernando Date: Wed, 3 Jul 2024 15:15:59 -0500 Subject: [PATCH] Revert "Cleanup and consolidate CUDA images" --- .github/workflows/ci.yaml | 6 - README.md | 10 +- linux-anvil-aarch64-cuda/Dockerfile | 107 +++++++++++++++++ linux-anvil-aarch64-cuda/entrypoint_source | 5 + linux-anvil-cuda/Dockerfile | 71 ++++++------ linux-anvil-ppc64le-cuda/Dockerfile | 128 +++++++++++++++++++++ linux-anvil-ppc64le-cuda/entrypoint_source | 5 + scripts/fix_rpm | 2 - 8 files changed, 287 insertions(+), 47 deletions(-) create mode 100644 linux-anvil-aarch64-cuda/Dockerfile create mode 100644 linux-anvil-aarch64-cuda/entrypoint_source create mode 100644 linux-anvil-ppc64le-cuda/Dockerfile create mode 100644 linux-anvil-ppc64le-cuda/entrypoint_source diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index cf9e9df8..3684ebce 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -36,28 +36,22 @@ jobs: SHORT_DESCRIPTION: "conda-forge build image for Cent0S 7 on aarch64" - DOCKERIMAGE: linux-anvil-cuda - DOCKERFILE: linux-anvil-cuda DOCKERTAG: "11.8" CUDA_VER: "11.8.0" - DISTRO_ARCH: "amd64" DISTRO_NAME: "centos" DISTRO_VER: "7" SHORT_DESCRIPTION: "conda-forge build image for Cent0S 7 on x86_64 with CUDA" - DOCKERIMAGE: linux-anvil-ppc64le-cuda - DOCKERFILE: linux-anvil-cuda DOCKERTAG: "11.8" CUDA_VER: "11.8.0" - DISTRO_ARCH: "ppc64le" DISTRO_NAME: "ubi" DISTRO_VER: "8" SHORT_DESCRIPTION: "conda-forge build image for Cent0S 8 on ppc64le with CUDA" - DOCKERIMAGE: linux-anvil-aarch64-cuda - DOCKERFILE: linux-anvil-cuda DOCKERTAG: "11.8" CUDA_VER: "11.8.0" - DISTRO_ARCH: "arm64" DISTRO_NAME: "ubi" DISTRO_VER: "8" SHORT_DESCRIPTION: "conda-forge build image for Cent0S 8 on aarch64 with CUDA" diff --git a/README.md b/README.md index 36bd8901..224718d9 100644 --- a/README.md +++ b/README.md @@ -18,15 +18,15 @@ environment variables passed in to be able to build. 
In this case, you will want to use a command similar to the following: ```sh -docker build --rm --build-arg DISTRO_NAME=centos --build-arg DISTRO_VER=6 --build-arg CUDA_VER=11.8.0 -f linux-anvil-cuda/Dockerfile . +docker build --rm --build-arg DISTRO_NAME=centos --build-arg DISTRO_VER=6 --build-arg CUDA_VER=10.2 -f linux-anvil-cuda/Dockerfile . ``` ## Environment variables -* `CUDA_VER`: This is the cuda & cudatoolkit version that will be used. The - value of this variable should be in major-minor-patch format, e.g. `11.8.0`. -* `DISTRO_ARCH`: This is the Linux architecture that the image should use. - Should match the upstream Docker image, e.g. `amd64`. +* `$CUDA_VER`: This is the cuda & cudatoolkit version that will be used. The + value of this variable should be in major-minor format, e.g. `9.2` for versions + `9.x` and `10.x`. For versions `11.x` the variable should be in + major-minor-patch format, e.g. `11.2.0`. * `DISTRO_NAME`: This is the Linux distro image name that should be built with. Should match the upstream Docker image, e.g. `centos`. * `DISTRO_VER`: This is version of Linux distro (typical CentOS) that the image diff --git a/linux-anvil-aarch64-cuda/Dockerfile b/linux-anvil-aarch64-cuda/Dockerfile new file mode 100644 index 00000000..fed7ccbe --- /dev/null +++ b/linux-anvil-aarch64-cuda/Dockerfile @@ -0,0 +1,107 @@ +ARG CUDA_VER +ARG DISTRO_NAME +ARG DISTRO_VER +FROM --platform=linux/arm64 nvidia/cuda:${CUDA_VER}-devel-${DISTRO_NAME}${DISTRO_VER} + +LABEL maintainer="conda-forge " + +ADD qemu-aarch64-static /usr/bin/qemu-aarch64-static + +# Set CUDA_VER during runtime. +ARG CUDA_VER +ARG DISTRO_NAME +ARG DISTRO_VER +ENV CUDA_VER=${CUDA_VER} \ + DISTRO_NAME=${DISTRO_NAME} \ + DISTRO_VER=${DISTRO_VER} + +# Set an encoding to make things work smoothly. +ENV LANG en_US.UTF-8 +ENV LANGUAGE=en_US.UTF-8 + +# Set path to CUDA install. 
+ENV CUDA_HOME=/usr/local/cuda + +# we want to persist a path in ldconfig (to avoid having to always set LD_LIBRARY_PATH), but *after* the existing entries; +# since entries in ld.so.conf.d have precedence before the preconfigured directories, we first add the latter to the former +# the upstream images for 10.x all have libcuda.so under $CUDA_HOME/compat; +# add this to the ldconfig so it will be found correctly. +# don't forget to update settings by running ldconfig +RUN ldconfig -v 2>/dev/null | grep -v ^$'\t' | cut -f1 -d":" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf && \ + echo "$CUDA_HOME/compat" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf && \ + ldconfig + +# bust the docker cache so that we always rerun the installs below +ADD https://loripsum.net/api /opt/docker/etc/gibberish + +# Import the distro's RPM GPG keys; resolves a nasty NOKEY warning that appears when using yum. +# Naming convention changed with CentOS 8 - see: +# * https://lists.centos.org/pipermail/centos-devel/2019-September/017847.html +# * https://www.centos.org/keys/#project-keys +RUN if [ "${DISTRO_NAME}${DISTRO_VER}" = "centos7" ]; then \ + rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7 && \ + rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-SIG-7-aarch64; \ + elif [ "${DISTRO_NAME}${DISTRO_VER}" = "centos8" ]; then \ + rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-centosofficial; \ + elif [ "${DISTRO_NAME}${DISTRO_VER}" = "ubi8" ]; then \ + rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-redhat-release; \ + fi + +# Add custom `yum_clean_all` script before using `yum` +COPY scripts/yum_clean_all /opt/docker/bin/ + +# Fallback to CentOS vault for CentOS 8 support. +RUN if [ "${DISTRO_NAME}${DISTRO_VER}" = "centos8" ]; then \ + find /etc/yum.repos.d/ -name "CentOS-*.repo" -exec \ + sed -i 's/mirrorlist/#mirrorlist/g;s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' {} \; && \ + yum update -y --disablerepo=cuda && \ + /opt/docker/bin/yum_clean_all; \ + fi + +# Install basic requirements. 
+RUN yum update -y --disablerepo=cuda && \ + yum install -y \ + bzip2 \ + sudo \ + tar \ + which \ + && \ + /opt/docker/bin/yum_clean_all + +# Fix locale in CentOS 8 and UBI 8 images +# See https://github.com/CentOS/sig-cloud-instance-images/issues/154 +RUN if [ "${DISTRO_NAME}${DISTRO_VER}" = "centos8" ] || [ "${DISTRO_NAME}${DISTRO_VER}" = "ubi8" ]; then \ + yum install -y glibc-langpack-en \ + && \ + /opt/docker/bin/yum_clean_all; \ + fi + +# Remove preinstalled system compilers +RUN rpm -e --nodeps --verbose gcc gcc-c++ + +# Run common commands +COPY scripts/run_commands /opt/docker/bin/run_commands +RUN /opt/docker/bin/run_commands + +# Download and cache CUDA related packages. +RUN source /opt/conda/etc/profile.d/conda.sh && \ + conda activate && \ + conda create -n test --yes --quiet --download-only \ + conda-forge::cudatoolkit=${CUDA_VER} \ + && \ + conda remove --yes --quiet -n test --all && \ + conda clean -tiy && \ + chgrp -R lucky /opt/conda && \ + chmod -R g=u /opt/conda + +# Add a file for users to source to activate the `conda` +# environment `base`. Also add a file that wraps that for +# use with the `ENTRYPOINT`. +COPY linux-anvil-aarch64-cuda/entrypoint_source /opt/docker/bin/entrypoint_source +COPY scripts/entrypoint /opt/docker/bin/entrypoint + +# Ensure that all containers start with tini and the user selected process. +# Activate the `conda` environment `base`. +# Provide a default command (`bash`), which will start if the user doesn't specify one. +ENTRYPOINT [ "/opt/conda/bin/tini", "--", "/opt/docker/bin/entrypoint" ] +CMD [ "/bin/bash" ] diff --git a/linux-anvil-aarch64-cuda/entrypoint_source b/linux-anvil-aarch64-cuda/entrypoint_source new file mode 100644 index 00000000..55d440c2 --- /dev/null +++ b/linux-anvil-aarch64-cuda/entrypoint_source @@ -0,0 +1,5 @@ +# Add `CUDA_HOME` binaries to `PATH`. +export PATH="${PATH}:${CUDA_HOME}/bin" + +# Activate the `base` conda environment. 
+conda activate base diff --git a/linux-anvil-cuda/Dockerfile b/linux-anvil-cuda/Dockerfile index 582e1168..028d71c0 100644 --- a/linux-anvil-cuda/Dockerfile +++ b/linux-anvil-cuda/Dockerfile @@ -1,72 +1,63 @@ # Set environment variables during runtime. ARG CUDA_VER -ARG DISTRO_ARCH ARG DISTRO_NAME ARG DISTRO_VER -FROM --platform=linux/${DISTRO_ARCH} nvidia/cuda:${CUDA_VER}-devel-${DISTRO_NAME}${DISTRO_VER} +FROM --platform=linux/amd64 nvidia/cuda:${CUDA_VER}-devel-${DISTRO_NAME}${DISTRO_VER} LABEL maintainer="conda-forge " -# Set `ARG`s during runtime. ARG CUDA_VER -ARG DISTRO_ARCH ARG DISTRO_NAME ARG DISTRO_VER ENV CUDA_VER=${CUDA_VER} \ - DISTRO_ARCH=${DISTRO_ARCH} \ DISTRO_NAME=${DISTRO_NAME} \ DISTRO_VER=${DISTRO_VER} # Set an encoding to make things work smoothly. ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US.UTF-8 # Set path to CUDA install (this is a symlink to /usr/local/cuda-${CUDA_VER}) ENV CUDA_HOME /usr/local/cuda -# bust the docker cache so that we always rerun the installs below -ADD https://loripsum.net/api /opt/docker/etc/gibberish - -# Add qemu in here so that we can use this image on regular linux hosts with qemu user installed -ADD qemu-aarch64-static /usr/bin/qemu-aarch64-static -ADD qemu-ppc64le-static /usr/bin/qemu-ppc64le-static - # we want to persist a path in ldconfig (to avoid having to always set LD_LIBRARY_PATH), but *after* the existing entries; # since entries in ld.so.conf.d have precedence before the preconfigured directories, we first add the latter to the former -# the upstream images all have libcuda.so under $CUDA_HOME/compat; -# add this to the ldconfig so it will be found correctly. 
-# don't forget to update settings by running ldconfig -RUN ldconfig -v 2>/dev/null | grep -v ^$'\t' | cut -f1 -d":" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf && \ - echo "$CUDA_HOME/compat" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf && \ - ldconfig +RUN ldconfig -v 2>/dev/null | grep -v ^$'\t' | cut -f1 -d":" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf \ + && if [ "${CUDA_VER}" != "9.2" ]; then \ + # the upstream images for 10.x all have libcuda.so under $CUDA_HOME/compat; + # add this to the ldconfig so it will be found correctly. + echo "$CUDA_HOME/compat" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf ; \ + else \ + # For 9.2, the image nvidia/cuda:9.2-devel-centos6 contains neither + # $CUDA_HOME/compat, nor any (non-stub) libcuda.so. We fix this by + # adding cuda-compat-10.0 (which is not used for building, but to + # test if loading the respective library/package works). However, + # due to licensing reasons, these cannot be part of the conda-forge + # docker images, but are instead added for CI purposes in: + # github.com/conda-forge/conda-forge-ci-setup-feedstock/blob/master/recipe/run_conda_forge_build_setup_linux + # Here we only set the ldconfig accordingly. + echo "/usr/local/cuda-10.0/compat" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf ; \ + fi \ + # don't forget to update settings by running ldconfig + && ldconfig + +# bust the docker cache so that we always rerun the installs below +ADD https://loripsum.net/api /opt/docker/etc/gibberish # Add the archived repo URL and fix RPM imports ADD centos7-repos /tmp/centos7-repos ADD scripts/fix_rpm /opt/docker/bin/fix_rpm RUN /opt/docker/bin/fix_rpm -# Add custom `yum_clean_all` script before using `yum` -COPY scripts/yum_clean_all /opt/docker/bin/ - # Install basic requirements. 
+COPY scripts/yum_clean_all /opt/docker/bin/ RUN yum update -y --disablerepo=cuda && \ yum install -y \ bzip2 \ sudo \ tar \ - which \ - && \ + which && \ /opt/docker/bin/yum_clean_all -# Fix locale in UBI 8 images -# See https://github.com/CentOS/sig-cloud-instance-images/issues/154 -RUN if [ "${DISTRO_NAME}${DISTRO_VER}" = "ubi8" ]; then \ - yum install -y \ - glibc-langpack-en \ - && \ - /opt/docker/bin/yum_clean_all; \ - fi - # Remove preinclude system compilers RUN rpm -e --nodeps --verbose gcc gcc-c++ @@ -85,6 +76,18 @@ RUN source /opt/conda/etc/profile.d/conda.sh && \ chgrp -R lucky /opt/conda && \ chmod -R g=u /opt/conda +# Symlink CUDA headers that were moved from $CUDA_HOME/include to /usr/include +# in CUDA 10.1. +RUN for HEADER_FILE in cublas_api.h cublas.h cublasLt.h cublas_v2.h cublasXt.h nvblas.h; do \ + if [[ ! -f "${CUDA_HOME}/include/${HEADER_FILE}" ]]; \ + then ln -s "/usr/include/${HEADER_FILE}" "${CUDA_HOME}/include/${HEADER_FILE}"; \ + fi; \ + done + +# Add qemu in here so that we can use this image on regular linux hosts with qemu user installed +ADD qemu-aarch64-static /usr/bin/qemu-aarch64-static +ADD qemu-ppc64le-static /usr/bin/qemu-ppc64le-static + # Add a file for users to source to activate the `conda` # environment `base`. Also add a file that wraps that for # use with the `ENTRYPOINT`. @@ -92,7 +95,7 @@ COPY linux-anvil-cuda/entrypoint_source /opt/docker/bin/entrypoint_source COPY scripts/entrypoint /opt/docker/bin/entrypoint # Ensure that all containers start with tini and the user selected process. -# Activate the `conda` environment `base`. +# Activate the `conda` environment `base` and the devtoolset compiler. # Provide a default command (`bash`), which will start if the user doesn't specify one. 
ENTRYPOINT [ "/opt/conda/bin/tini", "--", "/opt/docker/bin/entrypoint" ] CMD [ "/bin/bash" ] diff --git a/linux-anvil-ppc64le-cuda/Dockerfile b/linux-anvil-ppc64le-cuda/Dockerfile new file mode 100644 index 00000000..a6faf5a7 --- /dev/null +++ b/linux-anvil-ppc64le-cuda/Dockerfile @@ -0,0 +1,128 @@ +# Note that this image doesn't cache cudatoolkit as it is not packaged by defaults. +# This docker image is meant for packages using CUDA driver and not for packages +# using the CUDA runtime. + +ARG CUDA_VER +ARG DISTRO_NAME +ARG DISTRO_VER +FROM --platform=linux/ppc64le nvidia/cuda:${CUDA_VER}-devel-${DISTRO_NAME}${DISTRO_VER} + +LABEL maintainer="conda-forge " + +# Add qemu in here so that we can use this image on regular linux hosts with qemu user installed +ADD qemu-ppc64le-static /usr/bin/qemu-ppc64le-static + +# Re-declare the build args after FROM and expose them as runtime environment variables. +ARG CUDA_VER +ARG DISTRO_NAME +ARG DISTRO_VER +ENV CUDA_VER=${CUDA_VER} \ + DISTRO_NAME=${DISTRO_NAME} \ + DISTRO_VER=${DISTRO_VER} + +# Set an encoding to make things work smoothly. +ENV LANG=en_US.UTF-8 +ENV LANGUAGE=en_US.UTF-8 + +# Set path to CUDA install. +ENV CUDA_HOME=/usr/local/cuda + +# we want to persist a path in ldconfig (to avoid having to always set LD_LIBRARY_PATH), but *after* the existing entries; +# since entries in ld.so.conf.d have precedence before the preconfigured directories, we first add the latter to the former +RUN ldconfig -v 2>/dev/null | grep -v ^$'\t' | cut -f1 -d":" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf \ + && if [ "${CUDA_VER}" != "9.2" ]; then \ + # the upstream images for 10.x all have libcuda.so under $CUDA_HOME/compat; + # add this to the ldconfig so it will be found correctly. + echo "$CUDA_HOME/compat" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf ; \ + else \ + # For 9.2, the image nvidia/cuda:9.2-devel-centos6 contains neither + # $CUDA_HOME/compat, nor any (non-stub) libcuda.so. 
We fix this by + # adding cuda-compat-10.0 (which is not used for building, but to + # test if loading the respective library/package works). However, + # due to licensing reasons, these cannot be part of the conda-forge + # docker images, but are instead added for CI purposes in: + # github.com/conda-forge/conda-forge-ci-setup-feedstock/blob/master/recipe/run_conda_forge_build_setup_linux + # Here we only set the ldconfig accordingly. + echo "/usr/local/cuda-10.0/compat" >> /etc/ld.so.conf.d/cuda-$CUDA_VER.conf ; \ + fi \ + # don't forget to update settings by running ldconfig + && ldconfig + +# bust the docker cache so that we always rerun the installs below +ADD https://loripsum.net/api /opt/docker/etc/gibberish + +# Import the distro's RPM GPG keys; resolves a nasty NOKEY warning that appears when using yum. +# Naming convention changed with CentOS 8 - see: +# * https://lists.centos.org/pipermail/centos-devel/2019-September/017847.html +# * https://www.centos.org/keys/#project-keys +RUN if [ "${DISTRO_NAME}${DISTRO_VER}" = "centos7" ]; then \ + rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7 && \ + rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-SIG-AltArch-7-ppc64le; \ + elif [ "${DISTRO_NAME}${DISTRO_VER}" = "centos8" ]; then \ + rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-centosofficial; \ + elif [ "${DISTRO_NAME}${DISTRO_VER}" = "ubi8" ]; then \ + rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-redhat-release; \ + fi + +# Add custom `yum_clean_all` script before using `yum` +COPY scripts/yum_clean_all /opt/docker/bin/ + +# Fallback to CentOS vault for CentOS 8 support. +RUN if [ "${DISTRO_NAME}${DISTRO_VER}" = "centos8" ]; then \ + find /etc/yum.repos.d/ -name "CentOS-*.repo" -exec \ + sed -i 's/mirrorlist/#mirrorlist/g;s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' {} \; && \ + yum update -y --disablerepo=cuda && \ + /opt/docker/bin/yum_clean_all; \ + fi + +# Install basic requirements. 
+RUN yum update -y --disablerepo=cuda && \ + yum install -y \ + bzip2 \ + sudo \ + tar \ + which \ + && \ + /opt/docker/bin/yum_clean_all + +# Fix locale in CentOS 8 and UBI 8 images +# See https://github.com/CentOS/sig-cloud-instance-images/issues/154 +RUN if [ "${DISTRO_NAME}${DISTRO_VER}" = "centos8" ] || [ "${DISTRO_NAME}${DISTRO_VER}" = "ubi8" ]; then \ + yum install -y glibc-langpack-en \ + && \ + /opt/docker/bin/yum_clean_all; \ + fi + +# Remove preinstalled system compilers +RUN rpm -e --nodeps --verbose gcc gcc-c++ + +# Run common commands +COPY scripts/run_commands /opt/docker/bin/run_commands +RUN /opt/docker/bin/run_commands + +# Download and cache CUDA related packages (skipped for CUDA_VER 9.2, 10.0 and 10.1, where no cudatoolkit package is available). +RUN if [[ "$CUDA_VER" == "9.2" || "$CUDA_VER" == "10.0" || "$CUDA_VER" == "10.1" ]]; then \ + echo '`cudatoolkit` not available for CUDA_VER<10.2'; \ + else \ + source /opt/conda/etc/profile.d/conda.sh && \ + conda activate && \ + conda create -n test --yes --quiet --download-only \ + conda-forge::cudatoolkit=${CUDA_VER} \ + && \ + conda remove --yes --quiet -n test --all && \ + conda clean -tiy && \ + chgrp -R lucky /opt/conda && \ + chmod -R g=u /opt/conda; \ + fi + +# Add a file for users to source to activate the `conda` +# environment `base`. Also add a file that wraps that for +# use with the `ENTRYPOINT`. +COPY linux-anvil-ppc64le-cuda/entrypoint_source /opt/docker/bin/entrypoint_source +COPY scripts/entrypoint /opt/docker/bin/entrypoint + +# Ensure that all containers start with tini and the user selected process. +# Activate the `conda` environment `base`. +# Provide a default command (`bash`), which will start if the user doesn't specify one. 
+ENTRYPOINT [ "/opt/conda/bin/tini", "--", "/opt/docker/bin/entrypoint" ] +CMD [ "/bin/bash" ] diff --git a/linux-anvil-ppc64le-cuda/entrypoint_source b/linux-anvil-ppc64le-cuda/entrypoint_source new file mode 100644 index 00000000..55d440c2 --- /dev/null +++ b/linux-anvil-ppc64le-cuda/entrypoint_source @@ -0,0 +1,5 @@ +# Add `CUDA_HOME` binaries to `PATH`. +export PATH="${PATH}:${CUDA_HOME}/bin" + +# Activate the `base` conda environment. +conda activate base diff --git a/scripts/fix_rpm b/scripts/fix_rpm index d64b8bc8..788c3766 100755 --- a/scripts/fix_rpm +++ b/scripts/fix_rpm @@ -14,8 +14,6 @@ if [[ "${DISTRO_NAME}${DISTRO_VER}" == "centos7" ]]; then elif [[ "$(uname -m)" == "aarch64" ]]; then rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7-aarch64 fi -elif [ "${DISTRO_NAME}${DISTRO_VER}" = "ubi8" ]; then - rpm --import /etc/pki/rpm-gpg/RPM-GPG-KEY-redhat-release fi rm -rf "/tmp/centos7-repos"