Skip to content

Commit

Permalink
Add slc9-gpu-builder
Browse files Browse the repository at this point in the history
  • Loading branch information
davidrohr authored and ktf committed Aug 9, 2024
1 parent dc14045 commit fcbf97f
Show file tree
Hide file tree
Showing 7 changed files with 128 additions and 3 deletions.
4 changes: 2 additions & 2 deletions slc8-gpu-builder/packer.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"_comment": "Alma 8.7 GPU builder X-enabled CUDA12.2-enabled AMD ROCm 5.5.3-enabled",
"_comment": "Alma 8.7 GPU builder X-enabled CUDA12.6-enabled AMD ROCm 5.5.3-enabled",
"variables": {
"REPO": "registry.cern.ch/alisw/slc8-gpu-builder",
"TAG": "latest",
"CUDA_PKG_VERSION": "12-2-12.2.*",
"CUDA_PKG_VERSION": "12-6-12.6.*",
"NVIDIA_GPGKEY_SUM": "d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87"
},
"builders": [
Expand Down
3 changes: 2 additions & 1 deletion slc8-gpu-builder/provision.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64
sed '/^Version/d' > /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA
echo "${NVIDIA_GPGKEY_SUM} /etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA" | sha256sum -c --strict -

rpm --import https://repo.almalinux.org/almalinux/RPM-GPG-KEY-AlmaLinux
# Install requirements for GPU event display, NVIDIA CUDA and AMD ROCm stacks
yum install -y freeglut-devel lsof "cuda-cudart-$CUDA_PKG_VERSION" 'cuda-compat-12-0-*' \
"cuda-libraries-$CUDA_PKG_VERSION" "cuda-nvtx-$CUDA_PKG_VERSION" \
Expand All @@ -30,7 +31,7 @@ yum clean all
rm -rf /var/cache/yum

# Set up NVIDIA CUDA stack
ln -s cuda-12.2 /usr/local/cuda
ln -s cuda-12.6 /usr/local/cuda
echo /usr/local/nvidia/lib >> /etc/ld.so.conf.d/nvidia.conf
echo /usr/local/nvidia/lib64 >> /etc/ld.so.conf.d/nvidia.conf
export PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
Expand Down
5 changes: 5 additions & 0 deletions slc9-gpu-builder/amdgpu.repo
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# AMD GPU driver package repository, pinned to the 6.2 release for RHEL 9.4.
# Fetched over HTTPS with package signatures verified against AMD's published
# key, so that packages installed as root during provisioning are authenticated.
[amdgpu]
name=amdgpu
baseurl=https://repo.radeon.com/amdgpu/6.2/rhel/9.4/main/x86_64/
enabled=1
gpgcheck=1
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key
6 changes: 6 additions & 0 deletions slc9-gpu-builder/cuda.repo
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# NVIDIA CUDA package repository. Packages are verified against the key that
# provision.sh downloads and checksum-validates before any install runs.
# NOTE(review): this uses the rhel8 package tree although this builder is
# based on an Alma 9 image - confirm whether rhel9 is intended.
[cuda]
name=cuda
baseurl=https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64
enabled=1
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA
58 changes: 58 additions & 0 deletions slc9-gpu-builder/packer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
{
"_comment": "Alma 9.4 GPU builder X-enabled CUDA12.6-enabled AMD ROCm 6.2.0-enabled",
"variables": {
"REPO": "registry.cern.ch/alisw/slc9-gpu-builder",
"TAG": "latest",
"CUDA_PKG_VERSION": "12-6-12.6.*",
"NVIDIA_GPGKEY_SUM": "d0664fbbdb8c32356d45de36c5984617217b2d0bef41b93ccecd326ba3b80c87"
},
"builders": [
{
"type": "docker",
"image": "registry.cern.ch/alisw/slc9-builder:latest",
"commit": true,
"changes": [
"ENV CMAKE_PREFIX_PATH=/opt/rocm/lib/cmake:/opt/clang/lib/cmake",
"ENV AMDAPPSDKROOT=/opt/amd-app/",
"ENV PATH=\"${PATH}:/usr/local/cuda/bin\"",
"ENV ALIBUILD_O2_FORCE_GPU=1"
]
}
],
"provisioners": [
{
"type": "file",
"source": "cuda.repo",
"destination": "/etc/yum.repos.d/cuda.repo"
},
{
"type": "file",
"source": "rocm.repo",
"destination": "/etc/yum.repos.d/rocm.repo"
},
{
"type": "file",
"source": "amdgpu.repo",
"destination": "/etc/yum.repos.d/amdgpu.repo"
},
{
"type": "shell",
"environment_vars": [
"CUDA_PKG_VERSION={{user `CUDA_PKG_VERSION`}}",
"NVIDIA_GPGKEY_SUM={{user `NVIDIA_GPGKEY_SUM`}}",
"GIT_VERSION={{user `GIT_VERSION`}}"
],
"script": "provision.sh"
}
],
"post-processors": [
[
{
"type": "docker-tag",
"repository": "{{user `REPO`}}",
"tag": "{{user `TAG`}}"
},
"docker-push"
]
]
}
50 changes: 50 additions & 0 deletions slc9-gpu-builder/provision.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/bin/sh -ex

# Rebuild the RPM database and discard cached package-manager data.
# Takes no arguments. Both the legacy yum cache directory and the dnf cache
# directory are removed, since this script installs packages with dnf.
wipednf () {
  rpmdb --rebuilddb
  dnf clean all
  rm -rf /var/cache/yum /var/cache/dnf
}

# Install the AMD APP SDK.
# The archive is fetched from s3.cern.ch: this old version is no longer
# available from AMD, and the newer versions will not work.
amd_app_installer=AMD-APP-SDK-v3.0.130.136-GA-linux64.sh
amd_app_sdk_libs=/opt/amd-app/lib/x86_64/sdk

curl -fsSL https://s3.cern.ch/swift/v1/alibuild-repo/slc8-gpu-builder-reqs/amdappsdk.tar.bz2 \
  | tar -xjv
"./${amd_app_installer}" --noexec --target /opt/amd-app
rm -v "${amd_app_installer}"

# Avoid file collisions between the AMD APP and AMD ROCm stacks: register the
# APP SDK's OpenCL library under a distinct "-app" name and rename the library
# itself to match.
mkdir -p /etc/OpenCL/vendors
echo "${amd_app_sdk_libs}/libamdocl64-app.so" > /etc/OpenCL/vendors/amdocl64-app.icd
mv -v "${amd_app_sdk_libs}/libamdocl64.so" "${amd_app_sdk_libs}/libamdocl64-app.so"

# Add the APP SDK library directory to the dynamic linker search path.
echo /opt/amd-app/lib/x86_64/ > /etc/ld.so.conf.d/amd-app-sdk.conf

# Install the NVIDIA GPG key.
# The key is downloaded, any line starting with "Version" is dropped, and the
# result is checked against the pinned SHA-256 in NVIDIA_GPGKEY_SUM; a mismatch
# makes sha256sum exit non-zero.
nvidia_key=/etc/pki/rpm-gpg/RPM-GPG-KEY-NVIDIA
curl -fsSL https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/D42D0685.pub \
  | sed '/^Version/d' > "${nvidia_key}"
echo "${NVIDIA_GPGKEY_SUM} ${nvidia_key}" | sha256sum -c --strict -

# The AlmaLinux key import is left here, disabled:
# rpm --import https://repo.almalinux.org/almalinux/RPM-GPG-KEY-AlmaLinux

# Bring the already-installed packages up to date.
dnf update -y

# Install requirements for the GPU event display and the NVIDIA CUDA and
# AMD ROCm stacks. CUDA packages are selected through CUDA_PKG_VERSION; the
# ROCm packages need no version because rocm.repo targets a specific
# distribution.
dnf install -y \
  freeglut-devel lsof \
  "cuda-cudart-$CUDA_PKG_VERSION" \
  'cuda-compat-12-0-*' \
  "cuda-libraries-$CUDA_PKG_VERSION" \
  "cuda-nvtx-$CUDA_PKG_VERSION" \
  "cuda-libraries-devel-$CUDA_PKG_VERSION" \
  "cuda-nvml-devel-$CUDA_PKG_VERSION" \
  "cuda-minimal-build-$CUDA_PKG_VERSION" \
  "cuda-command-line-tools-$CUDA_PKG_VERSION" \
  hip-rocclr ocl-icd ocl-icd-devel hipcub rocthrust rocm-dev hipify-clang

# Set up NVIDIA CUDA stack
# Expose the toolkit under the version-independent path /usr/local/cuda.
# -f and -n make this safe when /usr/local/cuda already exists as a symlink:
# the link is replaced instead of ln failing or creating a nested link inside
# the directory it points to.
# NOTE(review): the hard-coded 12.6 presumably has to be kept in sync with
# CUDA_PKG_VERSION from packer.json - confirm when bumping CUDA.
ln -sfn cuda-12.6 /usr/local/cuda
# Add the /usr/local/nvidia library directories to the dynamic linker config.
echo /usr/local/nvidia/lib >> /etc/ld.so.conf.d/nvidia.conf
echo /usr/local/nvidia/lib64 >> /etc/ld.so.conf.d/nvidia.conf
# These exports only affect the remainder of this script; the image-level PATH
# is set separately by the packer template.
export PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH}
export LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
# Refresh the dynamic linker cache so the new ld.so.conf.d entries take effect.
LIBRARY_PATH=/usr/local/cuda/lib64/stubs ldconfig

# Work around errors in the current ROCm release: flip the
# amdgpu-function-calls option from false to true wherever it appears in the
# hipcc files and the HIP CMake files.
# NOTE(review): the hipcc* glob is applied to every matching file; if any of
# them is a compiled binary rather than a script, this length-changing in-place
# substitution would corrupt it - confirm what ROCm 6.2 ships.
sed -i 's/amdgpu-function-calls=false/amdgpu-function-calls=true/g' \
  /opt/rocm/bin/hipcc* \
  /opt/rocm/lib/cmake/hip/*.cmake

# clang-ocl is currently broken; delete the binary so that nothing picks it
# up automatically.
rm -fv /opt/rocm/bin/clang-ocl /usr/bin/clang-ocl

# Final step: rebuild the RPM database and drop package caches.
wipednf
5 changes: 5 additions & 0 deletions slc9-gpu-builder/rocm.repo
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# AMD ROCm package repository, pinned to the 6.2 release for RHEL 9.
# Fetched over HTTPS with package signatures verified against AMD's published
# key, so that packages installed as root during provisioning are authenticated.
[ROCm]
name=ROCm
baseurl=https://repo.radeon.com/rocm/rhel9/6.2/main/
enabled=1
gpgcheck=1
gpgkey=https://repo.radeon.com/rocm/rocm.gpg.key

0 comments on commit fcbf97f

Please sign in to comment.