Skip to content

Commit

Permalink
[Woptim] Shared CI 2023.12.0 and rocm 5.7.1 (#864)
Browse files Browse the repository at this point in the history
Update the Shared CI infrastructure to its latest version and update the external packages and compilers.

We now test with rocm 5.7.1 on tioga, and rocm 5.7.0 on corona, and cce 16.0.1 on both.
  • Loading branch information
adrienbernede authored and kab163 committed Aug 1, 2024
1 parent a333c84 commit bcdee68
Show file tree
Hide file tree
Showing 10 changed files with 139 additions and 25 deletions.
4 changes: 2 additions & 2 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ stages:
include:
- local: '.gitlab/custom-jobs-and-variables.yml'
- project: 'radiuss/radiuss-shared-ci'
ref: 'v2023.09.0'
ref: 'v2023.12.1'
file: 'pipelines/${CI_MACHINE}.yml'
- artifact: '${CI_MACHINE}-jobs.yml'
job: 'generate-job-lists'
Expand All @@ -82,7 +82,7 @@ stages:
include:
# [Optional] checks preliminary to running the actual CI test
#- project: 'radiuss/radiuss-shared-ci'
# ref: 'v2023.09.0'
# ref: 'v2023.12.1'
# file: 'preliminary-ignore-draft-pr.yml'
# pipelines subscribed by the project
- local: '.gitlab/subscribed-pipelines.yml'
25 changes: 18 additions & 7 deletions .gitlab/custom-jobs-and-variables.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,29 +14,40 @@ variables:

# Ruby
# Arguments for top level allocation
RUBY_SHARED_ALLOC: "--exclusive --reservation=ci --qos=ci_ruby --time=10 --nodes=1"
RUBY_SHARED_ALLOC: "--exclusive --reservation=ci --time=10 --nodes=2"
# Arguments for job level allocation
RUBY_JOB_ALLOC: "--overlap --reservation=ci --qos=ci_ruby --time=10 --nodes=1"
# Note: We repeat the reservation, necessary when jobs are manually re-triggered.
RUBY_JOB_ALLOC: "--overlap --reservation=ci --nodes=1"
# Project specific variants for ruby
PROJECT_RUBY_VARIANTS: "~shared +fortran +tools tests=basic "
# Project specific deps for ruby
PROJECT_RUBY_DEPS: ""

# Poodle
# Arguments for top level allocation
POODLE_SHARED_ALLOC: "--exclusive --partition=pdebug --time=8 --nodes=1"
# Arguments for job level allocation
POODLE_JOB_ALLOC: "--overlap --nodes=1"
# Project specific variants for poodle
PROJECT_POODLE_VARIANTS: "~shared +fortran +tools tests=basic"
# Project specific deps for poodle
PROJECT_POODLE_DEPS: ""

# Corona
# Arguments for top level allocation
CORONA_SHARED_ALLOC: "--exclusive --time-limit=15m --nodes=1"
CORONA_SHARED_ALLOC: "--exclusive --time-limit=10m --nodes=1"
# Arguments for job level allocation
CORONA_JOB_ALLOC: "--time-limit=10m --nodes=1 --begin-time=+5s"
CORONA_JOB_ALLOC: "--nodes=1 --begin-time=+5s"
# Project specific variants for corona
PROJECT_CORONA_VARIANTS: "~shared +fortran +device_alloc tests=basic "
# Project specific deps for corona
PROJECT_CORONA_DEPS: ""

# Tioga
# Arguments for top level allocation
TIOGA_SHARED_ALLOC: "--exclusive --time-limit=20m --nodes=1"
TIOGA_SHARED_ALLOC: "--exclusive --time-limit=15m --nodes=1"
# Arguments for job level allocation
TIOGA_JOB_ALLOC: "--time-limit=15m --nodes=1 --begin-time=+5s"
TIOGA_JOB_ALLOC: "--nodes=1 --begin-time=+5s"
# Project specific variants for tioga
PROJECT_TIOGA_VARIANTS: "~shared +fortran +device_alloc tests=basic "
# Project specific deps for tioga
Expand All @@ -45,7 +56,7 @@ variables:
# Lassen and Butte use a different job scheduler (spectrum lsf) that does not
# allow pre-allocation the same way slurm does.
# Arguments for job level allocation
LASSEN_JOB_ALLOC: "1 -W 18"
LASSEN_JOB_ALLOC: "1 -W 10 -q pci"
# Project specific variants for lassen
PROJECT_LASSEN_VARIANTS: "~shared +fortran +tools tests=basic "
# Project specific deps for lassen
Expand Down
11 changes: 9 additions & 2 deletions .gitlab/jobs/corona.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@
# SPDX-License-Identifier: (MIT)
###############################################################################

# Override reproducer section to define Umpire specific variables.
# Hidden template (leading dot) whose script prints two `export` lines, one
# for MODULE_LIST and one for SPEC. The `${SPEC//\"/\\\"}` bash substitution
# backslash-escapes every double quote embedded in SPEC so the printed line
# stays a valid shell assignment.
# NOTE(review): presumably consumed by the shared CI reproducer output - confirm.
.corona_reproducer_vars:
script:
- |
echo -e "export MODULE_LIST=\"${MODULE_LIST}\""
echo -e "export SPEC=\"${SPEC//\"/\\\"}\""
########################
# Overridden shared jobs
########################
Expand All @@ -25,8 +32,8 @@
# This job intentionally tests our umpire package.py because although this job does not
# explicitly have the ~tools, the package.py should still disable tools from being built.
###
rocmcc_5_6_1_hip_openmp_device_alloc:
rocmcc_5_7_0_hip_openmp_device_alloc:
variables:
SPEC: "~shared +fortran +openmp +rocm +device_alloc tests=basic amdgpu_target=gfx906 %rocmcc@5.6.1 ^hip@5.6.1"
SPEC: "~shared +fortran +openmp +rocm +device_alloc tests=basic amdgpu_target=gfx906 %rocmcc@5.7.0 ^hip@5.7.0"
extends: .job_on_corona

16 changes: 16 additions & 0 deletions .gitlab/jobs/lassen.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,28 @@
# SPDX-License-Identifier: (MIT)
###############################################################################

# Override reproducer section to define Umpire specific variables.
# Hidden template (leading dot) whose script prints two `export` lines, one
# for MODULE_LIST and one for SPEC. The `${SPEC//\"/\\\"}` bash substitution
# backslash-escapes every double quote embedded in SPEC so the printed line
# stays a valid shell assignment.
# NOTE(review): presumably consumed by the shared CI reproducer output - confirm.
.lassen_reproducer_vars:
script:
- |
echo -e "export MODULE_LIST=\"${MODULE_LIST}\""
echo -e "export SPEC=\"${SPEC//\"/\\\"}\""
########################
# Overridden shared jobs
########################
# We duplicate the shared jobs description and add necessary changes for Umpire.
# We keep ${PROJECT_<MACHINE>_VARIANTS} and ${PROJECT_<MACHINE>_DEPS} so that
# the comparison with the original job is easier.

# Overridden to increase the allocation: this job carries its own
# LASSEN_JOB_ALLOC value instead of relying on the project-wide one.
xl_2022_08_19_gcc_8_3_1_cuda_11_2_0:
  extends: .job_on_lassen
  variables:
    LASSEN_JOB_ALLOC: '1 -W 20 -q pci'
    MODULE_LIST: 'cuda/11.2.0'
    SPEC: '${PROJECT_LASSEN_VARIANTS} +cuda %xl@16.1.1.12.gcc.8.3.1 ^cuda@11.2.0+allow-unsupported-compilers ${PROJECT_LASSEN_DEPS}'


############
# Extra jobs
Expand Down Expand Up @@ -92,4 +107,5 @@ xl_2022_08_19_gcc_8_3_1_cuda_11_2_tpls:
variables:
SPEC: "~shared +fortran +cuda +tools tests=basic %xl@16.1.1.12.gcc.8.3.1 ^cuda@11.7.0+allow-unsupported-compilers"
MODULE_LIST: "cuda/11.7.0"
LASSEN_JOB_ALLOC: "1 -W 20 -q pci"
extends: .job_on_lassen
60 changes: 60 additions & 0 deletions .gitlab/jobs/poodle.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
###############################################################################
# Copyright (c) 2022-23, Lawrence Livermore National Security, LLC and RADIUSS
# project contributors. See the COPYRIGHT file for details.
#
# SPDX-License-Identifier: (MIT)
###############################################################################

# Override reproducer section to define Umpire specific variables.
# Hidden template (leading dot) whose script prints two `export` lines, one
# for MODULE_LIST and one for SPEC. The `${SPEC//\"/\\\"}` bash substitution
# backslash-escapes every double quote embedded in SPEC so the printed line
# stays a valid shell assignment.
# NOTE(review): presumably consumed by the shared CI reproducer output - confirm.
.poodle_reproducer_vars:
script:
- |
echo -e "export MODULE_LIST=\"${MODULE_LIST}\""
echo -e "export SPEC=\"${SPEC//\"/\\\"}\""
########################
# Overridden shared jobs
########################
# We duplicate the shared jobs description and add necessary changes for Umpire.
# We keep ${PROJECT_<MACHINE>_VARIANTS} and ${PROJECT_<MACHINE>_DEPS} so that
# the comparison with the original job is easier.

# Allow failure due to compiler internal error building wrapfumpire.f
# The spec is assembled from the poodle-specific variables
# (PROJECT_POODLE_VARIANTS / PROJECT_POODLE_DEPS) rather than the ruby ones,
# so that tuning the poodle settings actually affects this poodle job and the
# override stays comparable with the shared poodle job it replaces.
intel_2022_1_0:
  variables:
    SPEC: "${PROJECT_POODLE_VARIANTS} %intel@2022.1.0 ${PROJECT_POODLE_DEPS}"
  extends: .job_on_poodle
  allow_failure: true

############
# Extra jobs
############
# We do not recommend using ${PROJECT_<MACHINE>_VARIANTS} and
# ${PROJECT_<MACHINE>_DEPS} in the extra jobs. There is no reason not to fully
# describe the spec here.

# Extra poodle job: gcc 10.3.1 build with the +numa variant enabled.
gcc_10_3_1_numa:
  extends: .job_on_poodle
  variables:
    SPEC: '~shared +fortran +numa +tools tests=basic %gcc@10.3.1'

# Extra poodle job: clang@14.0.6.gcc.10.3.1 build with the
# +sqlite_experimental variant enabled (no +fortran in this spec).
clang_14_0_6_gcc_10_3_1_sqlite_experimental:
  extends: .job_on_poodle
  variables:
    SPEC: '~shared +sqlite_experimental +tools tests=basic %clang@14.0.6.gcc.10.3.1'

# Develop builds against specific tpl version.
# This one uses the clang@14.0.6.gcc.10.3.1 compiler.
clang_14_0_6_gcc_10_3_1_tpls:
  extends: .job_on_poodle
  variables:
    SPEC: '~shared +fortran +tools tests=basic %clang@14.0.6.gcc.10.3.1'

# Extra poodle job: same variants as the clang tpls job, built with gcc@10.3.1.
gcc_10_3_1_tpls:
  extends: .job_on_poodle
  variables:
    SPEC: '~shared +fortran +tools tests=basic %gcc@10.3.1'

# Extra poodle job: gcc@10.3.1 build with +ipc_shmem; the spec requests
# neither +fortran nor +tools.
gcc_10_3_1_ipc_no_mpi:
  extends: .job_on_poodle
  variables:
    SPEC: '~shared +ipc_shmem tests=basic %gcc@10.3.1'
19 changes: 9 additions & 10 deletions .gitlab/jobs/ruby.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
###############################################################################
# Copyright (c) 2022, Lawrence Livermore National Security, LLC and RADIUSS
# Copyright (c) 2022-23, Lawrence Livermore National Security, LLC and RADIUSS
# project contributors. See the COPYRIGHT file for details.
#
# SPDX-License-Identifier: (MIT)
###############################################################################

# Override reproducer section to define Umpire specific variables.
# Hidden template (leading dot) whose script prints two `export` lines, one
# for MODULE_LIST and one for SPEC. The `${SPEC//\"/\\\"}` bash substitution
# backslash-escapes every double quote embedded in SPEC so the printed line
# stays a valid shell assignment.
# NOTE(review): presumably consumed by the shared CI reproducer output - confirm.
.ruby_reproducer_vars:
script:
- |
echo -e "export MODULE_LIST=\"${MODULE_LIST}\""
echo -e "export SPEC=\"${SPEC//\"/\\\"}\""
########################
# Overridden shared jobs
########################
Expand All @@ -15,7 +22,7 @@
# Allow failure due to compiler internal error building wrapfumpire.f
intel_2022_1_0:
variables:
SPEC: "~shared +fortran +tools tests=basic %intel@2022.1.0"
SPEC: "${PROJECT_RUBY_VARIANTS} %intel@2022.1.0 ${PROJECT_RUBY_DEPS}"
extends: .job_on_ruby
allow_failure: true

Expand Down Expand Up @@ -51,11 +58,3 @@ gcc_10_3_1_ipc_no_mpi:
variables:
SPEC: "~shared +ipc_shmem tests=basic %gcc@10.3.1"
extends: .job_on_ruby

# Oneapi is not available on ruby@toss4 (rhel8)
## We deactivate this job as it is known to fail with Umpire: needs gcc toolchain.
#intel_2022_1_0:
# variables:
# ON_RUBY: "OFF"
# SPEC: "${PROJECT_RUBY_VARIANTS} %intel@2022.1.0 ${PROJECT_RUBY_DEPS}"
# extends: .job_on_ruby
11 changes: 9 additions & 2 deletions .gitlab/jobs/tioga.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@
# SPDX-License-Identifier: (MIT)
###############################################################################

# Override reproducer section to define Umpire specific variables.
# Hidden template (leading dot) whose script prints two `export` lines, one
# for MODULE_LIST and one for SPEC. The `${SPEC//\"/\\\"}` bash substitution
# backslash-escapes every double quote embedded in SPEC so the printed line
# stays a valid shell assignment.
# NOTE(review): presumably consumed by the shared CI reproducer output - confirm.
.tioga_reproducer_vars:
script:
- |
echo -e "export MODULE_LIST=\"${MODULE_LIST}\""
echo -e "export SPEC=\"${SPEC//\"/\\\"}\""
########################
# Overridden shared jobs
########################
Expand All @@ -29,8 +36,8 @@ cce_16_0_1:
# This job intentionally tests our umpire package.py because although this job does not
# explicitly have the ~tools, the package.py should still disable tools from being built.
###
rocmcc_5_6_1_hip_openmp_device_alloc:
rocmcc_5_7_1_hip_openmp_device_alloc:
variables:
SPEC: "~shared +fortran +openmp +rocm +device_alloc tests=basic amdgpu_target=gfx90a %rocmcc@5.6.1 ^hip@5.6.1"
SPEC: "~shared +fortran +openmp +rocm +device_alloc tests=basic amdgpu_target=gfx90a %rocmcc@5.7.1 ^hip@5.7.1"
extends: .job_on_tioga

14 changes: 14 additions & 0 deletions .gitlab/subscribed-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,14 @@ generate-job-lists:
LOCAL_JOBS_PATH: ".gitlab/jobs"
script:
- cat ${RADIUSS_JOBS_PATH}/ruby.yml ${LOCAL_JOBS_PATH}/ruby.yml > ruby-jobs.yml
- cat ${RADIUSS_JOBS_PATH}/poodle.yml ${LOCAL_JOBS_PATH}/poodle.yml > poodle-jobs.yml
- cat ${RADIUSS_JOBS_PATH}/lassen.yml ${LOCAL_JOBS_PATH}/lassen.yml > lassen-jobs.yml
- cat ${RADIUSS_JOBS_PATH}/corona.yml ${LOCAL_JOBS_PATH}/corona.yml > corona-jobs.yml
- cat ${RADIUSS_JOBS_PATH}/tioga.yml ${LOCAL_JOBS_PATH}/tioga.yml > tioga-jobs.yml
artifacts:
paths:
- ruby-jobs.yml
- poodle-jobs.yml
- lassen-jobs.yml
- corona-jobs.yml
- tioga-jobs.yml
Expand All @@ -60,6 +62,18 @@ ruby-build-and-test:
needs: [ruby-up-check, generate-job-lists]
extends: [.build-and-test]

# POODLE
# Machine check job for poodle; all behavior is inherited from the
# .machine-check template, only CI_MACHINE is set here.
poodle-up-check:
  extends:
    - .machine-check
  variables:
    CI_MACHINE: 'poodle'

# Build-and-test job for poodle; it needs both the poodle machine check and
# the generate-job-lists job before it can run.
poodle-build-and-test:
  extends:
    - .build-and-test
  needs:
    - poodle-up-check
    - generate-job-lists
  variables:
    CI_MACHINE: 'poodle'

# CORONA
corona-up-check:
variables:
Expand Down
2 changes: 1 addition & 1 deletion .uberenv_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"package_final_phase" : "initconfig",
"package_source_dir" : "../..",
"spack_url": "https://github.com/spack/spack.git",
"spack_branch": "v0.20.1",
"spack_branch": "develop-2024-01-21",
"spack_activate" : {},
"spack_configs_path": "scripts/radiuss-spack-configs",
"spack_packages_path": "scripts/radiuss-spack-configs/packages",
Expand Down
2 changes: 1 addition & 1 deletion scripts/radiuss-spack-configs

0 comments on commit bcdee68

Please sign in to comment.