Skip to content

Commit

Permalink
Extend Levante CI with NAG (#260)
Browse files Browse the repository at this point in the history
This adds CI tests with the NAG compiler on Levante, using the compiler version and flags that are currently used by default to build ICON.
 
Both the DP and SP floating-point models are tested, as are the default and accelerator kernels, but not every possible combination of them. The accelerator kernels fail with a run-time error and are therefore marked as experimental.
  • Loading branch information
skosukhin authored Jan 26, 2024
1 parent 13440f0 commit 1949a8a
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 19 deletions.
19 changes: 17 additions & 2 deletions .github/workflows/gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
# Check out GitHub repository
#
- name: Check out GitHub repository
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
fetch-depth: 0
#
Expand Down Expand Up @@ -59,10 +59,24 @@ jobs:
levante:
runs-on: ubuntu-latest
needs: levante-init
continue-on-error: ${{ matrix.experimental }}
strategy:
fail-fast: false
matrix:
config-name: [nvhpc-gpu-openacc-DP, nvhpc-gpu-openacc-SP]
config-name:
- nvhpc-gpu-openacc-DP
- nvhpc-gpu-openacc-SP
#- nag-cpu-default-DP
- nag-cpu-default-SP
- nag-cpu-accel-DP
#- nag-cpu-accel-SP
include:
# The tests are not experimental by default:
- experimental: false
- config-name: nag-cpu-accel-DP
experimental: true
#- config-name: nag-cpu-accel-SP
# experimental: true
steps:
#
# Build, run and check (fetch the log)
Expand All @@ -87,3 +101,4 @@ jobs:
password: ${{ secrets.DKRZ_GITLAB_TOKEN }}
ref-type: tag
ref-name: ${{ needs.levante-init.outputs.ref-name }}
force: true
113 changes: 96 additions & 17 deletions .gitlab/levante.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,42 +9,74 @@ include:
variables:
SCHEDULER_PARAMETERS: >-
--account=mh0287
--partition=gpu
--gpus=1
--time=05:00
${EXTRA_SCHEDULER_PARAMETERS}
EXTRA_SCHEDULER_PARAMETERS:

.build-common:
.gpu:
extends: .default
variables:
EXTRA_SCHEDULER_PARAMETERS: >-
--partition=gpu
--gpus=1
.cpu:
extends: .default
variables:
EXTRA_SCHEDULER_PARAMETERS: >-
--partition=shared
.nvhpc:
variables:
# Core variables:
FC: /sw/spack-levante/nvhpc-22.5-v4oky3/Linux_x86_64/22.5/compilers/bin/nvfortran
# Production flags for ICON model:
FCFLAGS: -g -O2 -Mrecursive -Mallocatable=03 -Mstack_arrays -Minfo=accel,inline -acc=gpu,verystrict -gpu=cc80,cuda11.7 -DRTE_USE_${FPMODEL}
# Convenience variables:
VERSION_FCFLAGS: --version
NFHOME: /sw/spack-levante/netcdf-fortran-4.5.4-syv4qr
NCHOME: /sw/spack-levante/netcdf-c-4.9.0-gc7kgj

.nag:
variables:
# Core variables:
FC: /sw/spack-levante/nag-7.1-lqjbej/bin/nagfor
# Convenience variables:
VERSION_FCFLAGS: -V
NFHOME: /sw/spack-levante/netcdf-fortran-4.5.3-5di6qe
NCHOME: /sw/spack-levante/netcdf-c-4.8.1-vbnli5

.dp:
variables:
FPMODEL: DP
FAILURE_THRESHOLD: "7.e-4"

.sp:
variables:
FPMODEL: SP
FAILURE_THRESHOLD: "3.5e-1"

.common:
variables:
PYHOME: /sw/spack-levante/mambaforge-22.9.0-2-Linux-x86_64-kptncg
# Suppress an irrelevant but annoying error message:
# Suppress an irrelevant but annoying error message:
PROJ_LIB: ${PYHOME}/share/proj
# Make variables:
FCINCLUDE: -I${NFHOME}/include
LDFLAGS: -L${NFHOME}/lib -L${NCHOME}/lib
RRTMGP_ROOT: ${CI_PROJECT_DIR}
RRTMGP_DATA: ${CI_PROJECT_DIR}/rrtmgp-data
RTE_KERNELS: accel
before_script:
- module purge
- module load git
# Extend the existing environment variables:
- export PATH="${PYHOME}/bin:${PATH}"
- export LD_LIBRARY_PATH="${NFHOME}/lib:${NCHOME}/lib:${LD_LIBRARY_PATH-}"
# The -Mstack_arrays compiler flag requires a large stack:
# Some tests require a large stack:
- ulimit -s unlimited
script:
#
# Build libraries, examples and tests
#
- ${FC} --version
- ${FC} ${VERSION_FCFLAGS}
- make libs
- make -C build separate-libs
#
Expand All @@ -60,14 +92,61 @@ variables:
#
- make check

nvhpc-gpu-openacc-DP:
extends: .build-common
.nvhpc-gpu-openacc:
extends:
- .gpu
- .nvhpc
- .common
variables:
FPMODEL: DP
FAILURE_THRESHOLD: "7.e-4"
# Compiler flags used for ICON model:
FCFLAGS: -g -O2 -Mrecursive -Mallocatable=03 -Mstack_arrays -Minfo=accel,inline -acc=gpu,verystrict -gpu=cc80,cuda11.7 -DRTE_USE_${FPMODEL}
RTE_KERNELS: accel

nvhpc-gpu-openacc-SP:
extends: .build-common
.nag-cpu:
extends:
- .cpu
- .nag
- .common
variables:
FPMODEL: SP
FAILURE_THRESHOLD: "3.5e-1"
# Compiler flags used for ICON model:
FCFLAGS: -Wc=/sw/spack-levante/gcc-11.2.0-bcn7mb/bin/gcc -f2008 -colour -w=uep -g -gline -O0 -float-store -nan -Wc,-g -Wc,-pipe -Wc,--param,max-vartrack-size=200000000 -Wc,-mno-fma -C=all -DRTE_USE_CBOOL -DRTE_USE_${FPMODEL}

.nag-cpu-default:
extends: .nag-cpu
variables:
RTE_KERNELS: default

.nag-cpu-accel:
extends: .nag-cpu
variables:
RTE_KERNELS: accel

nvhpc-gpu-openacc-DP:
extends:
- .dp
- .nvhpc-gpu-openacc

nvhpc-gpu-openacc-SP:
extends:
- .sp
- .nvhpc-gpu-openacc

#nag-cpu-default-DP:
# extends:
# - .dp
# - .nag-cpu-default

nag-cpu-default-SP:
extends:
- .sp
- .nag-cpu-default

nag-cpu-accel-DP:
extends:
- .dp
- .nag-cpu-accel

#nag-cpu-accel-SP:
# extends:
# - .sp
# - .nag-cpu-accel

0 comments on commit 1949a8a

Please sign in to comment.