From c43eae763b7e0be75d2c9d37f5bb0ae21cbfde28 Mon Sep 17 00:00:00 2001 From: Woo-Sun Yang Date: Thu, 18 Jul 2024 10:31:28 -0700 Subject: [PATCH 1/5] Collect and process HtoD and DtoH transfer data --- testcases/conus_12-km/setup_rundir_WRF.sh | 1 + testcases/conus_12-km/sub_wrf_pm_testcase.sh | 7 +++++++ testcases/conus_12-km/updown.sh | 8 ++++++++ 3 files changed, 16 insertions(+) create mode 100755 testcases/conus_12-km/updown.sh diff --git a/testcases/conus_12-km/setup_rundir_WRF.sh b/testcases/conus_12-km/setup_rundir_WRF.sh index 0825ae1ed0..236d21cbfa 100755 --- a/testcases/conus_12-km/setup_rundir_WRF.sh +++ b/testcases/conus_12-km/setup_rundir_WRF.sh @@ -41,6 +41,7 @@ cp ${scriptdir}/namelist_v4.5.2_conus12km_restart.input ${rundir}/namelist.input #copy sbatch script cp ${scriptdir}/sub_wrf_pm_testcase.sh ${rundir}/sub_testcase.sh +cp ${scriptdir}/updown.sh ${rundir}/updown.sh #edit the email address and job name in the sbatch script sed -i "s/elvis@nersc.gov/${myemail}/" sub_testcase.sh diff --git a/testcases/conus_12-km/sub_wrf_pm_testcase.sh b/testcases/conus_12-km/sub_wrf_pm_testcase.sh index e6146354fb..d3404d6f6c 100644 --- a/testcases/conus_12-km/sub_wrf_pm_testcase.sh +++ b/testcases/conus_12-km/sub_wrf_pm_testcase.sh @@ -29,6 +29,13 @@ n=64 # number of MPI ranks #Modules -------------------------------------------------------------------- if [[ $use_gpu -eq 1 ]]; then module load gpu + if [[ $peenv == gnu ]]; then + export GOMP_DEBUG=1 + elif [[ $peenv == nvidia ]]; then + export NVCOMPILER_ACC_NOTIFY=3 + elif [[ $peenv == cray ]]; then + export CRAY_ACC_DEBUG=3 + fi else module load cpu fi diff --git a/testcases/conus_12-km/updown.sh b/testcases/conus_12-km/updown.sh new file mode 100755 index 0000000000..f90c5bf34c --- /dev/null +++ b/testcases/conus_12-km/updown.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# Process upload and download data transfers collected with NVCOMPILER_ACC_NOTIFY + set -e + + f=$1 # file name where the data is + awk '/^.*load/ {f=$5; 
gsub(/.*=/,"",f); l=$6; gsub(/.*=/,"",l); + v=$9; gsub(/.*=/,"",v); b=$10; gsub(/.*=/,"",b); + printf("%-8s %-13s %s %-34s %s\n", $1, f, l, v, b)}' $f | sort | uniq -c From 44671bcce03ca058230717cc49b8abeada2216c9 Mon Sep 17 00:00:00 2001 From: Woo-Sun Yang Date: Thu, 18 Jul 2024 20:13:30 -0700 Subject: [PATCH 2/5] Replace SBM input files with the ones that Koich generated to correct netCDF warnings --- testcases/conus_12-km/setup_rundir_WRF.sh | 2 ++ testcases/conus_12-km/sub_wrf_pm_testcase.sh | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/testcases/conus_12-km/setup_rundir_WRF.sh b/testcases/conus_12-km/setup_rundir_WRF.sh index 236d21cbfa..0b9b11c7ec 100755 --- a/testcases/conus_12-km/setup_rundir_WRF.sh +++ b/testcases/conus_12-km/setup_rundir_WRF.sh @@ -29,6 +29,8 @@ cp /global/cfs/cdirs/m4232/intern_2024/input_sbm/namelist.input.cold namelist cp /global/cfs/cdirs/m4232/intern_2024/input_sbm/CESM_RCP4.5_Current_Aerosol_Data.dat \ /global/cfs/cdirs/m4232/intern_2024/input_sbm/CESM_RCP4.5_Future_Aerosol_Data.dat . cp /global/cfs/cdirs/m4232/intern_2024/input_sbm/wrfrst_d01_2008-07-15_00_00_00 wrfrst_d01_2008-07-15_00_00_00.sbm +cp /global/cfs/cdirs/m4232/intern_2024/input_sbm/wrfbdy_d01 wrfbdy_d01.sbm +cp /global/cfs/cdirs/m4232/intern_2024/input_sbm/wrfinput_d01 wrfinput_d01.sbm cp -r /global/cfs/cdirs/m4232/intern_2024/input_sbm/{SBM_input_33,SBM_input_43} \ /global/cfs/cdirs/m4232/intern_2024/input_sbm/scattering_tables_2layer_high_quad_1dT_1%fw_110 . diff --git a/testcases/conus_12-km/sub_wrf_pm_testcase.sh b/testcases/conus_12-km/sub_wrf_pm_testcase.sh index d3404d6f6c..a33131f870 100644 --- a/testcases/conus_12-km/sub_wrf_pm_testcase.sh +++ b/testcases/conus_12-km/sub_wrf_pm_testcase.sh @@ -53,6 +53,11 @@ ml -t #1. don't load the wrf module #2. 
the modified executable (wrf.exe) has to be placed in the rundir +#To run an SBM case, rename namelist.input.sbm, wrfbdy_d01.sbm, +#wrfinput_d01.sbm and wrfrst_d01_2008-07-15_00_00_00.sbm to +#namelist.input, wrfbdy_d01, wrfinput_d01 and wrfrst_d01_2008-07-15_00_00_00, +#respectively. + #OpenMP settings: export OMP_NUM_THREADS=$ntile export OMP_PLACES=threads From 619bb637797f664cd3902989b4d2360ac2d2b9f1 Mon Sep 17 00:00:00 2001 From: Woo-Sun Yang Date: Wed, 24 Jul 2024 10:35:31 -0700 Subject: [PATCH 3/5] Add a wrapper script for using Nsight Compute --- testcases/conus_12-km/setup_rundir_WRF.sh | 1 + testcases/conus_12-km/sub_wrf_pm_testcase.sh | 4 +++- testcases/conus_12-km/wrapper-ncu.sh | 10 ++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) create mode 100755 testcases/conus_12-km/wrapper-ncu.sh diff --git a/testcases/conus_12-km/setup_rundir_WRF.sh b/testcases/conus_12-km/setup_rundir_WRF.sh index 0b9b11c7ec..a1f93fd199 100755 --- a/testcases/conus_12-km/setup_rundir_WRF.sh +++ b/testcases/conus_12-km/setup_rundir_WRF.sh @@ -44,6 +44,7 @@ cp ${scriptdir}/namelist_v4.5.2_conus12km_restart.input ${rundir}/namelist.input #copy sbatch script cp ${scriptdir}/sub_wrf_pm_testcase.sh ${rundir}/sub_testcase.sh cp ${scriptdir}/updown.sh ${rundir}/updown.sh +cp ${scriptdir}/wrapper-ncu.sh ${rundir}/wrapper-ncu.sh #edit the email address and job name in the sbatch script sed -i "s/elvis@nersc.gov/${myemail}/" sub_testcase.sh diff --git a/testcases/conus_12-km/sub_wrf_pm_testcase.sh b/testcases/conus_12-km/sub_wrf_pm_testcase.sh index a33131f870..a74f25467d 100644 --- a/testcases/conus_12-km/sub_wrf_pm_testcase.sh +++ b/testcases/conus_12-km/sub_wrf_pm_testcase.sh @@ -71,7 +71,9 @@ if [[ $use_gpu -eq 0 ]]; then srun -n $n -c $c --cpu-bind=cores /global/common/software/m4232/pm/v4.5.2/wrf.exe else (( c = (64 / (n / SLURM_JOB_NUM_NODES)) * 2 )) - srun -n $n -c $c --cpu-bind=cores --gpus-per-task=1 -gpu-bind=none /global/common/software/m4232/pm/v4.5.2/wrf.exe + 
srun -n $n -c $c --cpu-bind=cores --gpus-per-task=1 --gpu-bind=none /global/common/software/m4232/pm/v4.5.2/wrf.exe +# Profile with Nsight Compute: + srun -n $n -c $c --cpu_bind=cores --gpus-per-task=1 --gpu-bind=none ./wrapper-ncu.sh /global/common/software/m4232/pm/v4.5.2/wrf.exe fi #capture error code diff --git a/testcases/conus_12-km/wrapper-ncu.sh b/testcases/conus_12-km/wrapper-ncu.sh new file mode 100755 index 0000000000..daee2e3766 --- /dev/null +++ b/testcases/conus_12-km/wrapper-ncu.sh @@ -0,0 +1,10 @@ +#!/bin/bash + output=report.%q{SLURM_PROCID}.%q{SLURM_JOBID} + + if [[ ${SLURM_PROCID} == "0" ]] ; then + dcgmi profile --pause + ncu --target-processes all --kernel-id :::1 -o ${output} "$@" + dcgmi profile --resume + else + "$@" + fi From 933f8aab200bdaa2a8328d48823d09e491e17620 Mon Sep 17 00:00:00 2001 From: Woo-Sun Yang Date: Thu, 25 Jul 2024 21:17:38 -0700 Subject: [PATCH 4/5] Add a shell script for using codee commands; minor correction in sub_wrf_pm_testcase.sh --- runcodee | 121 +++++++++++++++++++ testcases/conus_12-km/sub_wrf_pm_testcase.sh | 2 +- 2 files changed, 122 insertions(+), 1 deletion(-) create mode 100755 runcodee diff --git a/runcodee b/runcodee new file mode 100755 index 0000000000..bee080e22c --- /dev/null +++ b/runcodee @@ -0,0 +1,121 @@ +#!/bin/bash + date + + ml cpu +#ml use /global/common/software/m4232/modulefiles_test/pm + ml codee + ml -t + + RPWD=$(realpath $PWD) # if not in HOME, can use RPWD=$PWD + +# (1) All + +#codee screening --config compile_commands.json \ +# --exclude ${RPWD}/phys/module_sf_noahmplsm.f90 \ +# --exclude ${RPWD}/phys/module_shcu_deng.f90 \ +# --exclude ${RPWD}/phys/module_cu_kf.f90 \ +# --verbose + +# (2) show failures + +#codee screening --show-failures=all \ +# --config compile_commands.json \ +# --exclude ${RPWD}/phys/module_sf_noahmplsm.f90 \ +# --exclude ${RPWD}/phys/module_shcu_deng.f90 \ +# --exclude ${RPWD}/phys/module_cu_kf.f90 \ +# --verbose + +# (3) Screening report of the entire phys 
directory + +#codee screening --config compile_commands.json \ +## --brief ${RPWD}/phys/ \ +## --exclude ${RPWD}/phys/module_sf_noahmplsm.f90 \ +## --exclude ${RPWD}/phys/module_shcu_deng.f90 \ +## --exclude ${RPWD}/phys/module_cu_kf.f90 \ +## --verbose +#codee screening --config compile_commands.json --brief ${RPWD}/phys/ \ +# --exclude ${RPWD}/phys/module_sf_noahmplsm.f90 \ +# --exclude ${RPWD}/phys/module_shcu_deng.f90 \ +# --exclude ${RPWD}/phys/module_cu_kf.f90 + +# + +#codee screening --config compile_commands.json \ +## --exclude ${RPWD}/phys/module_sf_noahmplsm.f90 \ +## --exclude ${RPWD}/phys/module_shcu_deng.f90 \ +## --exclude ${RPWD}/phys/module_cu_kf.f90 \ +## --verbose + +# (4) screening report of Thompson + +#codee screening --config compile_commands.json \ +# --brief ${RPWD}/phys/module_mp_thompson.f90 + +#codee screening --config compile_commands.json \ +# --brief ${RPWD}/phys/module_mp_thompson.f90 --target-arch gpu + +# Checks report of Thompson (add --verbose to see more details) + +#codee checks --config compile_commands.json \ +# --brief ${RPWD}/phys/module_mp_thompson.f90 + +#codee checks --config compile_commands.json \ +# --brief ${RPWD}/phys/module_mp_thompson.f90 --target-arch gpu + +# Codee diagnose report of Thompson + +#codee diagnose --summary \ +# --config compile_commands.json \ +# --brief ${RPWD}/phys/module_mp_thompson.f90 + +#codee diagnose --summary \ +# --config compile_commands.json \ +# --brief ${RPWD}/phys/module_mp_thompson.f90 --target-arch gpu + +# (5) Focus on a single function, e.g., Kernals_KS + +#codee screening --config compile_commands.json \ +# ${RPWD}/phys/module_mp_fast_sbm.f90:Kernals_KS --target-arch gpu + +# (6) Check the lines + +#codee checks --verbose \ +# --config compile_commands.json \ +# ${RPWD}/phys/module_mp_fast_sbm.f90:Kernals_KS --target-arch gpu + +# (7) Apply fix with codee: loop interchange + +#codee rewrite --memory loop-interchange --in-place \ +# --config compile_commands.json \ +# 
${RPWD}/phys/module_mp_fast_sbm.f90:6225:3 + +# (8) Check again with the modified file: + + codee checks --verbose \ + --config compile_commands.json \ + ${RPWD}/phys/module_mp_fast_sbm.f90:Kernals_KS --target-arch gpu + +# (9) Apply the fix with codee: OpenMP offloading + +#codee rewrite --offload omp-teams --in-place \ +# --config compile_commands.json \ +# ${RPWD}/phys/module_mp_fast_sbm.f90:6293:4 +#codee rewrite --offload omp-teams --in-place \ +# --config compile_commands.json \ +# ${RPWD}/phys/module_mp_fast_sbm.f90:6225:3,6236:4,6263:4,6291:4,6379:7 + +# (10) Check again with the modified file: + +#codee checks --verbose \ +# --config compile_commands.json \ +# ${RPWD}/phys/module_mp_fast_sbm.f90:Kernals_KS --target-arch gpu + +# (11) Apply fix with codee: OpenMP offloading + +#codee rewrite --offload omp-teams --in-place \ +# --config compile_commands.json \ +# ${RPWD}/phys/module_mp_fast_sbm.f90:6238:4,6265:4,6392:7 + +#codee rewrite --help + + date diff --git a/testcases/conus_12-km/sub_wrf_pm_testcase.sh b/testcases/conus_12-km/sub_wrf_pm_testcase.sh index a74f25467d..80d2d6e3dd 100644 --- a/testcases/conus_12-km/sub_wrf_pm_testcase.sh +++ b/testcases/conus_12-km/sub_wrf_pm_testcase.sh @@ -73,7 +73,7 @@ else (( c = (64 / (n / SLURM_JOB_NUM_NODES)) * 2 )) srun -n $n -c $c --cpu-bind=cores --gpus-per-task=1 --gpu-bind=none /global/common/software/m4232/pm/v4.5.2/wrf.exe # Profile with Nsight Compute: - srun -n $n -c $c --cpu_bind=cores --gpus-per-task=1 --gpu-bind=none ./wrapper-ncu.sh /global/common/software/m4232/pm/v4.5.2/wrf.exe +# srun -n $n -c $c --cpu_bind=cores --gpus-per-task=1 --gpu-bind=none ./wrapper-ncu.sh /global/common/software/m4232/pm/v4.5.2/wrf.exe fi #capture error code From 11df36630f60cd3192fe4631a160429cc23fc2ac Mon Sep 17 00:00:00 2001 From: Woo-Sun Yang Date: Sun, 28 Jul 2024 09:42:40 -0700 Subject: [PATCH 5/5] Update scripts for using Nsight Compute --- testcases/conus_12-km/sub_wrf_pm_testcase.sh | 2 ++ 
testcases/conus_12-km/wrapper-ncu.sh | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/testcases/conus_12-km/sub_wrf_pm_testcase.sh b/testcases/conus_12-km/sub_wrf_pm_testcase.sh index 80d2d6e3dd..8520d85d35 100644 --- a/testcases/conus_12-km/sub_wrf_pm_testcase.sh +++ b/testcases/conus_12-km/sub_wrf_pm_testcase.sh @@ -73,7 +73,9 @@ else (( c = (64 / (n / SLURM_JOB_NUM_NODES)) * 2 )) srun -n $n -c $c --cpu-bind=cores --gpus-per-task=1 --gpu-bind=none /global/common/software/m4232/pm/v4.5.2/wrf.exe # Profile with Nsight Compute: +# srun --ntasks-per-node=1 dcgmi profile --pause # srun -n $n -c $c --cpu_bind=cores --gpus-per-task=1 --gpu-bind=none ./wrapper-ncu.sh /global/common/software/m4232/pm/v4.5.2/wrf.exe +# srun --ntasks-per-node=1 dcgmi profile --resume fi #capture error code diff --git a/testcases/conus_12-km/wrapper-ncu.sh b/testcases/conus_12-km/wrapper-ncu.sh index daee2e3766..868dd69e1e 100755 --- a/testcases/conus_12-km/wrapper-ncu.sh +++ b/testcases/conus_12-km/wrapper-ncu.sh @@ -2,9 +2,7 @@ output=report.%q{SLURM_PROCID}.%q{SLURM_JOBID} if [[ ${SLURM_PROCID} == "0" ]] ; then - dcgmi profile --pause ncu --target-processes all --kernel-id :::1 -o ${output} "$@" - dcgmi profile --resume else "$@" fi