diff --git a/build.sh b/build.sh index c1cfe43d8d..3f82a882da 100755 --- a/build.sh +++ b/build.sh @@ -1,17 +1,10 @@ #!/bin/bash set -eu -uname_s=$(uname -s) -if [[ ${uname_s} == Darwin ]]; then - UFS_MODEL_DIR=$(greadlink -f -n "${BASH_SOURCE[0]}") - UFS_MODEL_DIR=$(dirname "${UFS_MODEL_DIR}") - UFS_MODEL_DIR=$(cd "${UFS_MODEL_DIR}" && pwd -P) -else - UFS_MODEL_DIR=$(readlink -f -n "${BASH_SOURCE[0]}") - UFS_MODEL_DIR=$(dirname "${UFS_MODEL_DIR}") - UFS_MODEL_DIR=$(cd "${UFS_MODEL_DIR}" && pwd -P) -fi -echo "UFS MODEL DIR: ${UFS_MODEL_DIR}" + +SCRIPT_REALPATH=$(realpath "${BASH_SOURCE[0]}") +UFS_MODEL_DIR=$(dirname "${SCRIPT_REALPATH}") readonly UFS_MODEL_DIR +echo "UFS MODEL DIR: ${UFS_MODEL_DIR}" export CC=${CC:-mpicc} export CXX=${CXX:-mpicxx} @@ -26,4 +19,4 @@ for i in ${CMAKE_FLAGS}; do ARR_CMAKE_FLAGS+=("${i}") ; done cmake "${UFS_MODEL_DIR}" "${ARR_CMAKE_FLAGS[@]}" # Turn off OpenMP threading for parallel builds # to avoid exhausting the number of user processes -OMP_NUM_THREADS=1 make -j "${BUILD_JOBS:-4}" "VERBOSE=${BUILD_VERBOSE:-}" \ No newline at end of file +OMP_NUM_THREADS=1 make -j "${BUILD_JOBS:-4}" "VERBOSE=${BUILD_VERBOSE:-}" diff --git a/tests/compile.sh b/tests/compile.sh index 458d985a88..8ab0f60b82 100755 --- a/tests/compile.sh +++ b/tests/compile.sh @@ -12,14 +12,8 @@ function trim { SECONDS=0 -uname_s=$(uname -s) -if [[ ${uname_s} == Darwin ]]; then - greadlnk=$(greadlink -f -n "${BASH_SOURCE[0]}" ) - MYDIR=$(cd "$(dirname "${greadlnk}" )" && pwd -P) -else - readlnk=$(readlink -f -n "${BASH_SOURCE[0]}" ) - MYDIR=$(cd "$(dirname "${readlnk}" )" && pwd -P) -fi +SCRIPT_REALPATH=$(realpath "${BASH_SOURCE[0]}") +MYDIR=$(dirname "${SCRIPT_REALPATH}") readonly MYDIR # ---------------------------------------------------------------------- diff --git a/tests/error-test.conf b/tests/error-test.conf index 2382c59a9e..3e931e67dc 100644 --- a/tests/error-test.conf +++ b/tests/error-test.conf @@ -11,6 +11,9 @@ COMPILE | atm_dyn32 | intel | -DAPP=ATM -DCCPP_SUITES=FV3_GFS_v16,FV3_GFS_v16_fl # This should succeed RUN | control_c48.v2.sfc | | baseline | +# This should fail due to wall clock timeout +RUN | control_c48.v2.sfc_timeout | | baseline | + # These tests should always fail, and prevent the workflow from completing. RUN | fail_to_copy | | baseline | RUN | fail_to_run | | baseline | diff --git a/tests/rt.sh b/tests/rt.sh index ff3808ac0e..5d3b6d96e3 100755 --- a/tests/rt.sh +++ b/tests/rt.sh @@ -1041,6 +1041,7 @@ if [[ ${skip_check_results} == true ]]; then else REGRESSIONTEST_LOG=${PATHRT}/logs/RegressionTests_${MACHINE_ID}.log fi +rm -f "${REGRESSIONTEST_LOG}" TEST_START_TIME="$(date '+%Y%m%d %T')" export TEST_START_TIME diff --git a/tests/rt_utils.sh b/tests/rt_utils.sh index 6f049cf161..3fb1070547 100755 --- a/tests/rt_utils.sh +++ b/tests/rt_utils.sh @@ -124,10 +124,6 @@ submit_and_wait() { local -r job_card=$1 - ROCOTO=${ROCOTO:-false} - ECFLOW=${ECFLOW:-false} - - local test_status='PASS' case ${SCHEDULER} in pbs) qsubout=$( qsub "${job_card}" ) @@ -187,26 +183,38 @@ submit_and_wait() { set +e job_info=$( qstat "${jobid}" ) set -e + if grep -q "${jobid}" <<< "${job_info}"; then + job_running=true + # Getting the status letter from scheduler info + status=$( grep "${jobid}" <<< "${job_info}" ) + status=$( awk '{print $5}' <<< "${status}" ) + else + job_running=false + status='COMPLETED' + set +e + exit_status=$( qstat "${jobid}" -x -f | grep Exit_status | awk '{print $3}') + set -e + if [[ ${exit_status} != 0 ]]; then + status='FAILED' + fi + fi ;; slurm) - job_info=$( squeue -u "${USER}" -j "${jobid}" ) + job_info=$( squeue -u "${USER}" -j "${jobid}" -o '%i %T' ) + if grep -q "${jobid}" <<< "${job_info}"; then + job_running=true + else + job_running=false + job_info=$( sacct -n -j "${jobid}" --format=JobID,state%20,Jobname%64 | grep "^${jobid}" | grep "${JBNME}" ) + fi + # Getting the status letter from scheduler info + status=$( grep "${jobid}" <<< "${job_info}" ) + status=$( awk '{print $2}' <<< "${status}" ) ;; *) ;; esac - - if grep -q "${jobid}" <<< "${job_info}"; then - job_running=true - else - job_running=false - continue - fi - - # Getting the status letter from scheduler info - status=$( grep "${jobid}" <<< "${job_info}" ) - status=$( awk '{print $5}' <<< "${status}" ) - case ${status} in #waiting cases #pbs: Q @@ -217,7 +225,7 @@ submit_and_wait() { #running cases #pbs: R #slurm: (old: R, new: RUNNING) - R|RUNNING) + R|RUNNING|COMPLETING) status_label='Job running' ;; #held cases @@ -229,14 +237,15 @@ submit_and_wait() { #fail/completed cases #slurm: F/FAILED TO/TIMEOUT CA/CANCELLED F|TO|CA|FAILED|TIMEOUT|CANCELLED) - echo "rt_utils.sh: !!!!!!!!!!JOB TERMINATED!!!!!!!!!!" + echo "rt_utils.sh: !!!!!!!!!!JOB TERMINATED!!!!!!!!!! status=${status}" job_running=false #Trip the loop to end with these status flags interrupt_job exit 1 ;; #completed - #pbs only: C-Complete E-Exiting - C|E) + #pbs: C-Complete E-Exiting + #slurm: CD/COMPLETED + C|E|CD|COMPLETED) status_label='Completed' ;; *) @@ -253,140 +262,6 @@ submit_and_wait() { done } -check_results() { - echo "rt_utils.sh: Checking results of the regression test: ${TEST_ID}" - - ROCOTO=${ROCOTO:-false} - ECFLOW=${ECFLOW:-false} - - local test_status='PASS' - - # Give one minute for data to show up on file system - #sleep 60 - - { - echo - echo "baseline dir = ${RTPWD}/${CNTL_DIR}_${RT_COMPILER}" - echo "working dir = ${RUNDIR}" - echo "Checking test ${TEST_ID} results ...." - } > "${RT_LOG}" - echo - echo "baseline dir = ${RTPWD}/${CNTL_DIR}_${RT_COMPILER}" - echo "working dir = ${RUNDIR}" - echo "Checking test ${TEST_ID} results ...." - - if [[ ${CREATE_BASELINE} = false ]]; then - # - # --- regression test comparison - # - for i in ${LIST_FILES} ; do - printf %s " Comparing ${i} ....." >> "${RT_LOG}" - printf %s " Comparing ${i} ....." - - if [[ ! -f ${RUNDIR}/${i} ]] ; then - - echo ".......MISSING file" >> "${RT_LOG}" - echo ".......MISSING file" - test_status='FAIL' - - elif [[ ! -f ${RTPWD}/${CNTL_DIR}_${RT_COMPILER}/${i} ]] ; then - - echo ".......MISSING baseline" >> "${RT_LOG}" - echo ".......MISSING baseline" - test_status='FAIL' - - else - if [[ ${i##*.} == nc* ]] ; then - if [[ " orion hercules hera wcoss2 acorn derecho gaea jet s4 noaacloud " =~ ${MACHINE_ID} ]]; then - printf "USING NCCMP.." >> "${RT_LOG}" - printf "USING NCCMP.." - if [[ ${CMP_DATAONLY} == false ]]; then - nccmp -d -S -q -f -g -B --Attribute=checksum --warn=format "${RTPWD}/${CNTL_DIR}_${RT_COMPILER}/${i}" "${RUNDIR}/${i}" > "${i}_nccmp.log" 2>&1 && d=$? || d=$? - else - nccmp -d -S -q -f -B --Attribute=checksum --warn=format "${RTPWD}/${CNTL_DIR}_${RT_COMPILER}/${i}" "${RUNDIR}/${i}" > "${i}_nccmp.log" 2>&1 && d=$? || d=$? - fi - if [[ ${d} -ne 0 && ${d} -ne 1 ]]; then - printf "....ERROR" >> "${RT_LOG}" - printf "....ERROR" - test_status='FAIL' - fi - fi - else - printf "USING CMP.." >> "${RT_LOG}" - printf "USING CMP.." - cmp "${RTPWD}/${CNTL_DIR}_${RT_COMPILER}/${i}" "${RUNDIR}/${i}" >/dev/null 2>&1 && d=$? || d=$? - if [[ ${d} -eq 2 ]]; then - printf "....ERROR" >> "${RT_LOG}" - printf "....ERROR" - test_status='FAIL' - fi - - fi - - if [[ ${d} -ne 0 ]]; then - echo "....NOT IDENTICAL" >> "${RT_LOG}" - echo "....NOT IDENTICAL" - test_status='FAIL' - else - echo "....OK" >> "${RT_LOG}" - echo "....OK" - fi - - fi - - done - - else - # - # --- create baselines - # - echo;echo "Moving baseline ${TEST_ID} files ...." - echo;echo "Moving baseline ${TEST_ID} files ...." >> "${RT_LOG}" - - for i in ${LIST_FILES} ; do - printf %s " Moving ${i} ....." - printf %s " Moving ${i} ....." >> "${RT_LOG}" - if [[ -f ${RUNDIR}/${i} ]] ; then - mkdir -p "${NEW_BASELINE}/${CNTL_DIR}_${RT_COMPILER}/$(dirname "${i}")" - cp "${RUNDIR}/${i}" "${NEW_BASELINE}/${CNTL_DIR}_${RT_COMPILER}/${i}" - echo "....OK" >> "${RT_LOG}" - echo "....OK" - else - echo "....NOT OK. Missing ${RUNDIR}/${i}" >> "${RT_LOG}" - echo "....NOT OK. Missing ${RUNDIR}/${i}" - test_status='FAIL' - fi - done - - fi - - { - echo - grep "The total amount of wall time" "${RUNDIR}/out" - grep "The maximum resident set size" "${RUNDIR}/out" - echo - } >> "${RT_LOG}" - - TRIES='' - if [[ ${ECFLOW} == true ]]; then - if [[ ${ECF_TRYNO} -gt 1 ]]; then - TRIES=" Tries: ${ECF_TRYNO}" - fi - fi - echo "Test ${TEST_ID} ${test_status}${TRIES}" >> "${RT_LOG}" - echo >> "${RT_LOG}" - echo "Test ${TEST_ID} ${test_status}${TRIES}" - echo - - if [[ ${test_status} = 'FAIL' ]]; then - echo "${TEST_ID} failed in check_result" >> "${PATHRT}/fail_test_${TEST_ID}" - return 1 - else - return 0 - fi -} - - kill_job() { echo "rt_utils.sh: Killing job: ${jobid} on ${SCHEDULER}..." [[ -z $1 ]] && exit 1 @@ -580,14 +455,16 @@ ecflow_create_compile_task() { cat << EOF > "${ECFLOW_RUN}/${ECFLOW_SUITE}/compile_${COMPILE_ID}.ecf" %include