CI: Run test (and test.summary) locally

The test.summary rule was causing errors in our GitLab testing because multiple runs (concurrent or otherwise) shared the same workspace directory. This patch removes the WORKSPACE variable from the GitLab CI configuration, so that each .testing run happens in its own directory. Other minor changes: - The logic that generates the test summary was moved out of the Makefile and into a separate script. - Unrelated to these changes, the error output shown on failure was extended from 20 to 40 lines, to provide more readable backtrace output.
NOAA-GFDL · Sep 11, 2023 · d342b29 · d342b29
1 parent 1bb8852
commit d342b29
Show file tree

Hide file tree

Showing 3 changed files with 55 additions and 33 deletions.
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -10,7 +10,6 @@ stages:
 # We use the "fetch" strategy to speed up the startup of stages
 variables:
   JOB_DIR: "/lustre/f2/scratch/oar.gfdl.ogrp-account/runner/builds/$CI_PIPELINE_ID"
-  WORKSPACE: "/lustre/f2/scratch/oar.gfdl.ogrp-account/runner/$CI_RUNNER_ID"
   GIT_STRATEGY: fetch
 
 # Always eport value of $JOB_DIR
@@ -185,9 +184,9 @@ actions:gnu:
     - make -s -j
     - MPIRUN= make preproc -s -j
     - echo -e "\e[0Ksection_end:`date +%s`:compile\r\e[0K"
-    - (echo '#!/bin/bash';echo 'make MPIRUN="srun -mblock --exclusive" WORKSPACE=$WORKSPACE test -s -j') > job.sh
-    - sbatch --clusters=c5 --nodes=2 --time=0:10:00 --account=gfdl_o --qos=debug --job-name=MOM6.gnu.testing --output=log.$CI_JOB_ID --wait job.sh || ( cat log.$CI_JOB_ID ; exit 911 ) && make WORKSPACE=$WORKSPACE test -s
-    - make WORKSPACE=$WORKSPACE test.summary
+    - (echo '#!/bin/bash';echo 'make MPIRUN="srun -mblock --exclusive" test -s -j') > job.sh
+    - sbatch --clusters=c5 --nodes=2 --time=0:10:00 --account=gfdl_o --qos=debug --job-name=MOM6.gnu.testing --output=log.$CI_JOB_ID --wait job.sh || ( cat log.$CI_JOB_ID ; exit 911 ) && make test -s
+    - make test.summary
 
 actions:intel:
   stage: tests
@@ -205,9 +204,9 @@ actions:intel:
     - make -s -j
     - MPIRUN= make preproc -s -j
     - echo -e "\e[0Ksection_end:`date +%s`:compile\r\e[0K"
-    - (echo '#!/bin/bash';echo 'make MPIRUN="srun -mblock --exclusive" WORKSPACE=$WORKSPACE test -s -j') > job.sh
-    - sbatch --clusters=c5 --nodes=2 --time=0:10:00 --account=gfdl_o --qos=debug --job-name=MOM6.intel.testing --output=log.$CI_JOB_ID --wait job.sh || ( cat log.$CI_JOB_ID ; exit 911 ) && make WORKSPACE=$WORKSPACE test -s
-    - make WORKSPACE=$WORKSPACE test.summary
+    - (echo '#!/bin/bash';echo 'make MPIRUN="srun -mblock --exclusive" test -s -j') > job.sh
+    - sbatch --clusters=c5 --nodes=2 --time=0:10:00 --account=gfdl_o --qos=debug --job-name=MOM6.intel.testing --output=log.$CI_JOB_ID --wait job.sh || ( cat log.$CI_JOB_ID ; exit 911 ) && make test -s
+    - make test.summary
 
 # Tests
 #

diff --git a/.testing/Makefile b/.testing/Makefile
@@ -554,8 +554,8 @@ $(WORKSPACE)/work/%/$(1)/ocean.stats $(WORKSPACE)/work/%/$(1)/chksum_diag: build
 	  && $(TIME) $(5) $(MPIRUN) -n $(6) $(abspath $$<) 2> std.err > std.out \
 	  || !( \
 	    mkdir -p ../../../results/$$*/ ; \
-	    cat std.out | tee ../../../results/$$*/std.$(1).out | tail -n 20 ; \
-	    cat std.err | tee ../../../results/$$*/std.$(1).err | tail -n 20 ; \
+	    cat std.out | tee ../../../results/$$*/std.$(1).out | tail -n 40 ; \
+	    cat std.err | tee ../../../results/$$*/std.$(1).err | tail -n 40 ; \
 	    rm ocean.stats chksum_diag ; \
 	    echo -e "$(FAIL): $$*.$(1) failed at runtime." \
 	  )
@@ -630,8 +630,8 @@ $(WORKSPACE)/work/%/restart/ocean.stats: build/symmetric/MOM6 | preproc
 	# Run the first half-period
 	cd $(@D) && $(TIME) $(MPIRUN) -n 1 $(abspath $<) 2> std1.err > std1.out \
 	  || !( \
-	    cat std1.out | tee ../../../results/$*/std.restart1.out | tail -n 20 ; \
-	    cat std1.err | tee ../../../results/$*/std.restart1.err | tail -n 20 ; \
+	    cat std1.out | tee ../../../results/$*/std.restart1.out | tail -n 40 ; \
+	    cat std1.err | tee ../../../results/$*/std.restart1.err | tail -n 40 ; \
 	    echo -e "$(FAIL): $*.restart failed at runtime." \
 	  )
 	# Setup the next inputs
@@ -641,8 +641,8 @@ $(WORKSPACE)/work/%/restart/ocean.stats: build/symmetric/MOM6 | preproc
 	# Run the second half-period
 	cd $(@D) && $(TIME) $(MPIRUN) -n 1 $(abspath $<) 2> std2.err > std2.out \
 	  || !( \
-	    cat std2.out | tee ../../../results/$*/std.restart2.out | tail -n 20 ; \
-	    cat std2.err | tee ../../../results/$*/std.restart2.err | tail -n 20 ; \
+	    cat std2.out | tee ../../../results/$*/std.restart2.out | tail -n 40 ; \
+	    cat std2.err | tee ../../../results/$*/std.restart2.err | tail -n 40 ; \
 	    echo -e "$(FAIL): $*.restart failed at runtime." \
 	  )
 
@@ -652,26 +652,7 @@ $(WORKSPACE)/work/%/restart/ocean.stats: build/symmetric/MOM6 | preproc
 # Not a true rule; only call this after `make test` to summarize test results.
 .PHONY: test.summary
 test.summary:
-	@if ls $(WORKSPACE)/results/*/* &> /dev/null; then \
-	  if ls $(WORKSPACE)/results/*/std.*.err &> /dev/null; then \
-	    echo "The following tests failed to complete:" ; \
-	    ls $(WORKSPACE)/results/*/std.*.out \
-	      | awk '{split($$0,a,"/"); split(a[3],t,"."); v=t[2]; if(length(t)>3) v=v"."t[3]; print a[2],":",v}'; \
-	  fi; \
-	  if ls $(WORKSPACE)/results/*/ocean.stats.*.diff &> /dev/null; then \
-	    echo "The following tests report solution regressions:" ; \
-	    ls $(WORKSPACE)/results/*/ocean.stats.*.diff \
-	      | awk '{split($$0,a,"/"); split(a[3],t,"."); v=t[3]; if(length(t)>4) v=v"."t[4]; print a[2],":",v}'; \
-	  fi; \
-	  if ls $(WORKSPACE)/results/*/chksum_diag.*.diff &> /dev/null; then \
-	    echo "The following tests report diagnostic regressions:" ; \
-	    ls $(WORKSPACE)/results/*/chksum_diag.*.diff \
-	      | awk '{split($$0,a,"/"); split(a[3],t,"."); v=t[2]; if(length(t)>3) v=v"."t[3]; print a[2],":",v}'; \
-	  fi; \
-	  false ; \
-	else \
-	  echo -e "$(PASS): All tests passed!"; \
-	fi
+	@./tools/report_test_results.sh $(WORKSPACE)/results
 
 
 #---

diff --git a/.testing/tools/report_test_results.sh b/.testing/tools/report_test_results.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+# Summarize the failures recorded under a test results directory.
+#
+# Usage: report_test_results.sh [RESULTS]    (default: ${PWD}/results)
+# Exits 1 whenever RESULTS is an existing directory; otherwise prints the
+# PASS banner and exits 0.
+RESULTS=${1:-${PWD}/results}
+
+GREEN="\033[0;32m"
+RESET="\033[0m"
+PASS="${GREEN}PASS${RESET}"
+
+# report TRIGGER LISTED INDEX HEADING
+#   If any RESULTS/*/TRIGGER file exists, print HEADING followed by one
+#   "TEST : CONFIG" line per RESULTS/*/LISTED file.  INDEX is the
+#   dot-separated field of the file name at which the config name starts.
+report() {
+  # Plain POSIX redirection: "&>" is a bashism that /bin/sh may misparse.
+  ls "${RESULTS}"/*/$1 > /dev/null 2>&1 || return 0
+  echo "$4"
+  ls "${RESULTS}"/*/$2 \
+    | awk -v i="$3" '{
+        na = split($0, a, "/")
+        nt = split(a[na], t, ".")
+        v = t[i]
+        # A config name containing a dot occupies one extra field.
+        if (nt > i + 1) v = v "." t[i + 1]
+        print a[na - 1], ":", v
+      }'
+}
+
+if [ -d "${RESULTS}" ]; then
+  report 'std.*.err' 'std.*.out' 2 \
+    "The following tests failed to complete:"
+  report 'ocean.stats.*.diff' 'ocean.stats.*.diff' 3 \
+    "The following tests report solution regressions:"
+  report 'chksum_diag.*.diff' 'chksum_diag.*.diff' 2 \
+    "The following tests report diagnostic regressions:"
+  exit 1
+else
+  printf "${PASS}: All tests passed!\n"
+fi