From 3cbe71f7f0ba97bec4a6563fb84cf103f92c48de Mon Sep 17 00:00:00 2001 From: david huber Date: Tue, 24 Sep 2024 11:36:17 +0000 Subject: [PATCH 1/4] Initial port to Acorn. --- modulefiles/gsi_acorn.intel.lua | 50 ++++++++++ regression/regression_param.sh | 41 ++++---- regression/regression_var.sh | 10 +- ush/detect_machine.sh | 4 +- ush/module-setup.sh | 4 + ush/sub_acorn | 171 ++++++++++++++++++++++++++++++++ 6 files changed, 257 insertions(+), 23 deletions(-) create mode 100644 modulefiles/gsi_acorn.intel.lua create mode 100755 ush/sub_acorn diff --git a/modulefiles/gsi_acorn.intel.lua b/modulefiles/gsi_acorn.intel.lua new file mode 100644 index 0000000000..401a2a98e3 --- /dev/null +++ b/modulefiles/gsi_acorn.intel.lua @@ -0,0 +1,50 @@ +help([[ +]]) + + +local PrgEnv_intel_ver=os.getenv("PrgEnv_intel_ver") or "8.1.0" +local intel_ver=os.getenv("intel_ver") or "19.1.3.304" +local craype_ver=os.getenv("craype_ver") or "2.7.8" +local cray_mpich_ver=os.getenv("cray_mpich_ver") or "8.1.7" +local cmake_ver= os.getenv("cmake_ver") or "3.20.2" +local python_ver=os.getenv("python_ver") or "3.8.6" +local prod_util_ver=os.getenv("prod_util_ver") or "2.0.10" + +local netcdf_ver=os.getenv("netcdf_ver") or "4.7.4" +local bufr_ver=os.getenv("bufr_ver") or "11.7.0" +local bacio_ver=os.getenv("bacio_ver") or "2.4.1" +local w3emc_ver=os.getenv("w3emc_ver") or "2.9.2" +local sp_ver=os.getenv("sp_ver") or "2.3.3" +local ip_ver=os.getenv("ip_ver") or "3.3.3" +local sigio_ver=os.getenv("sigio_ver") or "2.3.2" +local sfcio_ver=os.getenv("sfcio_ver") or "1.4.1" +local nemsio_ver=os.getenv("nemsio_ver") or "2.5.4" +local wrf_io_ver=os.getenv("wrf_io_ver") or "1.2.0" +local ncio_ver=os.getenv("ncio_ver") or "1.1.2" +local crtm_ver=os.getenv("crtm_ver") or "2.4.0" +local ncdiag_ver=os.getenv("ncdiag_ver") or "1.1.1" + +load("PrgEnv-intel") +load("intel") +load("craype") +load("cray-mpich") +load(pathJoin("cmake", cmake_ver)) +load(pathJoin("python", python_ver)) +load(pathJoin("prod_util", 
prod_util_ver)) +load(pathJoin("netcdf", netcdf_ver)) +load(pathJoin("bufr", bufr_ver)) +load(pathJoin("bacio", bacio_ver)) +load(pathJoin("w3emc", w3emc_ver)) +load(pathJoin("sp", sp_ver)) +load(pathJoin("ip", ip_ver)) +load(pathJoin("sigio", sigio_ver)) +load(pathJoin("sfcio", sfcio_ver)) +load(pathJoin("nemsio", nemsio_ver)) +load(pathJoin("wrf_io", wrf_io_ver)) +load(pathJoin("ncio", ncio_ver)) +load(pathJoin("crtm", crtm_ver)) +load(pathJoin("ncdiag",ncdiag_ver)) + +pushenv("GSI_BINARY_SOURCE_DIR", "/lfs/h2/emc/global/noscrub/emc.global/FIX/fix/gsi/20230911") + +whatis("Description: GSI environment on WCOSS2 Acorn") diff --git a/regression/regression_param.sh b/regression/regression_param.sh index 209762569b..34d5964d87 100755 --- a/regression/regression_param.sh +++ b/regression/regression_param.sh @@ -4,8 +4,8 @@ regtest=$1 case $machine in - Hera) - sub_cmd="sub_hera" + Hera) + sub_cmd="sub_hera" memnode=96 numcore=40 ;; @@ -19,23 +19,28 @@ case $machine in memnode=512 numcore=40 ;; - Jet) - sub_cmd="sub_jet" + Jet) + sub_cmd="sub_jet" memnode=96 numcore=40 ;; - Gaea) - sub_cmd="sub_gaea" + Gaea) + sub_cmd="sub_gaea" memnode=251 numcore=128 ;; - wcoss2) - sub_cmd="sub_wcoss2" + wcoss2) + sub_cmd="sub_wcoss2" memnode=512 numcore=128 ;; - Discover) - sub_cmd="sub_discover" + acorn) + sub_cmd="sub_acorn" + memnode=512 + numcore=128 + ;; + Discover) + sub_cmd="sub_discover" ;; *) # EXIT out for unresolved machine echo "unknown $machine" @@ -71,7 +76,7 @@ case $regtest in elif [[ "$machine" = "Gaea" ]]; then topts[1]="0:10:00" ; popts[1]="12/8/" ; ropts[1]="/1" topts[2]="0:10:00" ; popts[2]="12/10/" ; ropts[2]="/2" - elif [[ "$machine" = "wcoss2" ]]; then + elif [[ "$machine" = "wcoss2" || "$machine" = "acorn" ]]; then topts[1]="0:10:00" ; popts[1]="12/8/" ; ropts[1]="/1" topts[2]="0:10:00" ; popts[2]="12/10/" ; ropts[2]="/2" fi @@ -101,7 +106,7 @@ case $regtest in elif [[ "$machine" = "Gaea" ]]; then topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1" 
topts[2]="0:15:00" ; popts[2]="128/2/" ; ropts[2]="/1" - elif [[ "$machine" = "wcoss2" ]]; then + elif [[ "$machine" = "wcoss2" || "$machine" = "acorn" ]]; then topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1" topts[2]="0:15:00" ; popts[2]="128/2/" ; ropts[2]="/1" fi @@ -131,7 +136,7 @@ case $regtest in elif [[ "$machine" = "Gaea" ]]; then topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1" topts[2]="0:15:00" ; popts[2]="128/2/" ; ropts[2]="/1" - elif [[ "$machine" = "wcoss2" ]]; then + elif [[ "$machine" = "wcoss2" || "$machine" = "acorn" ]]; then topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1" topts[2]="0:15:00" ; popts[2]="128/2/" ; ropts[2]="/1" fi @@ -160,7 +165,7 @@ case $regtest in elif [[ "$machine" = "Gaea" ]]; then topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1" topts[2]="0:15:00" ; popts[2]="128/2/" ; ropts[2]="/1" - elif [[ "$machine" = "wcoss2" ]]; then + elif [[ "$machine" = "wcoss2" || "$machine" = "acorn" ]]; then topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1" topts[2]="0:15:00" ; popts[2]="128/2/" ; ropts[2]="/1" fi @@ -190,7 +195,7 @@ case $regtest in elif [[ "$machine" = "Gaea" ]]; then topts[1]="0:15:00" ; popts[1]="28/1/" ; ropts[1]="/1" topts[2]="0:15:00" ; popts[2]="28/2/" ; ropts[2]="/1" - elif [[ "$machine" = "wcoss2" ]]; then + elif [[ "$machine" = "wcoss2" || "$machine" = "acorn" ]]; then topts[1]="0:15:00" ; popts[1]="64/1/" ; ropts[1]="/1" topts[2]="0:15:00" ; popts[2]="64/2/" ; ropts[2]="/1" fi @@ -220,7 +225,7 @@ case $regtest in elif [[ "$machine" = "Gaea" ]]; then topts[1]="0:30:00" ; popts[1]="14/8/" ; ropts[1]="/1" topts[2]="0:30:00" ; popts[2]="14/14/" ; ropts[2]="/1" - elif [[ "$machine" = "wcoss2" ]]; then + elif [[ "$machine" = "wcoss2" || "$machine" = "acorn" ]]; then topts[1]="0:30:00" ; popts[1]="14/8/" ; ropts[1]="/1" topts[2]="0:30:00" ; popts[2]="14/14/" ; ropts[2]="/2" fi @@ -250,7 +255,7 @@ case $regtest in elif [[ "$machine" = "Gaea" ]]; then topts[1]="0:10:00" ; popts[1]="16/2/" ; 
ropts[1]="/1" topts[2]="0:10:00" ; popts[2]="16/4/" ; ropts[2]="/2" - elif [[ "$machine" = "wcoss2" ]]; then + elif [[ "$machine" = "wcoss2" || "$machine" = "acorn" ]]; then topts[1]="0:10:00" ; popts[1]="16/2/" ; ropts[1]="/1" topts[2]="0:10:00" ; popts[2]="16/4/" ; ropts[2]="/2" fi @@ -316,7 +321,7 @@ elif [[ "$machine" = "Gaea" ]]; then export MPI_BUFS_PER_HOST=256 export MPI_GROUP_MAX=256 export APRUN="srun --export=ALL -n \$ntasks" -elif [[ "$machine" = "wcoss2" ]]; then +elif [[ "$machine" = "wcoss2" || "$machine" = "acorn" ]]; then export OMP_PLACES=cores export OMP_STACKSIZE=2G export FORT_BUFFERED=true diff --git a/regression/regression_var.sh b/regression/regression_var.sh index 4a2bc85874..d4e7d99aa0 100755 --- a/regression/regression_var.sh +++ b/regression/regression_var.sh @@ -47,8 +47,12 @@ elif [[ -d /work ]]; then # Orion or Hercules else export machine="Orion" fi -elif [[ -d /lfs/h2 ]]; then # wcoss2 - export machine="wcoss2" +elif [[ -d /lfs/h2 ]]; then # wcoss2 or acorn + if [[ $(hostname -f) =~ ".acorn." ]]; then + export machine="acorn" + else + export machine="wcoss2" + fi fi echo "Running Regression Tests on '$machine'"; @@ -63,7 +67,7 @@ case $machine in export check_resource="no" export accnt="ufs-ard" ;; - wcoss2) + wcoss2 | acorn) export local_or_default="${local_or_default:-/lfs/h2/emc/da/noscrub/$LOGNAME}" if [ -d $local_or_default ]; then export noscrub="$local_or_default/noscrub" diff --git a/ush/detect_machine.sh b/ush/detect_machine.sh index 0beb937f7e..e06775dbbe 100755 --- a/ush/detect_machine.sh +++ b/ush/detect_machine.sh @@ -14,8 +14,8 @@ # First detect w/ hostname case $(hostname -f) in - adecflow0[12].acorn.wcoss2.ncep.noaa.gov) MACHINE_ID=acorn ;; ### acorn - alogin0[12].acorn.wcoss2.ncep.noaa.gov) MACHINE_ID=acorn ;; ### acorn + adecflow0[1-3].acorn.wcoss2.ncep.noaa.gov) MACHINE_ID=acorn ;; ### acorn + alogin0[1-3].acorn.wcoss2.ncep.noaa.gov) MACHINE_ID=acorn ;; ### acorn clogin0[1-9].cactus.wcoss2.ncep.noaa.gov) 
MACHINE_ID=wcoss2 ;; ### cactus01-9 clogin10.cactus.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### cactus10 dlogin0[1-9].dogwood.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### dogwood01-9 diff --git a/ush/module-setup.sh b/ush/module-setup.sh index 299e13aa4e..41d0d7c655 100755 --- a/ush/module-setup.sh +++ b/ush/module-setup.sh @@ -40,6 +40,10 @@ elif [[ $MACHINE_ID = wcoss2 ]]; then # We are on WCOSS2 module reset +elif [[ $MACHINE_ID = acorn ]]; then + # We are on WCOSS2-Acorn + module reset + elif [[ $MACHINE_ID = stampede* ]] ; then # We are on TACC Stampede if ( ! eval module help > /dev/null 2>&1 ) ; then diff --git a/ush/sub_acorn b/ush/sub_acorn new file mode 100755 index 0000000000..ed0f74c7cc --- /dev/null +++ b/ush/sub_acorn @@ -0,0 +1,171 @@ +#!/bin/sh --login +set -x +echo "starting sub_acorn" +usage="\ +Usage: $0 [options] executable [args] + where the options are: + -a account account (default: none) + -b binding run smt binding or not (default:NO) + -d dirin initial directory (default: cwd) + -e envars copy comma-separated environment variables + -g group group name + -i append standard input to command file + -j jobname specify jobname (default: executable basename) + -m machine machine on which to run (default: current) + -n write command file to stdout rather than submitting it + -o output specify output file (default: jobname.out) + -p procs[/nodes[/ppreq] + number of MPI tasks and optional nodes or Bblocking and + ppreq option (N or S) (defaults: serial, Bunlimited, S) + -q queue[/qpreq] queue name and optional requirement, e.g. 
dev/P + (defaults: 1 if serial or dev if parallel and none) + (queue 3 or 4 is dev or prod with twice tasks over ip) + (options: P=parallel, B=bigmem, b=batch) + -r rmem[/rcpu] resources memory and cpus/task (default: '1024 mb', 1) + -t timew wall time limit in [[hh:]mm:]ss format (default: 900) + -u userid userid to run under (default: self) + -v verbose mode + -w when when to run, in yyyymmddhh[mm], +hh[mm], thh[mm], or + Thh[mm] (full, incremental, today or tomorrow) format + (default: now) +Function: This command submits a job to the batch queue." +subcmd="$*" +stdin=NO +nosub=NO +account="" +binding="NO" +dirin="" +envars="" +group="" +jobname="" +machine="" +output="" +procs=0 +nodes="" +ppreq="" +queue="" +qpreq="" +rmem="1024" +rcpu="1" +timew="900" +userid="" +verbose=NO +when="" +while getopts a:b:d:e:g:ij:m:no:p:q:r:t:u:vw: opt;do + case $opt in + a) account="$OPTARG";; + b) binding="$OPTARG";; + d) dirin="$OPTARG";; + e) envars="$OPTARG";; + g) group="$OPTARG";; + i) stdin=YES;; + j) jobname=$OPTARG;; + m) machine="$OPTARG";; + n) nosub=YES;; + o) output=$OPTARG;; + p) procs=$(echo $OPTARG/|cut -d/ -f1);nodes=$(echo $OPTARG/|cut -d/ -f2);ppreq=$(echo $OPTARG/|cut -d/ -f3);; + q) queue=$(echo $OPTARG/|cut -d/ -f1);qpreq=$(echo $OPTARG/|cut -d/ -f2);; + r) rmem=$(echo $OPTARG/|cut -d/ -f1);rcpu=$(echo $OPTARG/|cut -d/ -f2);; + t) timew=$OPTARG;; + u) userid=$OPTARG;; + v) verbose=YES;; + w) when=$OPTARG;; + \?) echo $0: invalid option >&2;echo "$usage" >&2;exit 1;; + esac +done +shift $(($OPTIND-1)) +if [[ $# -eq 0 ]];then + echo $0: missing executable name >&2;echo "$usage" >&2;exit 1 +fi +exec=$1 +if [[ ! 
-s $exec ]]&&which $exec >/dev/null 2>&1;then + exec=$(which $exec) +fi +shift +args="$*" +bn=$(basename $exec) +export jobname=${jobname:-$bn} +output=${output:-$jobname.out} +myuser=$LOGNAME +myhost=$(hostname) + +DATA=/lfs/h2/emc/stmp/$LOGNAME/tmp +mkdir -p $DATA + +queue=${queue:-dev} +timew=${timew:-01:20:00} +task_node=${task_node:-$procs} +size=$((nodes*task_node)) +envars=$envars +threads=${rcpu:-1} +ncpus=$((procs*threads)) + +export TZ=GMT +cfile=$DATA/sub$$ +> $cfile +echo "#!/bin/bash" >> $cfile +echo "" >> $cfile +echo "#PBS -o $output" >> $cfile +echo "#PBS -N $jobname" >> $cfile +echo "#PBS -q $queue" >> $cfile +echo "#PBS -l walltime=$timew" >> $cfile +echo "#PBS -l select=$nodes:mpiprocs=$procs:ompthreads=$threads:ncpus=$ncpus" >> $cfile +echo "#PBS -l place=vscatter:exclhost" >> $cfile +echo "#PBS -j oe" >> $cfile +echo "#PBS -A "$accnt >> $cfile + +echo "" >> $cfile +echo "export OMP_NUM_THREADS=$threads" >> $cfile +echo "export ntasks=$(( $nodes * $procs ))" >> $cfile +echo "export ppn=$procs" >> $cfile +echo "export threads=$threads" >> $cfile +echo "" >> $cfile +echo ". "$(awk '{ print $1, $2, $3, $4, $5, $6, $7, $8, $9 }' $regdir/regression_var.out) >>$cfile +echo "" >> $cfile + +echo "module reset" >> $cfile +echo "module use $modulefiles" >> $cfile +echo "module load gsi_acorn.intel" >> $cfile +echo "module load envvar/1.0" >> $cfile +echo "module load cray-pals/1.2.2" >> $cfile +echo "module -t list 2>&1 | while read line;do module show $line 2>&1 | sed -n -e '2p';done | sort" >> $cfile +echo "module avail" >> $cfile + +echo "" >> $cfile + +cat $exec >> $cfile + +if [[ $nosub = YES ]];then + cat $cfile + exit +elif [[ $verbose = YES ]];then + set -x + cat $cfile +fi + +if [[ $stdin = YES ]];then + cat +fi >>$cfile +if [[ $nosub = YES ]];then + cat $cfile + exit +elif [[ $verbose = YES ]];then + set -x + cat $cfile +fi +qsub=${qsub:-qsub} + +ofile=$DATA/subout$$ +>$ofile +chmod 777 $ofile +$qsub -V $cfile >$ofile +rc=$? 
+cat $ofile +if [[ -w $SUBLOG ]];then + jobn=$(grep -i submitted $ofile|head -n1|cut -d\" -f2) + date -u +"%Y%m%d%H%M%S : $subcmd : $jobn" >>$SUBLOG +fi +##rm $cfile $ofile +##[[ $MKDATA = YES ]] && rmdir $DATA +echo "ending sub_acorn" +exit $rc From 542b6f7dd741e2de0ab2948aa194cf13df515c5d Mon Sep 17 00:00:00 2001 From: david huber Date: Wed, 2 Oct 2024 20:13:36 +0000 Subject: [PATCH 2/4] Use EMC CRTM v2.4.0.1. --- modulefiles/gsi_acorn.intel.lua | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/modulefiles/gsi_acorn.intel.lua b/modulefiles/gsi_acorn.intel.lua index 401a2a98e3..ee966a937c 100644 --- a/modulefiles/gsi_acorn.intel.lua +++ b/modulefiles/gsi_acorn.intel.lua @@ -21,7 +21,6 @@ local sfcio_ver=os.getenv("sfcio_ver") or "1.4.1" local nemsio_ver=os.getenv("nemsio_ver") or "2.5.4" local wrf_io_ver=os.getenv("wrf_io_ver") or "1.2.0" local ncio_ver=os.getenv("ncio_ver") or "1.1.2" -local crtm_ver=os.getenv("crtm_ver") or "2.4.0" local ncdiag_ver=os.getenv("ncdiag_ver") or "1.1.1" load("PrgEnv-intel") @@ -42,9 +41,13 @@ load(pathJoin("sfcio", sfcio_ver)) load(pathJoin("nemsio", nemsio_ver)) load(pathJoin("wrf_io", wrf_io_ver)) load(pathJoin("ncio", ncio_ver)) -load(pathJoin("crtm", crtm_ver)) load(pathJoin("ncdiag",ncdiag_ver)) +-- Lastly, load CRTM from the EMC location +append_path("MODULEPATH", "/lfs/h1/emc/nceplibs/noscrub/hpc-stack/libs/hpc-stack/modulefiles/compiler/intel/19.1.3.304") +local crtm_ver=os.getenv("crtm_ver") or "2.4.0.1" +load(pathJoin("crtm", crtm_ver)) + pushenv("GSI_BINARY_SOURCE_DIR", "/lfs/h2/emc/global/noscrub/emc.global/FIX/fix/gsi/20230911") whatis("Description: GSI environment on WCOSS2 Acorn") From f45748d63d08e43e1058dcb1c9f085e4b94ace09 Mon Sep 17 00:00:00 2001 From: david huber Date: Thu, 3 Oct 2024 14:48:00 +0000 Subject: [PATCH 3/4] Use all module versions, update FIX version for acorn --- modulefiles/gsi_acorn.intel.lua | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git 
a/modulefiles/gsi_acorn.intel.lua b/modulefiles/gsi_acorn.intel.lua index ee966a937c..03928be822 100644 --- a/modulefiles/gsi_acorn.intel.lua +++ b/modulefiles/gsi_acorn.intel.lua @@ -1,7 +1,6 @@ help([[ ]]) - local PrgEnv_intel_ver=os.getenv("PrgEnv_intel_ver") or "8.1.0" local intel_ver=os.getenv("intel_ver") or "19.1.3.304" local craype_ver=os.getenv("craype_ver") or "2.7.8" @@ -21,15 +20,18 @@ local sfcio_ver=os.getenv("sfcio_ver") or "1.4.1" local nemsio_ver=os.getenv("nemsio_ver") or "2.5.4" local wrf_io_ver=os.getenv("wrf_io_ver") or "1.2.0" local ncio_ver=os.getenv("ncio_ver") or "1.1.2" +local crtm_ver=os.getenv("crtm_ver") or "2.4.0.1" local ncdiag_ver=os.getenv("ncdiag_ver") or "1.1.1" -load("PrgEnv-intel") -load("intel") -load("craype") -load("cray-mpich") +load(pathJoin("PrgEnv-intel", PrgEnv_intel_ver)) +load(pathJoin("intel", intel_ver)) +load(pathJoin("craype", craype_ver)) +load(pathJoin("cray-mpich", cray_mpich_ver)) load(pathJoin("cmake", cmake_ver)) load(pathJoin("python", python_ver)) + load(pathJoin("prod_util", prod_util_ver)) + load(pathJoin("netcdf", netcdf_ver)) load(pathJoin("bufr", bufr_ver)) load(pathJoin("bacio", bacio_ver)) @@ -45,9 +47,8 @@ load(pathJoin("ncdiag",ncdiag_ver)) -- Lastly, load CRTM from the EMC location append_path("MODULEPATH", "/lfs/h1/emc/nceplibs/noscrub/hpc-stack/libs/hpc-stack/modulefiles/compiler/intel/19.1.3.304") -local crtm_ver=os.getenv("crtm_ver") or "2.4.0.1" load(pathJoin("crtm", crtm_ver)) -pushenv("GSI_BINARY_SOURCE_DIR", "/lfs/h2/emc/global/noscrub/emc.global/FIX/fix/gsi/20230911") +pushenv("GSI_BINARY_SOURCE_DIR", "/lfs/h2/emc/global/noscrub/emc.global/FIX/fix/gsi/20240208") whatis("Description: GSI environment on WCOSS2 Acorn") From b0aa72d2e127db4e5988a61b8a965a33d08a361e Mon Sep 17 00:00:00 2001 From: david huber Date: Thu, 3 Oct 2024 20:14:55 +0000 Subject: [PATCH 4/4] Combine acorn/wcoss2 scripts as much as possible --- regression/regression_driver.sh | 2 +- ush/module-setup.sh | 8 +- 
ush/sub_acorn | 172 +------------------------------- ush/sub_wcoss2 | 6 +- 4 files changed, 7 insertions(+), 181 deletions(-) mode change 100755 => 120000 ush/sub_acorn diff --git a/regression/regression_driver.sh b/regression/regression_driver.sh index 805a9dd1fb..6667d483f1 100755 --- a/regression/regression_driver.sh +++ b/regression/regression_driver.sh @@ -45,7 +45,7 @@ for jn in `seq ${RSTART} ${REND}`; do fi rm -f ${job[$jn]}.out - /bin/sh $ush/$sub_cmd -q $queue -j ${job[$jn]} -t ${topts[$jn]} -p ${popts[$jn]} -r ${ropts[$jn]} $scripts/${regtest}.sh + /bin/sh $ush/$sub_cmd -m ${machine} -q $queue -j ${job[$jn]} -t ${topts[$jn]} -p ${popts[$jn]} -r ${ropts[$jn]} $scripts/${regtest}.sh if [ $debug == ".true." ]; then break; fi $scripts/regression_wait.sh ${job[$jn]} ${rcname} $check_resource diff --git a/ush/module-setup.sh b/ush/module-setup.sh index 41d0d7c655..1f7a4462e2 100755 --- a/ush/module-setup.sh +++ b/ush/module-setup.sh @@ -36,12 +36,8 @@ elif [[ $MACHINE_ID = s4* ]] ; then fi module purge -elif [[ $MACHINE_ID = wcoss2 ]]; then - # We are on WCOSS2 - module reset - -elif [[ $MACHINE_ID = acorn ]]; then - # We are on WCOSS2-Acorn +elif [[ $MACHINE_ID = wcoss2 || $MACHINE_ID = acorn ]]; then + # We are on WCOSS2 (cactus, dogwood, or acorn) module reset elif [[ $MACHINE_ID = stampede* ]] ; then diff --git a/ush/sub_acorn b/ush/sub_acorn deleted file mode 100755 index ed0f74c7cc..0000000000 --- a/ush/sub_acorn +++ /dev/null @@ -1,171 +0,0 @@ -#!/bin/sh --login -set -x -echo "starting sub_acorn" -usage="\ -Usage: $0 [options] executable [args] - where the options are: - -a account account (default: none) - -b binding run smt binding or not (default:NO) - -d dirin initial directory (default: cwd) - -e envars copy comma-separated environment variables - -g group group name - -i append standard input to command file - -j jobname specify jobname (default: executable basename) - -m machine machine on which to run (default: current) - -n write command file 
to stdout rather than submitting it - -o output specify output file (default: jobname.out) - -p procs[/nodes[/ppreq] - number of MPI tasks and optional nodes or Bblocking and - ppreq option (N or S) (defaults: serial, Bunlimited, S) - -q queue[/qpreq] queue name and optional requirement, e.g. dev/P - (defaults: 1 if serial or dev if parallel and none) - (queue 3 or 4 is dev or prod with twice tasks over ip) - (options: P=parallel, B=bigmem, b=batch) - -r rmem[/rcpu] resources memory and cpus/task (default: '1024 mb', 1) - -t timew wall time limit in [[hh:]mm:]ss format (default: 900) - -u userid userid to run under (default: self) - -v verbose mode - -w when when to run, in yyyymmddhh[mm], +hh[mm], thh[mm], or - Thh[mm] (full, incremental, today or tomorrow) format - (default: now) -Function: This command submits a job to the batch queue." -subcmd="$*" -stdin=NO -nosub=NO -account="" -binding="NO" -dirin="" -envars="" -group="" -jobname="" -machine="" -output="" -procs=0 -nodes="" -ppreq="" -queue="" -qpreq="" -rmem="1024" -rcpu="1" -timew="900" -userid="" -verbose=NO -when="" -while getopts a:b:d:e:g:ij:m:no:p:q:r:t:u:vw: opt;do - case $opt in - a) account="$OPTARG";; - b) binding="$OPTARG";; - d) dirin="$OPTARG";; - e) envars="$OPTARG";; - g) group="$OPTARG";; - i) stdin=YES;; - j) jobname=$OPTARG;; - m) machine="$OPTARG";; - n) nosub=YES;; - o) output=$OPTARG;; - p) procs=$(echo $OPTARG/|cut -d/ -f1);nodes=$(echo $OPTARG/|cut -d/ -f2);ppreq=$(echo $OPTARG/|cut -d/ -f3);; - q) queue=$(echo $OPTARG/|cut -d/ -f1);qpreq=$(echo $OPTARG/|cut -d/ -f2);; - r) rmem=$(echo $OPTARG/|cut -d/ -f1);rcpu=$(echo $OPTARG/|cut -d/ -f2);; - t) timew=$OPTARG;; - u) userid=$OPTARG;; - v) verbose=YES;; - w) when=$OPTARG;; - \?) echo $0: invalid option >&2;echo "$usage" >&2;exit 1;; - esac -done -shift $(($OPTIND-1)) -if [[ $# -eq 0 ]];then - echo $0: missing executable name >&2;echo "$usage" >&2;exit 1 -fi -exec=$1 -if [[ ! 
-s $exec ]]&&which $exec >/dev/null 2>&1;then - exec=$(which $exec) -fi -shift -args="$*" -bn=$(basename $exec) -export jobname=${jobname:-$bn} -output=${output:-$jobname.out} -myuser=$LOGNAME -myhost=$(hostname) - -DATA=/lfs/h2/emc/stmp/$LOGNAME/tmp -mkdir -p $DATA - -queue=${queue:-dev} -timew=${timew:-01:20:00} -task_node=${task_node:-$procs} -size=$((nodes*task_node)) -envars=$envars -threads=${rcpu:-1} -ncpus=$((procs*threads)) - -export TZ=GMT -cfile=$DATA/sub$$ -> $cfile -echo "#!/bin/bash" >> $cfile -echo "" >> $cfile -echo "#PBS -o $output" >> $cfile -echo "#PBS -N $jobname" >> $cfile -echo "#PBS -q $queue" >> $cfile -echo "#PBS -l walltime=$timew" >> $cfile -echo "#PBS -l select=$nodes:mpiprocs=$procs:ompthreads=$threads:ncpus=$ncpus" >> $cfile -echo "#PBS -l place=vscatter:exclhost" >> $cfile -echo "#PBS -j oe" >> $cfile -echo "#PBS -A "$accnt >> $cfile - -echo "" >> $cfile -echo "export OMP_NUM_THREADS=$threads" >> $cfile -echo "export ntasks=$(( $nodes * $procs ))" >> $cfile -echo "export ppn=$procs" >> $cfile -echo "export threads=$threads" >> $cfile -echo "" >> $cfile -echo ". "$(awk '{ print $1, $2, $3, $4, $5, $6, $7, $8, $9 }' $regdir/regression_var.out) >>$cfile -echo "" >> $cfile - -echo "module reset" >> $cfile -echo "module use $modulefiles" >> $cfile -echo "module load gsi_acorn.intel" >> $cfile -echo "module load envvar/1.0" >> $cfile -echo "module load cray-pals/1.2.2" >> $cfile -echo "module -t list 2>&1 | while read line;do module show $line 2>&1 | sed -n -e '2p';done | sort" >> $cfile -echo "module avail" >> $cfile - -echo "" >> $cfile - -cat $exec >> $cfile - -if [[ $nosub = YES ]];then - cat $cfile - exit -elif [[ $verbose = YES ]];then - set -x - cat $cfile -fi - -if [[ $stdin = YES ]];then - cat -fi >>$cfile -if [[ $nosub = YES ]];then - cat $cfile - exit -elif [[ $verbose = YES ]];then - set -x - cat $cfile -fi -qsub=${qsub:-qsub} - -ofile=$DATA/subout$$ ->$ofile -chmod 777 $ofile -$qsub -V $cfile >$ofile -rc=$? 
-cat $ofile -if [[ -w $SUBLOG ]];then - jobn=$(grep -i submitted $ofile|head -n1|cut -d\" -f2) - date -u +"%Y%m%d%H%M%S : $subcmd : $jobn" >>$SUBLOG -fi -##rm $cfile $ofile -##[[ $MKDATA = YES ]] && rmdir $DATA -echo "ending sub_acorn" -exit $rc diff --git a/ush/sub_acorn b/ush/sub_acorn new file mode 120000 index 0000000000..c2df784934 --- /dev/null +++ b/ush/sub_acorn @@ -0,0 +1 @@ +sub_wcoss2 \ No newline at end of file diff --git a/ush/sub_wcoss2 b/ush/sub_wcoss2 index cd21e932f8..e0b4a03ccb 100755 --- a/ush/sub_wcoss2 +++ b/ush/sub_wcoss2 @@ -1,6 +1,6 @@ #!/bin/sh --login set -x -echo "starting sub_wcoss2" +echo "starting sub_${machine:-wcoss2}" # wcoss2 or acorn usage="\ Usage: $0 [options] executable [args] where the options are: @@ -125,7 +125,7 @@ echo "" >> $cfile echo "module reset" >> $cfile echo "module use $modulefiles" >> $cfile -echo "module load gsi_wcoss2.intel" >> $cfile +echo "module load gsi_${machine:-wcoss2}.intel" >> $cfile echo "module load envvar/1.0" >> $cfile echo "module load cray-pals/1.2.2" >> $cfile echo "module -t list 2>&1 | while read line;do module show $line 2>&1 | sed -n -e '2p';done | sort" >> $cfile @@ -167,5 +167,5 @@ if [[ -w $SUBLOG ]];then fi ##rm $cfile $ofile ##[[ $MKDATA = YES ]] && rmdir $DATA -echo "ending sub_wcoss2" +echo "ending sub_${machine:-wcoss2}" exit $rc