Remove Rome detection at NAS, fixes for AWS and unknown #460

Merged · 5 commits · May 18, 2023
151 changes: 79 additions & 72 deletions gcm_setup
@@ -401,50 +401,28 @@ if ( $SITE == 'NCCS' ) then

else if ( $SITE == 'NAS' ) then

set BUILT_ON_ROME = @BUILT_ON_ROME@

if ( $BUILT_ON_ROME == "TRUE") then

echo "Enter the ${C1}Processor Type${CN} you wish to run on:"
echo " ${C2}rom (AMD Rome) (default)${CN}"
echo " "
echo " NOTE GEOS is non-zero-diff when running on AMD Rome"
echo " compared to the other Intel nodes."
echo " "
set MODEL = `echo $<`
set MODEL = `echo $MODEL | tr "[:upper:]" "[:lower:]"`
if ( .$MODEL == .) then
set MODEL = 'rom'
endif

if( $MODEL != 'rom' ) goto ASKPROC
else

echo "Enter the ${C1}Processor Type${CN} you wish to run on:"
echo " ${C2}has (Haswell)${CN}"
echo " ${C2}bro (Broadwell)${CN}"
echo " ${C2}sky (Skylake)${CN} (default)"
echo " ${C2}cas (Cascade Lake)${CN}"
echo " "
echo " NOTE 1: Due to how FV3 is compiled by default, Sandy Bridge"
echo " and Ivy Bridge are not supported by current GEOS"
echo " "
echo " NOTE 2: Due to OS differences, if you want to run on the AMD"
echo " Rome nodes at NAS, you must recompile on the Rome nodes"
echo " "
set MODEL = `echo $<`
set MODEL = `echo $MODEL | tr "[:upper:]" "[:lower:]"`
if ( .$MODEL == .) then
set MODEL = 'sky'
endif

if( $MODEL != 'has' & \
$MODEL != 'bro' & \
$MODEL != 'sky' & \
$MODEL != 'cas' ) goto ASKPROC

echo "Enter the ${C1}Processor Type${CN} you wish to run on:"
echo " ${C2}has (Haswell)${CN}"
echo " ${C2}bro (Broadwell)${CN}"
echo " ${C2}sky (Skylake)${CN} (default)"
echo " ${C2}cas (Cascade Lake)${CN}"
echo " ${C2}rom (AMD Rome)${CN}"
echo " "
echo " NOTE Due to how FV3 is compiled by default, Sandy Bridge"
echo " and Ivy Bridge are not supported by current GEOS"
echo " "
set MODEL = `echo $<`
set MODEL = `echo $MODEL | tr "[:upper:]" "[:lower:]"`
if ( .$MODEL == .) then
set MODEL = 'sky'
endif

if( $MODEL != 'has' & \
$MODEL != 'bro' & \
$MODEL != 'sky' & \
$MODEL != 'cas' & \
$MODEL != 'rom' ) goto ASKPROC
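# For illustration only (hypothetical input, not part of the prompt
# logic above): the tr call normalizes case, so e.g.
#     echo "CAS" | tr "[:upper:]" "[:lower:]"   # prints: cas
# An empty entry keeps the default 'sky'; anything outside the list
# jumps back to the ASKPROC label to re-prompt.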

# Some processors have weird names at NAS
# ---------------------------------------

@@ -472,8 +450,43 @@ else if ( $SITE == 'NAS' ) then
set NCPUS_PER_NODE = 128
endif

else if( $SITE == 'AWS' | $SITE == 'Azure' ) then

# Because we do not know the name of the model or the number of CPUs
# per node, we ask the user to set these variables in the script

# AWS and Azure users must set the MODEL and NCPUS_PER_NODE
set MODEL = USER_MUST_SET
set NCPUS_PER_NODE = USER_MUST_SET

# Above, the user must set the MODEL and NCPUS_PER_NODE
# variables. Here we check that they have been set; if not,
# we print instructions and exit
# --------------------------------------------------------

if ( $MODEL == USER_MUST_SET | $NCPUS_PER_NODE == USER_MUST_SET ) then
echo "ERROR: We have detected you are on $SITE. As we do not have"
echo " official fixed node info yet, we ask you to edit $0"
echo " and set the MODEL and NCPUS_PER_NODE variables."
echo " Look for the section that says:"
echo " "
echo " # AWS and Azure users must set the MODEL and NCPUS_PER_NODE"
exit 1
endif
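# For illustration only -- hypothetical values, not a recommendation:
# on a cloud node with 36 physical cores per node one might edit the
# lines above to read something like
#     set MODEL          = 'c5n'
#     set NCPUS_PER_NODE = 36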

else
set MODEL = 'UNKNOWN'
# As we do not know how many CPUs per node this machine has, we
# detect it from the local CPU count. The command differs between
# Linux and macOS
if ( $ARCH == 'Linux' ) then
set NCPUS_PER_NODE = `grep -c ^processor /proc/cpuinfo`
else if ( $ARCH == 'Darwin' ) then
set NCPUS_PER_NODE = `sysctl -n hw.ncpu`
else
echo "ERROR: Unknown architecture $ARCH"
exit 1
endif
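# For illustration only (values depend on the machine): on a Linux box
# with 16 logical CPUs, `grep -c ^processor /proc/cpuinfo` prints 16;
# on a Mac with 8 logical CPUs, `sysctl -n hw.ncpu` prints 8.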
endif

#######################################################################
@@ -1734,30 +1747,30 @@ else if( $SITE == 'NCCS' ) then
if ( "$OCNMODEL" == "MIT" ) then
setenv COUPLEDIR /gpfsm/dnb32/estrobac/geos5/GRIDDIR # Coupled Ocean/Atmos Forcing
endif
else if( $SITE == 'AWS' ) then
setenv BATCH_CMD "sbatch" # SLURM Batch command
setenv BATCH_GROUP DELETE # SLURM Syntax for account name
setenv BATCH_TIME "SBATCH --time=" # SLURM Syntax for walltime
setenv BATCH_JOBNAME "SBATCH --job-name=" # SLURM Syntax for job name
setenv BATCH_OUTPUTNAME "SBATCH --output=" # SLURM Syntax for job output name
setenv BATCH_JOINOUTERR "DELETE" # SLURM joins out and err by default
setenv RUN_FT "06:00:00" # Wallclock Time for gcm_forecast.j
setenv RUN_T "12:00:00" # Wallclock Time for gcm_run.j
setenv POST_T "8:00:00" # Wallclock Time for gcm_post.j
setenv PLOT_T "12:00:00" # Wallclock Time for gcm_plot.j
setenv ARCHIVE_T "1:00:00" # Wallclock Time for gcm_archive.j
setenv RUN_Q DELETE # batch queue name for gcm_run.j
setenv RUN_P "SBATCH --ntasks=${MODEL_NPES}" # PE Configuration for gcm_run.j
setenv RUN_FP "SBATCH --ntasks=${MODEL_NPES}" # PE Configuration for gcm_forecast.j
setenv POST_Q NULL # batch queue name for gcm_post.j
setenv PLOT_Q NULL # batch queue name for gcm_plot.j
setenv MOVE_Q NULL # batch queue name for gcm_moveplot.j
setenv ARCHIVE_Q NULL # batch queue name for gcm_archive.j
setenv POST_P "SBATCH --ntasks=${POST_NPES}" # PE Configuration for gcm_post.j
setenv PLOT_P "SBATCH --nodes=4 --ntasks=4" # PE Configuration for gcm_plot.j
setenv ARCHIVE_P "SBATCH --ntasks=1" # PE Configuration for gcm_archive.j
setenv CONVERT_P "SBATCH --ntasks=${CNV_NPES}" # PE Configuration for gcm_convert.j
setenv MOVE_P "SBATCH --ntasks=1" # PE Configuration for gcm_moveplot.j
else if( $SITE == 'AWS' | $SITE == 'Azure' ) then
setenv BATCH_CMD "sbatch" # SLURM Batch command
setenv BATCH_GROUP DELETE # SLURM Syntax for account name
setenv BATCH_TIME "SBATCH --time=" # SLURM Syntax for walltime
setenv BATCH_JOBNAME "SBATCH --job-name=" # SLURM Syntax for job name
setenv BATCH_OUTPUTNAME "SBATCH --output=" # SLURM Syntax for job output name
setenv BATCH_JOINOUTERR "DELETE" # SLURM joins out and err by default
setenv RUN_FT "06:00:00" # Wallclock Time for gcm_forecast.j
setenv RUN_T "12:00:00" # Wallclock Time for gcm_run.j
setenv POST_T "8:00:00" # Wallclock Time for gcm_post.j
setenv PLOT_T "12:00:00" # Wallclock Time for gcm_plot.j
setenv ARCHIVE_T "1:00:00" # Wallclock Time for gcm_archive.j
setenv RUN_Q "SBATCH --constraint=${MODEL}" # batch queue name for gcm_run.j
setenv RUN_P "SBATCH --nodes=${NODES} --ntasks-per-node=${NCPUS_PER_NODE}" # PE Configuration for gcm_run.j
setenv RUN_FP "SBATCH --nodes=${NODES} --ntasks-per-node=${NCPUS_PER_NODE}" # PE Configuration for gcm_forecast.j
setenv POST_Q NULL # batch queue name for gcm_post.j
setenv PLOT_Q NULL # batch queue name for gcm_plot.j
setenv MOVE_Q NULL # batch queue name for gcm_moveplot.j
setenv ARCHIVE_Q NULL # batch queue name for gcm_archive.j
setenv POST_P "SBATCH --ntasks=${POST_NPES}" # PE Configuration for gcm_post.j
setenv PLOT_P "SBATCH --nodes=4 --ntasks=4" # PE Configuration for gcm_plot.j
setenv ARCHIVE_P "SBATCH --ntasks=1" # PE Configuration for gcm_archive.j
setenv CONVERT_P "SBATCH --ntasks=${CNV_NPES}" # PE Configuration for gcm_convert.j
setenv MOVE_P "SBATCH --ntasks=1" # PE Configuration for gcm_moveplot.j
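# For illustration only (hypothetical values MODEL='c5n', NODES=4,
# NCPUS_PER_NODE=36): with the settings above, a generated run script
# would carry SLURM directives along the lines of
#     #SBATCH --time=12:00:00
#     #SBATCH --constraint=c5n
#     #SBATCH --nodes=4 --ntasks-per-node=36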

setenv BCSDIR /ford1/share/gmao_SIteam/ModelData/bcs/${LSM_BCS}/${LSM_BCS}_${OCEAN_TAG} # location of Boundary Conditions
setenv REPLAY_ANA_EXPID REPLAY_UNSUPPORTED # Default Analysis Experiment for REPLAY
@@ -1770,11 +1783,6 @@ else if( $SITE == 'AWS' ) then
setenv COUPLEDIR /ford1/share/gmao_SIteam/ModelData/aogcm # Coupled Ocean/Atmos Forcing
setenv GWDRSDIR /ford1/share/gmao_SIteam/ModelData/GWD_RIDGE # Location of GWD_RIDGE files

# By default on AWS, just ignore IOSERVER for now until testing
set USE_IOSERVER = 0
set NUM_OSERVER_NODES = 0
set NUM_BACKEND_PES = 0
set NCPUS_PER_NODE = 0
else
# These are defaults for the desktop
setenv BATCH_CMD "sbatch" # SLURM Batch command
@@ -1819,7 +1827,6 @@ else
set USE_IOSERVER = 0
set NUM_OSERVER_NODES = 0
set NUM_BACKEND_PES = 0
set NCPUS_PER_NODE = 0
endif

#######################################################################