Skip to content

Commit

Permalink
Add queue option, fix some issues with the batch submission, remove use of omplace (only needed for hybrid MPI/OpenMP code); because of duplication this had to go in two places
Browse files Browse the repository at this point in the history
  • Loading branch information
ekluzek committed Oct 10, 2023
1 parent db2a42f commit 90e9483
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 11 deletions.
29 changes: 22 additions & 7 deletions tools/mksurfdata_esmf/gen_mksurfdata_jobscript_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,14 @@ def get_parser():
required=False,
default="12:00:00",
)
parser.add_argument(
"--queue",
help="""Queue to submit to)""",
action="store",
dest="queue",
required=False,
default="regular",
)
parser.add_argument(
"--scenario",
help="""scenario""",
Expand Down Expand Up @@ -119,6 +127,7 @@ def main ():
tasks_per_node = args.tasks_per_node
account = args.account
walltime = args.walltime
queue = args.queue

# --------------------------
# Determine target list
Expand Down Expand Up @@ -238,9 +247,13 @@ def main ():
runfile.write(f"#PBS -A {account} \n")
runfile.write(f'#PBS -N mksrf_{scenario} \n')
runfile.write('#PBS -j oe \n')
runfile.write('#PBS -q regular \n')
runfile.write('#PBS -k eod \n')
runfile.write('#PBS -S /bin/bash \n')
runfile.write(f'#PBS -q {queue} \n')
runfile.write(f'#PBS -l walltime={walltime} \n')
runfile.write(f"#PBS -l select={number_of_nodes}:ncpus=36:mpiprocs={tasks_per_node} \n")
runfile.write(f"#PBS -l select={number_of_nodes}:ncpus={tasks_per_node}:mpiprocs={tasks_per_node}:mem=109GB \n")
runfile.write(f'# This is a batch script to run a set of resolutions for mksurfdata_esmf {scenario} \n')
runfile.write('# NOTE: THIS SCRIPT IS AUTOMATICALLY GENERATED SO IN GENERAL YOU SHOULD NOT EDIT it!!\n\n')
runfile.write("\n")

n_p = int(tasks_per_node) * int(number_of_nodes)
Expand All @@ -249,6 +262,8 @@ def main ():
# environment including the paths to compilers and libraries
# external to cime such as netcdf
runfile.write('. '+env_specific_script + '\n')
check = f"if [ $? != 0 ]; then echo 'Error running env_specific_script'; exit -4; fi"
runfile.write(f"{check} \n")
for target in target_list:
res_set = dataset_dict[target][1]
for res in resolution_dict[res_set]:
Expand All @@ -258,22 +273,22 @@ def main ():
print (f"command is {command}")
commands = [x for x in command.split(' ') if x]
try:
run_cmd = subprocess.run(commands, check=True,
run_cmd = subprocess.run(commands, check=True, shell=True,
capture_output=True)
except subprocess.CalledProcessError as e:
sys.exit(f'{e} ERROR calling {command}')
output = run_cmd.stdout.decode('utf-8').strip()
namelist = output.split(' ')[-1]
print (f"generated namelist {namelist}")
output = f"mpiexec_mpt -p \"%g:\" -np {n_p} omplace -tm open64 {mksurfdata} < {namelist}"
output = f"time mpiexec_mpt -p \"%g:\" -np {n_p} {mksurfdata} < {namelist}"
runfile.write(f"{output} \n")
check = f"if [ $? != 0 ]; then echo 'Error running resolution {res}'; exit -4; fi"
runfile.write(f"{check} \n")
runfile.write(f"Successfully ran resolution {res}\n")
runfile.write(f"echo Successfully ran resolution {res}\n")

runfile.write(f"Successfully ran {jobscript_file}\n")
runfile.write(f"echo Successfully ran {jobscript_file}\n")

print (f"Successfully created jobscript {jobscript_file}\n")
print (f"echo Successfully created jobscript {jobscript_file}\n")
sys.exit(0)

if __name__ == "__main__":
Expand Down
12 changes: 8 additions & 4 deletions tools/mksurfdata_esmf/gen_mksurfdata_jobscript_single.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,18 +112,20 @@ def main ():
runfile.write(f'# Below are default batch directives for {machine} \n')
runfile.write('#PBS -N mksurfdata \n')
runfile.write('#PBS -j oe \n')
runfile.write('#PBS -k eod \n')
runfile.write('#PBS -S /bin/bash \n')
if machine == 'cheyenne':
attribs = {'mpilib': 'default'}
runfile.write('#PBS -l walltime=30:00 \n')
runfile.write(f"#PBS -A {account} \n")
runfile.write('#PBS -q regular \n')
runfile.write(f"#PBS -l select={number_of_nodes}:ncpus=36:mpiprocs={tasks_per_node} \n")
runfile.write(f"#PBS -l select={number_of_nodes}:ncpus=tasks_per_node}:mpiprocs={tasks_per_node} \n")
elif machine == 'casper':
attribs = {'mpilib': 'default'}
runfile.write('#PBS -l walltime=1:00:00 \n')
runfile.write(f"#PBS -A {account} \n")
runfile.write('#PBS -q casper \n')
runfile.write(f'#PBS -l select={number_of_nodes}:ncpus=12:' \
runfile.write(f'#PBS -l select={number_of_nodes}:ncpus={tasks_per_node}:' \
f'mpiprocs={tasks_per_node}:mem=80GB \n')
elif machine == 'izumi':
attribs = {'mpilib': 'mvapich2'}
Expand Down Expand Up @@ -167,6 +169,8 @@ def main ():
'dependent environment including the paths to ' \
'compilers and libraries external to cime such as netcdf')
runfile.write(f'\n. {env_mach_path}\n')
check = f'if [ $? != 0 ]; then echo "Error running env_mach_specific"; exit -4; fi'
runfile.write(f"{check} \n")
runfile.write('# Edit the mpirun command to use the MPI executable ' \
'on your system and the arguments it requires \n')
output = f'{executable} {mksurfdata_path} < {namelist_file}'
Expand All @@ -175,9 +179,9 @@ def main ():

check = f'if [ $? != 0 ]; then echo "Error running resolution {res}"; exit -4; fi'
runfile.write(f"{check} \n")
runfile.write(f"Successfully ran resolution\n")
runfile.write(f"echo Successfully ran resolution\n")

print (f"Successfully created jobscript {jobscript_file}\n")
print (f"echo Successfully created jobscript {jobscript_file}\n")
sys.exit(0)

if __name__ == "__main__":
Expand Down

0 comments on commit 90e9483

Please sign in to comment.