Assorted enhancements of remap_restarts.py package (EASE, SLES15, NAS, lake/landice log) #43

Merged on Jan 24, 2024 (29 commits)

Commits
4ebbdc1  add ease grid for remapping (weiyuan-jiang, Dec 4, 2023)
c0c3456  correct typo (weiyuan-jiang, Dec 4, 2023)
f7c532d  1)default partiton is '' 2) work with SLES15 3)add log for remap_lake… (weiyuan-jiang, Dec 13, 2023)
dbd869f  add support of PBS on NAS (weiyuan-jiang, Dec 15, 2023)
58c9c96  Merge branch 'main' into feature/wjiang/add_ease_grid (gmao-rreichle, Dec 18, 2023)
c756188  remove an extra wrong line (weiyuan-jiang, Dec 19, 2023)
16010dd  added site detection by cmake (Dec 20, 2023)
1f810a8  change log (Dec 20, 2023)
64c66c5  use GEOS_SITE (weiyuan-jiang, Dec 20, 2023)
8117072  cmake change (weiyuan-jiang, Dec 20, 2023)
3a814e1  Updated CHANGELOG.md (gmao-rreichle, Dec 20, 2023)
8e4a9d1  unified messages for common questions in remap_restarts package (rema… (gmao-rreichle, Dec 20, 2023)
e4aa27b  questions for land (weiyuan-jiang, Dec 21, 2023)
4ba8e84  correct tests (weiyuan-jiang, Dec 21, 2023)
f1b2f67  print information when test_remap_resartar is run on NAS (Dec 22, 2023)
917e72c  minimal edit of exit message (test_remap_restarts.py) (gmao-rreichle, Dec 22, 2023)
2d5bbc5  first step to run remap_catch (weiyuan-jiang, Dec 22, 2023)
06d6350  Merge branch 'feature/wjiang/add_ease_grid' into feature/wjiang/catch… (weiyuan-jiang, Dec 22, 2023)
8de3c13  change exit logic (biljanaorescanin, Dec 27, 2023)
a5e9914  more refactoring on 'remap' and 'params' (weiyuan-jiang, Jan 3, 2024)
47b707c  add "land_only" and remove remap_params.py (weiyuan-jiang, Jan 5, 2024)
d89f2e1  Merge branch 'feature/wjiang/catch_questions' into feature/wjiang/add… (weiyuan-jiang, Jan 5, 2024)
0daef39  remove deleted mod (weiyuan-jiang, Jan 5, 2024)
1bd5dc1  add questions to remap to CS grid for land_only option (weiyuan-jiang, Jan 8, 2024)
caab8bc  minor changes on message (weiyuan-jiang, Jan 10, 2024)
961c8b2  edited and unified more messages for common questions in remap_restar… (gmao-rreichle, Jan 11, 2024)
06aeabd  clarified "land_only" option for remap_restarts.py package (remap_cat… (gmao-rreichle, Jan 11, 2024)
7f2c916  updated CHANGELOG.md; minimal edits of messages in remap_restarts.py … (gmao-rreichle, Jan 11, 2024)
8757894  correct label and constraint fvcore name (weiyuan-jiang, Jan 23, 2024)
pre/remap_restart/remap_catchANDcn.py (84 changes: 45 additions & 39 deletions)

@@ -59,6 +59,9 @@ def remap(self):
surflay = config['output']['surface']['surflay']
in_tilefile = config['input']['surface']['catch_tilefile']

job = 'SLURM'
if "gmao_SIteam/ModelData" in out_bc_base: job='PBS'

if not in_tilefile :
agrid = config['input']['shared']['agrid']
ogrid = config['input']['shared']['ogrid']
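
Note: the scheduler is inferred from the boundary-conditions base path rather than from an explicit site flag; paths under gmao_SIteam/ModelData are taken to mean NAS (PBS), and everything else falls back to SLURM. The same two-line check is repeated in remap_upper.py below. A standalone restatement for clarity (the helper name is hypothetical, not part of this PR):

def detect_scheduler(bc_base):
    # NAS boundary conditions live under .../gmao_SIteam/ModelData, so that
    # substring doubles as a "submit with PBS" signal; otherwise assume SLURM.
    if "gmao_SIteam/ModelData" in bc_base:
        return 'PBS'
    return 'SLURM'

# detect_scheduler(out_bc_base) -> 'PBS' on NAS, 'SLURM' on NCCS
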
@@ -106,21 +109,25 @@ def remap(self):
else:
NPE = 160

QOS = "#SBATCH --qos="+config['slurm']['qos']
TIME ="#SBATCH --time=1:00:00"
if NPE >= 160:
assert config['slurm']['qos'] != 'debug', "qos should be allnccs"
TIME = "#SBATCH --time=12:00:00"
PARTITION =''
partition = config['slurm']['partition']
if (partition != ''):
PARTITION = "#SBATCH --partition=" + partition

CONSTRAINT = '#SBATCH --constraint="[cas|sky]"'
if BUILT_ON_SLES15:
CONSTRAINT = '#SBATCH --constraint=mil'

account = config['slurm']['account']
QOS = config['slurm_pbs']['qos']
TIME = "1:00:00"
if QOS != "debug": TIME="12:00:00"

NNODE = ''
if job == 'SLURM':
partition = config['slurm_pbs']['partition']
if (partition != ''):
PARTITION = "#SBATCH --partition=" + partition

CONSTRAINT = '"[cas|sky]"'
if BUILT_ON_SLES15:
CONSTRAINT = 'mil'
elif job == "PBS":
CONSTRAINT = 'cas_ait'
NNODE = (NPE-1)//40 + 1

account = config['slurm_pbs']['account']
# even if the (MERRA-2) input restarts are binary, the output restarts will always be nc4 (remap_bin2nc.py)
label = get_label(config)
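
Note: for PBS the script must also request whole nodes, so it derives a node count with (NPE-1)//40 + 1, i.e. ceiling division assuming 40 cores per node. A worked example of that arithmetic (the first NPE value is illustrative; 160 and 5400 both appear in this PR):

# smallest number of 40-core nodes that can hold NPE MPI tasks
for NPE in (96, 160, 5400):
    NNODE = (NPE - 1) // 40 + 1
    print(NPE, NNODE)   # 96 -> 3, 160 -> 4, 5400 -> 135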

@@ -149,19 +156,10 @@ def remap(self):
in_rstfile = dest

log_name = out_dir+'/'+'mk_catchANDcn_log'
mk_catch_j_template = """#!/bin/csh -f
#SBATCH --account={account}
#SBATCH --ntasks={NPE}
#SBATCH --job-name=mk_catchANDcn
#SBATCH --output={log_name}
{TIME}
{QOS}
{CONSTRAINT}
{PARTITION}
#

job_name = "mk_catchANDcn"
mk_catch_j_template = job_directive[job]+ \
"""
source {Bin}/g5_modules

limit stacksize unlimited

set esma_mpirun_X = ( {Bin}/esma_mpirun -np {NPE} )
@@ -175,7 +173,8 @@ def remap(self):

"""
catch1script = mk_catch_j_template.format(Bin = bindir, account = account, out_bcs = out_bc_landdir, \
model = model, out_dir = out_dir, surflay = surflay, log_name = log_name, NPE = NPE, \
model = model, out_dir = out_dir, surflay = surflay, log_name = log_name, job_name = job_name, \
NPE = NPE, NNODE=NNODE, \
in_wemin = in_wemin, out_wemin = out_wemin, out_tilefile = out_tilefile, in_tilefile = in_tilefile, \
in_rstfile = in_rstfile, out_rstfile = out_rstfile, time = yyyymmddhh_, TIME = TIME, QOS=QOS, CONSTRAINT=CONSTRAINT, PARTITION=PARTITION )
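
Note: the batch header now comes from job_directive[job], a per-scheduler lookup defined elsewhere in the package and not shown in this diff. A sketch of what such a mapping could look like, with placeholders matching the .format() call above; the exact directives, in particular the PBS select and group_list lines, are assumptions rather than a copy of the merged code:

job_directive = {
    'SLURM': """#!/bin/csh -f
#SBATCH --account={account}
#SBATCH --ntasks={NPE}
#SBATCH --job-name={job_name}
#SBATCH --output={log_name}
#SBATCH --time={TIME}
#SBATCH --qos={QOS}
#SBATCH --constraint={CONSTRAINT}
{PARTITION}
""",
    'PBS': """#!/bin/csh -f
#PBS -l select={NNODE}:ncpus=40:mpiprocs=40:model={CONSTRAINT}
#PBS -l walltime={TIME}
#PBS -q {QOS}
#PBS -N {job_name}
#PBS -W group_list={account}
#PBS -o {log_name}
#PBS -j oe
""",
}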

@@ -184,27 +183,34 @@ def remap(self):
catch_scrpt = open(script_name,'wt')
catch_scrpt.write(catch1script)
catch_scrpt.close()


interactive = None
if job == "SLURM": interactive = os.getenv('SLURM_JOB_ID', default = None)
if job == 'PBS': interactive = os.getenv('PBS_JOBID', default = None)

interactive = os.getenv('SLURM_JOB_ID', default = None)
if ( interactive ) :
print('interactive mode\n')
ntasks = os.getenv('SLURM_NTASKS', default = None)
if ( not ntasks):
nnodes = int(os.getenv('SLURM_NNODES', default = '1'))
ncpus = int(os.getenv('SLURM_CPUS_ON_NODE', default = '40'))
ntasks = nnodes * ncpus
ntasks = int(ntasks)
if job == "SLURM":
ntasks = os.getenv('SLURM_NTASKS', default = None)
if ( not ntasks):
nnodes = int(os.getenv('SLURM_NNODES', default = '1'))
ncpus = int(os.getenv('SLURM_CPUS_ON_NODE', default = '40'))
ntasks = nnodes * ncpus
ntasks = int(ntasks)

if (ntasks < NPE):
print("\nYou should have at least {NPE} cores. Now you only have {ntasks} cores ".format(NPE=NPE, ntasks=ntasks))
if (ntasks < NPE):
print("\nYou should have at least {NPE} cores. Now you only have {ntasks} cores ".format(NPE=NPE, ntasks=ntasks))

subprocess.call(['chmod', '755', script_name])
print(script_name+ ' 1>' + log_name + ' 2>&1')
os.system(script_name + ' 1>' + log_name+ ' 2>&1')

elif job == "SLURM" :
print('sbatch -W '+ script_name +'\n')
subprocess.call(['sbatch', '-W', script_name])
else:
print("sbatch -W " + script_name +"\n")
subprocess.call(['sbatch','-W', script_name])
print('qsub -W block=true '+ script_name +'\n')
subprocess.call(['qsub', '-W','block=true', script_name])

print( "cd " + cwdir)
os.chdir(cwdir)
pre/remap_restart/remap_command_line.py (12 changes: 6 additions & 6 deletions)

@@ -72,10 +72,10 @@ def parse_args(program_description):
p_command.add_argument('-out_bc_base',default="", help="Boundary conditions base dir (w/o bc_version and resolution info) for new restarts")
p_command.add_argument('-zoom', help= "Zoom parameter (search radius) for input surface restarts")

p_command.add_argument('-qos', default="debug", help="SLURM quality-of-service", choices=['debug', 'allnccs'])
p_command.add_argument('-qos', default="debug", help="slurm_pbs quality-of-service", choices=['debug', 'allnccs', 'normal'])
account = get_account()
p_command.add_argument('-account', default=account, help="SLURM account")
p_command.add_argument('-partition', default='', help="SLURM partition")
p_command.add_argument('-account', default=account, help="slurm_pbs account")
p_command.add_argument('-partition', default='', help="slurm_pbs partition")
p_command.add_argument('-rs', default='3', help="Flag indicating which restarts to regrid: 1 (upper air); 2 (surface); 3 (both)", choices=['1','2','3'])

# Parse using parse_known_args so we can pass the rest to the remap scripts
@@ -146,9 +146,9 @@ def get_answers_from_command_line(cml):
else:
answers["output:surface:wemin"] = wemin_default(answers['output:shared:bc_version'])

answers["slurm:account"] = cml.account
answers["slurm:qos"] = cml.qos
answers["slurm:partition"] = cml.partition
answers["slurm_pbs:account"] = cml.account
answers["slurm_pbs:qos"] = cml.qos
answers["slurm_pbs:partition"] = cml.partition

return answers
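
Note: with the rename, the same three flags now feed the slurm_pbs section of the config, and 'normal' is accepted as the PBS qos on NAS alongside the NCCS choices. A minimal, self-contained reproduction of just the flag handling (example values only; the real parser takes many other arguments and defaults -account to get_account()):

import argparse

p = argparse.ArgumentParser()
p.add_argument('-qos', default="debug", choices=['debug', 'allnccs', 'normal'])
p.add_argument('-account', default='')
p.add_argument('-partition', default='')

cml = p.parse_args(['-qos', 'normal', '-account', 's1234'])   # hypothetical account
answers = {
    "slurm_pbs:account":   cml.account,
    "slurm_pbs:qos":       cml.qos,
    "slurm_pbs:partition": cml.partition,
}
print(answers)
# {'slurm_pbs:account': 's1234', 'slurm_pbs:qos': 'normal', 'slurm_pbs:partition': ''}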

pre/remap_restart/remap_params.py (24 changes: 12 additions & 12 deletions)

@@ -17,13 +17,13 @@

class remap_params(object):
def __init__(self, config_from_question):
self.common_in = config_from_question['input']['shared']
self.common_out = config_from_question['output']['shared']
self.upper_out = config_from_question['output']['air']
self.slurm_options = config_from_question['slurm']
self.surf_in = config_from_question['input']['surface']
self.surf_out = config_from_question['output']['surface']
self.ana_out = config_from_question['output']['analysis']
self.common_in = config_from_question['input']['shared']
self.common_out = config_from_question['output']['shared']
self.upper_out = config_from_question['output']['air']
self.slurm_pbs_options = config_from_question['slurm_pbs']
self.surf_in = config_from_question['input']['surface']
self.surf_out = config_from_question['output']['surface']
self.ana_out = config_from_question['output']['analysis']

# load input yaml
yaml = ruamel.yaml.YAML()
@@ -65,7 +65,7 @@ def __init__(self, config_from_question):
config_tpl = self.params_for_air(config_tpl)
config_tpl = self.params_for_surface(config_tpl)
config_tpl = self.params_for_analysis(config_tpl)
config_tpl = self.options_for_slurm(config_tpl)
config_tpl = self.options_for_slurm_pbs(config_tpl)

self.config = config_tpl

Expand All @@ -77,10 +77,10 @@ def params_for_air(self, config_tpl):

return config_tpl

def options_for_slurm(self, config_tpl):
config_tpl['slurm']['account'] = self.slurm_options['account']
config_tpl['slurm']['qos'] = self.slurm_options['qos']
config_tpl['slurm']['partition'] = self.slurm_options['partition']
def options_for_slurm_pbs(self, config_tpl):
config_tpl['slurm_pbs']['account'] = self.slurm_pbs_options['account']
config_tpl['slurm_pbs']['qos'] = self.slurm_pbs_options['qos']
config_tpl['slurm_pbs']['partition'] = self.slurm_pbs_options['partition']
return config_tpl

def params_for_surface(self, config_tpl):
pre/remap_restart/remap_params.tpl (2 changes: 1 addition & 1 deletion)

@@ -60,7 +60,7 @@ output:
bkg: true
aqua: False
lcv: false
slurm:
slurm_pbs:
account:
qos:
partition: ''
pre/remap_restart/remap_questions.py (16 changes: 8 additions & 8 deletions)

@@ -56,8 +56,8 @@ def echo_bcs(x,opt):
return False

def default_partition(x):
if x['slurm:qos'] == 'debug':
x['slurm:partition'] = 'compute'
if x['slurm_pbs:qos'] == 'debug':
x['slurm_pbs:partition'] = 'compute'
return False
return True

@@ -410,21 +410,21 @@ def ask_questions():

{
"type": "text",
"name": "slurm:qos",
"message": "SLURM quality-of-service (qos)? (If on NCCS and atm resolution is c1440 or higher, enter allnccs.) ",
"name": "slurm_pbs:qos",
"message": "slurm or pbs quality-of-service (qos)? (If resolution is c1440 or higher, enter allnccs on NCCS or normal on NAS.) ",
"default": "debug",
},

{
"type": "text",
"name": "slurm:account",
"message": "SLURM account?",
"name": "slurm_pbs:account",
"message": "slurm_pbs account?",
"default": get_account(),
},
{
"type": "text",
"name": "slurm:partition",
"message": "Enter the SLURM partition only if you want particular partiton, otherwise keep empty as default: ",
"name": "slurm_pbs:partition",
"message": "Enter the slurm or pbs partition only if you want particular partiton, otherwise keep empty as default: ",
"default": '',
},
]
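
Note: question names use colon-separated paths ('slurm_pbs:qos', 'slurm_pbs:partition', ...) that are expanded into the nested config read back as config['slurm_pbs']['qos'] etc. in remap_params.py and the remap scripts. That expansion happens elsewhere in the package; a sketch of the convention, assuming a simple split-on-colon helper rather than the package's actual implementation:

def expand_answers(answers):
    # {'slurm_pbs:qos': 'debug'} -> {'slurm_pbs': {'qos': 'debug'}}
    config = {}
    for key, value in answers.items():
        node = config
        parts = key.split(':')
        for part in parts[:-1]:
            node = node.setdefault(part, {})
        node[parts[-1]] = value
    return config

print(expand_answers({'slurm_pbs:qos': 'debug', 'slurm_pbs:partition': 'compute'}))
# {'slurm_pbs': {'qos': 'debug', 'partition': 'compute'}}
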
pre/remap_restart/remap_upper.py (84 changes: 45 additions & 39 deletions)

@@ -94,6 +94,9 @@ def remap(self):
stretch = config['input']['shared']['stretch']
topo_bcsdir = get_topodir(in_bc_base, in_bc_version, agrid=agrid, ogrid=ogrid, omodel=omodel, stretch=stretch)

job = 'SLURM'
if "gmao_SIteam/ModelData" in in_bc_base: job='PBS'

topoin = glob.glob(topo_bcsdir+'/topo_DYN_ave*.data')[0]
# link topo file

Expand Down Expand Up @@ -141,21 +144,24 @@ def remap(self):
elif (imout>=2880):
NPE = 5400; nwrit = 6

QOS = "#SBATCH --qos="+config['slurm']['qos']
TIME ="#SBATCH --time=1:00:00"
if NPE > 532:
assert config['slurm']['qos'] != 'debug', "qos should be allnccs"
TIME = "#SBATCH --time=12:00:00"
PARTITION =''
partition = config['slurm']['partition']
if (partition != ''):
PARTITION = "#SBATCH --partition=" + partition

CONSTRAINT = '#SBATCH --constraint="[cas|sky]"'
if BUILT_ON_SLES15:
CONSTRAINT = '#SBATCH --constraint=mil'

log_name = out_dir+'/remap_upper_log'
QOS = config['slurm_pbs']['qos']
TIME = "1:00:00"
if NPE > 532:
assert config['slurm_pbs']['qos'] != 'debug', "qos should be allnccs or normal on NAS"
TIME = "12:00:00"
NNODE = ''
if job == 'SLURM':
partition = config['slurm_pbs']['partition']
if (partition != ''):
PARTITION = "#SBATCH --partition=" + partition

CONSTRAINT = '"[cas|sky]"'
if BUILT_ON_SLES15:
CONSTRAINT = 'mil'
else:
CONSTRAINT = 'cas_ait'
NNODE = (NPE-1)//40 + 1

# We need to create an input.nml file which is different if we are running stretched grid
# If we are running stretched grid, we need to pass in the target lon+lat and stretch factor
@@ -193,19 +199,12 @@ def remap(self):
with open('input.nml', 'w') as f:
f.write(nml_file)

remap_template="""#!/bin/csh -xf
#SBATCH --account={account}
#SBATCH --ntasks={NPE}
#SBATCH --job-name=remap_upper
#SBATCH --output={log_name}
{TIME}
{QOS}
{CONSTRAINT}
{PARTITION}
unlimit
remap_template = job_directive[job] + \
"""
source {Bin}/g5_modules
limit stacksize unlimited

cd {out_dir}/upper_data
source {Bin}/g5_modules
/bin/touch input.nml

# The MERRA fvcore_internal_restarts don't include W or DZ, but we can add them by setting
@@ -256,14 +255,15 @@ def remap(self):
-do_hydro {hydrostatic} $ioflag $dmflag -nwriter {nwrit} {stretch_str}

"""
account = config['slurm']['account']
account = config['slurm_pbs']['account']
drymassFLG = config['input']['air']['drymass']
hydrostatic = config['input']['air']['hydrostatic']
nlevel = config['output']['air']['nlevel']

log_name = out_dir+'/remap_upper_log'
job_name = 'remap_upper'
remap_upper_script = remap_template.format(Bin=bindir, account = account, \
out_dir = out_dir, log_name = log_name, drymassFLG = drymassFLG, \
imout = imout, nwrit = nwrit, NPE = NPE, \
out_dir = out_dir, log_name = log_name, job_name= job_name, drymassFLG = drymassFLG, \
imout = imout, nwrit = nwrit, NPE = NPE, NNODE = NNODE, \
QOS = QOS, TIME = TIME, CONSTRAINT = CONSTRAINT, PARTITION = PARTITION, nlevel = nlevel, hydrostatic = hydrostatic,
stretch_str = stretch_str)

@@ -273,25 +273,31 @@ def remap(self):
upper.write(remap_upper_script)
upper.close()

interactive = os.getenv('SLURM_JOB_ID', default = None)
interactive = None
if job == "SLURM": interactive = os.getenv('SLURM_JOB_ID', default = None)
if job == 'PBS': interactive = os.getenv('PBS_JOBID', default = None)

if (interactive) :
print('interactive mode\n')
ntasks = os.getenv('SLURM_NTASKS', default = None)
if ( not ntasks):
nnodes = int(os.getenv('SLURM_NNODES', default = '1'))
ncpus = int(os.getenv('SLURM_CPUS_ON_NODE', default = '28'))
ntasks = nnodes * ncpus
ntasks = int(ntasks)
if (ntasks < NPE ):
print("\nYou should have at least {NPE} cores. Now you only have {ntasks} cores ".format(NPE=NPE, ntasks=ntasks))
if job == 'SLURM':
ntasks = os.getenv('SLURM_NTASKS', default = None)
if ( not ntasks):
nnodes = int(os.getenv('SLURM_NNODES', default = '1'))
ncpus = int(os.getenv('SLURM_CPUS_ON_NODE', default = '28'))
ntasks = nnodes * ncpus
ntasks = int(ntasks)
if (ntasks < NPE ):
print("\nYou should have at least {NPE} cores. Now you only have {ntasks} cores ".format(NPE=NPE, ntasks=ntasks))

subprocess.call(['chmod', '755', script_name])
print(script_name+ ' 1>' + log_name + ' 2>&1')
os.system(script_name + ' 1>' + log_name+ ' 2>&1')
else :
elif job == "SLURM" :
print('sbatch -W '+ script_name +'\n')
subprocess.call(['sbatch', '-W', script_name])
else:
print('qsub -W block=true '+ script_name +'\n')
subprocess.call(['qsub', '-W','block=true', script_name])

#
# post process