Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes to provenance/timing saving #1201

Merged
merged 9 commits into from
Jan 26, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cime/cime_config/acme/machines/config_batch.xml
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@
<directives>
<directive> --job-name={{ job_id }}</directive>
<directive> --nodes={{ num_nodes }}</directive>
<directive> --output={{ output_error_path }} </directive>
<directive> --output={{ output_error_path }}.%j </directive>
<directive> --exclusive </directive>
<directive> --time={{ job_wallclock_time }}</directive>
<directive> --partition={{ job_queue }}</directive>
Expand Down
10 changes: 5 additions & 5 deletions cime/cime_config/acme/machines/syslog.cetus
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ set timing = $5
set dir = $6

# wait until output file is nonempty before checking remaining time
# (note that calling script 'touch'es the cesm log file before spawning this script, so that 'wc' does not fail)
# (note that calling script 'touch'es the acme log file before spawning this script, so that 'wc' does not fail)
set outlth = 0
while ($outlth < 1)
sleep 10
set outlth = `wc \-l $run/cesm.log.$lid | sed 's/ *\([0-9]*\) *.*/\1/' `
set outlth = `wc \-l $run/acme.log.$lid | sed 's/ *\([0-9]*\) *.*/\1/' `
end

set TimeRemaining = `qstat -lf $jid | grep TimeRemaining | sed 's/^ *TimeRemaining *: *\([0-9]*:[0-9]*:[0-9]*\) */\1/' `
Expand All @@ -29,16 +29,16 @@ if ("X$rem_secs" == "X") set rem_secs = 0
cat > $run/Walltime.Remaining <<EOF1
$remaining $sample_interval
EOF1
/bin/cp -p $run/cesm.log.$lid $dir/cesm.log.$lid.$remaining
/bin/cp --preserve=timestamps $run/acme.log.$lid $dir/acme.log.$lid.$remaining

while ($remaining > 0)
grep -a -i -e "nstep" -e "model date" $run/*atm.log.$lid | tail > $dir/atm.log.$lid.nstep.$remaining
# grep -a -i "nstep" $run/cesm.log.$lid | tail > $dir/cesm.log.$lid.nstep.$remaining
# grep -a -i "nstep" $run/acme.log.$lid | tail > $dir/acme.log.$lid.nstep.$remaining
grep -a -i -e "timestep" -e "model date" $run/*lnd.log.$lid | tail > $dir/lnd.log.$lid.timestep.$remaining
grep -a -i -e "timestep" -e "Step number" -e "model date" $run/*ocn.log.$lid | tail > $dir/ocn.log.$lid.stepnum.$remaining
grep -a -i -e "timestep" -e "istep" -e "model date" $run/*ice.log.$lid | tail > $dir/ice.log.$lid.istep.$remaining
grep -a -i "model date" $run/*cpl.log.$lid | tail > $dir/cpl.log.$lid.modeldata.$remaining
cp -p -u $timing/* $dir
/bin/cp --preserve=timestamps -u $timing/* $dir
chmod a+r $dir/*
sleep $sample_interval
set TimeRemaining = `qstat -lf $jid | grep TimeRemaining | sed 's/^ *TimeRemaining *: *\([0-9]*:[0-9]*:[0-9]*\) */\1/' `
Expand Down
72 changes: 72 additions & 0 deletions cime/cime_config/acme/machines/syslog.cori-haswell
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/bin/csh -f
# cori-haswell syslog script:
# mach_syslog <sampling interval (in seconds)> <job identifier> <time stamp> <run directory> <timing directory> <output directory>
#
# Purpose: while the batch job still has wallclock time left, periodically
# save the tails of the component logs, the timing files, and queue status
# into the output directory, and keep $run/Walltime.Remaining up to date.
# Each saved file is suffixed with the number of seconds remaining at the
# time of the sample.

# NOTE(review): 'sec' is set here but not referenced anywhere else in this script.
set sec = 0
# positional arguments, in the order given in the usage line above
set sample_interval = $1
set jid = $2
set lid = $3
set run = $4
set timing = $5
set dir = $6

# wait until job mapping information is output before saving output file
# (note that calling script 'touch'es the acme log file before spawning this script, so that 'wc' does not fail)
# The node count is parsed from the NumNodes= field of 'sqs -f'; the loop
# polls every 10 seconds until the acme log has at least that many lines.
# NOTE(review): if the NumNodes field cannot be parsed, $nnodes is empty and
# the 'while' comparison below is malformed -- confirm sqs always reports it.
set nnodes = `sqs -f $jid | grep NumNodes | sed 's/^ *NumNodes= *\([0-9]*\).*/\1/' `
set outlth = 0
while ($outlth < $nnodes)
sleep 10
set outlth = `wc \-l $run/acme.log.$lid | sed 's/ *\([0-9]*\) *.*/\1/' `
end

# Job time limit: pull the H:M:S value from the TimeLimit= field, split it
# into hours/minutes/seconds with leading zeros removed (presumably so the
# csh '@' arithmetic does not treat e.g. "08" as octal -- TODO confirm), and
# default any field that ends up empty to 0 before converting to seconds.
# NOTE(review): the pattern only matches an H:M:S value; a limit reported
# with a day prefix would not match -- confirm against actual sqs output.
set TimeLimit = `sqs -f $jid | grep TimeLimit | sed 's/^ *RunTime=.*TimeLimit=\([0-9]*:[0-9]*:[0-9]*\) .*/\1/' `
set limit_hours = `echo $TimeLimit | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\1/' `
set limit_mins = `echo $TimeLimit | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\2/' `
set limit_secs = `echo $TimeLimit | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\3/' `
if ("X$limit_hours" == "X") set limit_hours = 0
if ("X$limit_mins" == "X") set limit_mins = 0
if ("X$limit_secs" == "X") set limit_secs = 0
@ limit = 3600 * $limit_hours + 60 * $limit_mins + $limit_secs

# Elapsed run time: same parsing as above, applied to the RunTime= field.
set RunTime = `sqs -f $jid | grep RunTime | sed 's/^ *RunTime=\([0-9]*:[0-9]*:[0-9]*\) .*/\1/' `
set runt_hours = `echo $RunTime | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\1/' `
set runt_mins = `echo $RunTime | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\2/' `
set runt_secs = `echo $RunTime | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\3/' `
if ("X$runt_hours" == "X") set runt_hours = 0
if ("X$runt_mins" == "X") set runt_mins = 0
if ("X$runt_secs" == "X") set runt_secs = 0
@ runt = 3600 * $runt_hours + 60 * $runt_mins + $runt_secs

# Seconds of wallclock remaining = limit - elapsed; record it together with
# the sampling interval in $run/Walltime.Remaining.
@ remaining = $limit - $runt
cat > $run/Walltime.Remaining <<EOF1
$remaining $sample_interval
EOF1
# Save a copy of the acme log as it stands now, keeping its timestamps.
/bin/cp --preserve=timestamps $run/acme.log.$lid $dir/acme.log.$lid.$remaining

# Sampling loop: runs until the computed remaining time is no longer positive.
while ($remaining > 0)
# Save the tail of the progress-related lines (step counters / model date)
# from each component log; '-a' makes grep treat the logs as text.
grep -a -i -e "nstep" -e "model date" $run/*atm.log.$lid | tail > $dir/atm.log.$lid.nstep.$remaining
# grep -a -i "nstep" $run/acme.log.$lid | tail > $dir/acme.log.$lid.nstep.$remaining
grep -a -i -e "timestep" -e "model date" $run/*lnd.log.$lid | tail > $dir/lnd.log.$lid.timestep.$remaining
grep -a -i -e "timestep" -e "Step number" -e "model date" $run/*ocn.log.$lid | tail > $dir/ocn.log.$lid.stepnum.$remaining
grep -a -i -e "timestep" -e "istep" -e "model date" $run/*ice.log.$lid | tail > $dir/ice.log.$lid.istep.$remaining
grep -a -i "model date" $run/*cpl.log.$lid | tail > $dir/cpl.log.$lid.modeldata.$remaining
# Copy timing files that are newer than the saved copies ('-u'), keeping timestamps.
/bin/cp --preserve=timestamps -u $timing/* $dir
# xtnodestat > $dir/xtnodestat.$lid.$remaining
# Save the output of 'sqs -w -a' for this sample.
sqs -w -a > $dir/sqsw.$lid.$remaining
# Make everything saved so far readable by all users.
chmod a+r $dir/*
sleep $sample_interval
# Re-query the elapsed run time and recompute the time remaining.
set RunTime = `sqs -f $jid | grep RunTime | sed 's/^ *RunTime=\([0-9]*:[0-9]*:[0-9]*\) .*/\1/' `
set runt_hours = `echo $RunTime | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\1/' `
set runt_mins = `echo $RunTime | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\2/' `
set runt_secs = `echo $RunTime | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\3/' `
if ("X$runt_hours" == "X") set runt_hours = 0
if ("X$runt_mins" == "X") set runt_mins = 0
if ("X$runt_secs" == "X") set runt_secs = 0
@ runt = 3600 * $runt_hours + 60 * $runt_mins + $runt_secs
@ remaining = $limit - $runt
# Refresh $run/Walltime.Remaining with the new value.
cat > $run/Walltime.Remaining << EOF2
$remaining $sample_interval
EOF2

end

Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/csh -f
# corip1 syslog script:
# cori-knl syslog script:
# mach_syslog <sampling interval (in seconds)> <job identifier> <time stamp> <run directory> <timing directory> <output directory>

set sec = 0
Expand All @@ -11,12 +11,12 @@ set timing = $5
set dir = $6

# wait until job mapping information is output before saving output file
# (note that calling script 'touch'es the cesm log file before spawning this script, so that 'wc' does not fail)
# (note that calling script 'touch'es the acme log file before spawning this script, so that 'wc' does not fail)
set nnodes = `sqs -f $jid | grep NumNodes | sed 's/^ *NumNodes= *\([0-9]*\).*/\1/' `
set outlth = 0
while ($outlth < $nnodes)
sleep 10
set outlth = `wc \-l $run/cesm.log.$lid | sed 's/ *\([0-9]*\) *.*/\1/' `
set outlth = `wc \-l $run/acme.log.$lid | sed 's/ *\([0-9]*\) *.*/\1/' `
end

set TimeLimit = `sqs -f $jid | grep TimeLimit | sed 's/^ *RunTime=.*TimeLimit=\([0-9]*:[0-9]*:[0-9]*\) .*/\1/' `
Expand All @@ -41,16 +41,16 @@ if ("X$runt_secs" == "X") set runt_secs = 0
cat > $run/Walltime.Remaining <<EOF1
$remaining $sample_interval
EOF1
/bin/cp -p $run/cesm.log.$lid $dir/cesm.log.$lid.$remaining
/bin/cp --preserve=timestamps $run/acme.log.$lid $dir/acme.log.$lid.$remaining

while ($remaining > 0)
grep -a -i -e "nstep" -e "model date" $run/*atm.log.$lid | tail > $dir/atm.log.$lid.nstep.$remaining
# grep -a -i "nstep" $run/cesm.log.$lid | tail > $dir/cesm.log.$lid.nstep.$remaining
# grep -a -i "nstep" $run/acme.log.$lid | tail > $dir/acme.log.$lid.nstep.$remaining
grep -a -i -e "timestep" -e "model date" $run/*lnd.log.$lid | tail > $dir/lnd.log.$lid.timestep.$remaining
grep -a -i -e "timestep" -e "Step number" -e "model date" $run/*ocn.log.$lid | tail > $dir/ocn.log.$lid.stepnum.$remaining
grep -a -i -e "timestep" -e "istep" -e "model date" $run/*ice.log.$lid | tail > $dir/ice.log.$lid.istep.$remaining
grep -a -i "model date" $run/*cpl.log.$lid | tail > $dir/cpl.log.$lid.modeldata.$remaining
cp -p -u $timing/* $dir
/bin/cp --preserve=timestamps -u $timing/* $dir
# xtnodestat > $dir/xtnodestat.$lid.$remaining
sqs -w -a > $dir/sqsw.$lid.$remaining
chmod a+r $dir/*
Expand Down
10 changes: 5 additions & 5 deletions cime/cime_config/acme/machines/syslog.edison
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ set timing = $5
set dir = $6

# wait until job mapping information is output before saving output file
# (note that calling script 'touch'es the cesm log file before spawning this script, so that 'wc' does not fail)
# (note that calling script 'touch'es the acme log file before spawning this script, so that 'wc' does not fail)
set nnodes = `sqs -f $jid | grep NumNodes | sed 's/^ *NumNodes= *\([0-9]*\).*/\1/' `
set outlth = 0
while ($outlth < $nnodes)
sleep 10
set outlth = `wc \-l $run/cesm.log.$lid | sed 's/ *\([0-9]*\) *.*/\1/' `
set outlth = `wc \-l $run/acme.log.$lid | sed 's/ *\([0-9]*\) *.*/\1/' `
end

set TimeLimit = `sqs -f $jid | grep TimeLimit | sed 's/^ *RunTime=.*TimeLimit=\([0-9]*:[0-9]*:[0-9]*\) .*/\1/' `
Expand All @@ -41,16 +41,16 @@ if ("X$runt_secs" == "X") set runt_secs = 0
cat > $run/Walltime.Remaining <<EOF1
$remaining $sample_interval
EOF1
/bin/cp -p $run/cesm.log.$lid $dir/cesm.log.$lid.$remaining
/bin/cp --preserve=timestamps $run/acme.log.$lid $dir/acme.log.$lid.$remaining

while ($remaining > 0)
grep -a -i -e "nstep" -e "model date" $run/*atm.log.$lid | tail > $dir/atm.log.$lid.nstep.$remaining
# grep -a -i "nstep" $run/cesm.log.$lid | tail > $dir/cesm.log.$lid.nstep.$remaining
# grep -a -i "nstep" $run/acme.log.$lid | tail > $dir/acme.log.$lid.nstep.$remaining
grep -a -i -e "timestep" -e "model date" $run/*lnd.log.$lid | tail > $dir/lnd.log.$lid.timestep.$remaining
grep -a -i -e "timestep" -e "Step number" -e "model date" $run/*ocn.log.$lid | tail > $dir/ocn.log.$lid.stepnum.$remaining
grep -a -i -e "timestep" -e "istep" -e "model date" $run/*ice.log.$lid | tail > $dir/ice.log.$lid.istep.$remaining
grep -a -i "model date" $run/*cpl.log.$lid | tail > $dir/cpl.log.$lid.modeldata.$remaining
cp -p -u $timing/* $dir
# copy only timing files newer than the saved copies, preserving timestamps only
# (no '-p': it would also preserve mode and ownership, defeating --preserve=timestamps)
/bin/cp --preserve=timestamps -u $timing/* $dir
# xtnodestat > $dir/xtnodestat.$lid.$remaining
sqs -w -a > $dir/sqsw.$lid.$remaining
chmod a+r $dir/*
Expand Down
10 changes: 5 additions & 5 deletions cime/cime_config/acme/machines/syslog.mira
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ set timing = $5
set dir = $6

# wait until output file is nonempty before checking remaining time
# (note that calling script 'touch'es the cesm log file before spawning this script, so that 'wc' does not fail)
# (note that calling script 'touch'es the acme log file before spawning this script, so that 'wc' does not fail)
set outlth = 0
while ($outlth < 1)
sleep 10
set outlth = `wc \-l $run/cesm.log.$lid | sed 's/ *\([0-9]*\) *.*/\1/' `
set outlth = `wc \-l $run/acme.log.$lid | sed 's/ *\([0-9]*\) *.*/\1/' `
end

set TimeRemaining = `qstat -lf $jid | grep TimeRemaining | sed 's/^ *TimeRemaining *: *\([0-9]*:[0-9]*:[0-9]*\) */\1/' `
Expand All @@ -29,16 +29,16 @@ if ("X$rem_secs" == "X") set rem_secs = 0
cat > $run/Walltime.Remaining <<EOF1
$remaining $sample_interval
EOF1
/bin/cp -p $run/cesm.log.$lid $dir/cesm.log.$lid.$remaining
/bin/cp --preserve=timestamps $run/acme.log.$lid $dir/acme.log.$lid.$remaining

while ($remaining > 0)
grep -a -i -e "nstep" -e "model date" $run/*atm.log.$lid | tail > $dir/atm.log.$lid.nstep.$remaining
# grep -a -i "nstep" $run/cesm.log.$lid | tail > $dir/cesm.log.$lid.nstep.$remaining
# grep -a -i "nstep" $run/acme.log.$lid | tail > $dir/acme.log.$lid.nstep.$remaining
grep -a -i -e "timestep" -e "model date" $run/*lnd.log.$lid | tail > $dir/lnd.log.$lid.timestep.$remaining
grep -a -i -e "timestep" -e "Step number" -e "model date" $run/*ocn.log.$lid | tail > $dir/ocn.log.$lid.stepnum.$remaining
grep -a -i -e "timestep" -e "istep" -e "model date" $run/*ice.log.$lid | tail > $dir/ice.log.$lid.istep.$remaining
grep -a -i "model date" $run/*cpl.log.$lid | tail > $dir/cpl.log.$lid.modeldata.$remaining
cp -p -u $timing/* $dir
/bin/cp --preserve=timestamps -u $timing/* $dir
chmod a+r $dir/*
sleep $sample_interval
set TimeRemaining = `qstat -lf $jid | grep TimeRemaining | sed 's/^ *TimeRemaining *: *\([0-9]*:[0-9]*:[0-9]*\) */\1/' `
Expand Down
10 changes: 5 additions & 5 deletions cime/cime_config/acme/machines/syslog.titan
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,27 @@ set timing = $5
set dir = $6

# wait until job mapping information is output before saving output file
# (note that calling script 'touch'es the cesm log file before spawning this script, so that 'wc' does not fail)
# (note that calling script 'touch'es the acme log file before spawning this script, so that 'wc' does not fail)
set nnodes = `qstat -f $jid | grep Resource_List.nodes | sed 's/ *Resource_List.nodes = *\([0-9]*\):ppn=*\([0-9]*\) */\1/' `
set outlth = 0
while ($outlth < $nnodes)
sleep 10
set outlth = `wc \-l $run/cesm.log.$lid | sed 's/ *\([0-9]*\) *.*/\1/' `
set outlth = `wc \-l $run/acme.log.$lid | sed 's/ *\([0-9]*\) *.*/\1/' `
end
set remaining = `qstat -f $jid | grep Walltime.Remaining | sed 's/ *Walltime.Remaining = *\([0-9]*\) */\1/' `
cat > $run/Walltime.Remaining <<EOF1
$remaining $sample_interval
EOF1
/bin/cp -p $run/cesm.log.$lid $dir/cesm.log.$lid.$remaining
/bin/cp --preserve=timestamps $run/acme.log.$lid $dir/acme.log.$lid.$remaining

while ($remaining > 0)
grep -a -i -e "nstep" -e "model date" $run/*atm.log.$lid | tail > $dir/atm.log.$lid.nstep.$remaining
# grep -a -i "nstep" $run/cesm.log.$lid | tail > $dir/cesm.log.$lid.nstep.$remaining
# grep -a -i "nstep" $run/acme.log.$lid | tail > $dir/acme.log.$lid.nstep.$remaining
grep -a -i -e "timestep" -e "model date" $run/*lnd.log.$lid | tail > $dir/lnd.log.$lid.timestep.$remaining
grep -a -i -e "timestep" -e "Step number" -e "model date" $run/*ocn.log.$lid | tail > $dir/ocn.log.$lid.stepnum.$remaining
grep -a -i -e "timestep" -e "istep" -e "model date" $run/*ice.log.$lid | tail > $dir/ice.log.$lid.istep.$remaining
grep -a -i "model date" $run/*cpl.log.$lid | tail > $dir/cpl.log.$lid.modeldata.$remaining
cp -p -u $timing/* $dir
/bin/cp --preserve=timestamps -u $timing/* $dir
xtnodestat > $dir/xtnodestat.$lid.$remaining
showq > $dir/showq.$lid.$remaining
chmod a+r $dir/*
Expand Down
1 change: 1 addition & 0 deletions cime/driver_cpl/cime_config/buildnml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def _main_func():
% (grid, atm_grid, lnd_grid, rof_grid, ocn_grid, wav_grid)

rc, out, err = run_cmd(cmd, from_dir=confdir)
logger.info(out)
expect(rc==0,"Command %s failed rc=%d\nout=%s\nerr=%s"%(cmd,rc,out,err))

# copy drv_in, drv_flds_in, seq_maps.rc and all *modio* files to rundir
Expand Down
9 changes: 8 additions & 1 deletion cime/utils/python/CIME/case.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,7 @@ def _set_pio_xml(self):
def _create_caseroot_tools(self):
machines_dir = os.path.abspath(self.get_value("MACHDIR"))
toolsdir = os.path.join(self.get_value("CIMEROOT"),"scripts","Tools")
casetools = os.path.join(self._caseroot, "Tools")
# setup executable files in caseroot/
exefiles = (os.path.join(toolsdir, "case.setup"),
os.path.join(toolsdir, "case.build"),
Expand Down Expand Up @@ -721,7 +722,7 @@ def _create_caseroot_tools(self):
toolfiles.append( os.path.join(toolsdir,"mdiag_reduce.pl") )

for toolfile in toolfiles:
destfile = os.path.join(self._caseroot,"Tools",os.path.basename(toolfile))
destfile = os.path.join(casetools, os.path.basename(toolfile))
expect(os.path.isfile(toolfile)," File %s does not exist"%toolfile)
try:
os.symlink(toolfile, destfile)
Expand Down Expand Up @@ -764,6 +765,12 @@ def _create_caseroot_tools(self):
# except Exception as e:
# logger.warning("FAILED to set up infofiles: %s" % str(e))

if get_model() == "acme":
if os.path.exists(os.path.join(machines_dir, "syslog.%s" % machine)):
shutil.copy(os.path.join(machines_dir, "syslog.%s" % machine), os.path.join(casetools, "mach_syslog"))
else:
shutil.copy(os.path.join(machines_dir, "syslog.noop"), os.path.join(casetools, "mach_syslog"))

def _create_caseroot_sourcemods(self):
components = self.get_compset_components()
for component in components:
Expand Down
6 changes: 3 additions & 3 deletions cime/utils/python/CIME/get_timing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from CIME.XML.standard_module_setup import *

import datetime, shutil, re, gzip
import datetime, shutil, re

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -160,7 +160,7 @@ def getTiming(self):
finfilename = os.path.join(self.caseroot, "timing",
"%s_timing_stats.%s" % (cime_model, self.lid))
foutfilename = os.path.join(self.caseroot, "timing",
"%s_timing.%s.%s.gz" % (cime_model, caseid, self.lid))
"%s_timing.%s.%s" % (cime_model, caseid, self.lid))

timingDir = os.path.join(self.caseroot, "timing")
if not os.path.isdir(timingDir):
Expand Down Expand Up @@ -215,7 +215,7 @@ def getTiming(self):
m.offset = int((maxoffset*m.rootpe)/peminmax) + extraoff
cpl.offset = 0
try:
self.fout = gzip.open(foutfilename, "wb")
self.fout = open(foutfilename, "w")
except Exception, e:
logger.critical("Could not open file for writing: %s"
% foutfilename)
Expand Down
Loading