Skip to content

Commit

Permalink
update to cylc#2781, cylc#2849
Browse files Browse the repository at this point in the history
  • Loading branch information
oliver-sanders committed Nov 9, 2018
1 parent 4d8b623 commit bbba630
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 22 deletions.
8 changes: 3 additions & 5 deletions lib/cylc/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,15 +168,13 @@ def construct_ssh_cmd(raw_cmd, user=None, host=None, forward_x11=False,
user_at_host += 'localhost'
command.append(user_at_host)

# Pass CYLC_VERSION, CYLC_CONF_PATH and optionally CYLC_UTC through.
# Pass CYLC_VERSION and, optionally, CYLC_CONF_PATH & CYLC_UTC through.
command += ['env', quote(r'CYLC_VERSION=%s' % CYLC_VERSION)]

try:
command.append(r'CYLC_CONF_PATH=%s' % quote(
os.environ['CYLC_CONF_PATH']))
command.append(
quote(r'CYLC_CONF_PATH=%s' % os.environ['CYLC_CONF_PATH']))
except KeyError:
pass

if set_UTC and os.getenv('CYLC_UTC') in ["True", "true"]:
command.append(quote(r'CYLC_UTC=True'))
command.append(quote(r'TZ=UTC'))
Expand Down
35 changes: 21 additions & 14 deletions lib/cylc/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,9 +250,11 @@ def start(self):
self.shutdown(exc)
if self.auto_restart_mode == self.AUTO_STOP_RESTART_NORMAL:
self.suite_auto_restart()
self.close_logs()

except SchedulerError as exc:
self.shutdown(exc)
self.close_logs()
sys.exit(1)

except KeyboardInterrupt as exc:
Expand All @@ -262,6 +264,7 @@ def start(self):
# In case of exceptions in the shutdown method itself.
LOG.exception(exc2)
sys.exit(1)
self.close_logs()

except Exception as exc:
LOG.exception(exc)
Expand All @@ -271,6 +274,7 @@ def start(self):
except Exception as exc2:
# In case of exceptions in the shutdown method itself
LOG.warning(exc2)
self.close_logs()
if cylc.flags.debug:
raise
else:
Expand All @@ -279,8 +283,14 @@ def start(self):
else:
# main loop ends (not used?)
self.shutdown()
self.close_logs()

def close_logs(self):
    """Finish logging for this scheduler.

    Writes the final "DONE" record to LOG, stops the profiler, and then
    closes each handler attached to LOG, in that order.
    """
    LOG.info("DONE")  # main thread exit
    self.profiler.stop()
    for log_handler in LOG.handlers:
        log_handler.close()

@staticmethod
def _start_print_blurb():
Expand Down Expand Up @@ -1381,9 +1391,10 @@ def suite_shutdown(self):
# * Ensure the host can be safely taken down once suites
for itask in self.pool.get_tasks():
if (
itask.task_host == 'localhost' and
itask.summary['batch_sys_name'] in ['background', 'at'] and
itask.state.status in TASK_STATUSES_ACTIVE
and itask.summary['batch_sys_name']
and self.task_job_mgr.batch_sys_mgr.is_job_local_to_host(
itask.summary['batch_sys_name'])
):
LOG.info('Waiting for jobs running on localhost to '
'complete before attempting restart')
Expand All @@ -1404,7 +1415,7 @@ def suite_auto_restart(self, max_retries=3):

for attempt_no in range(max_retries):
new_host = HostAppointer(cached=False).appoint_host()
LOG.info('Attempting to restart on "%s"' % new_host)
LOG.info('Attempting to restart on "%s"', new_host)

# proc will start with current env (incl CYLC_HOME etc)
proc = Popen(
Expand All @@ -1415,15 +1426,15 @@ def suite_auto_restart(self, max_retries=3):
if attempt_no < max_retries:
msg += (' will retry in %ss'
% self.INTERVAL_AUTO_RESTART_ERROR)
msg += '. Restart error:\n%s' % proc.communicate()[1]
LOG.critical(msg)
LOG.critical(msg + '. Restart error:\n%s',
proc.communicate()[1])
sleep(self.INTERVAL_AUTO_RESTART_ERROR)
else:
LOG.info('Suite now running on "%s".' % new_host)
LOG.info('Suite now running on "%s".', new_host)
return True
LOG.critical(
'Suite unable to automatically restart after %s tries - '
'manual restart required.' % max_retries)
'manual restart required.', max_retries)
return False

def set_auto_restart(self, restart_delay=None,
Expand Down Expand Up @@ -1475,8 +1486,8 @@ def set_auto_restart(self, restart_delay=None,
# testing purposes.
shutdown_delay = abs(int(restart_delay))
shutdown_time = time() + shutdown_delay
LOG.info('Suite will restart in %ss (at %s)' % (
shutdown_delay, time2str(shutdown_time)))
LOG.info('Suite will restart in %ss (at %s)', shutdown_delay,
time2str(shutdown_time))
self.auto_restart_time = shutdown_time
else:
self.auto_restart_time = time()
Expand Down Expand Up @@ -1567,7 +1578,7 @@ def suite_health_check(self, has_changes):
'host is unable to continue running it.\n'
'When another suite host becomes available '
'the suite can be restarted by:\n'
' $ cylc restart %s' % self.suite)
' $ cylc restart %s', self.suite)
if self.set_auto_restart(mode=mode):
return # skip remaining health checks
elif (
Expand Down Expand Up @@ -1859,10 +1870,6 @@ def shutdown(self, reason=None):
# run shutdown handlers
self.run_event_handlers(self.EVENT_SHUTDOWN, str(reason))

LOG.info("DONE") # main thread exit
for handler in LOG.handlers:
handler.close()

def set_stop_point(self, stop_point_string):
"""Set stop point."""
stop_point = get_point(stop_point_string)
Expand Down
2 changes: 1 addition & 1 deletion tests/restart/34-auto-restart-basic.t
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ LATEST_TASK=$(cylc suite-state "${SUITE_NAME}" -S succeeded \
poll test -f "${SUITE_RUN_DIR}/.service/contact"
FILE=$(cylc cat-log "${SUITE_NAME}" -m p |xargs readlink -f)
log_scan "${TEST_NAME}-restart" "${FILE}" 20 1 \
"Suite starting: server=$(ssh "${CYLC_TEST_HOST}" hostname -f)"
"Suite server: url=https://$(ssh "${CYLC_TEST_HOST}" hostname -f)"
run_ok "${TEST_NAME}-restart-success" cylc suite-state "${SUITE_NAME}" \
--task=$(printf 'task_foo%02d' $(( LATEST_TASK + 3 ))) \
--status='succeeded' --point=1 --interval=1 --max-polls=20
Expand Down
1 change: 1 addition & 0 deletions tests/restart/38-auto-restart-stopping.t
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ fi
set_test_number 2
if ${CYLC_TEST_DEBUG:-false}; then ERR=2; else ERR=1; fi
#-------------------------------------------------------------------------------
# ensure that suites don't get auto stop-restarted if they are already stopping
BASE_GLOBALRC="
[cylc]
health check interval = PT1S
Expand Down
3 changes: 2 additions & 1 deletion tests/restart/41-auto-restart-local-jobs.t
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ grep_fail "$(job-ps-line bar)" "${TEST_NAME}-ps-2.stdout"
poll test -f "${SUITE_RUN_DIR}/.service/contact"
FILE=$(cylc cat-log "${SUITE_NAME}" -m p |xargs readlink -f)
log_scan "${TEST_NAME}-restart" "${FILE}" 20 1 \
"Suite starting: server=$(ssh "${CYLC_TEST_HOST2}" hostname -f)"
"Suite server: url=https://$(ssh "${CYLC_TEST_HOST2}" hostname -f)"
sleep 1
#-------------------------------------------------------------------------------
# auto stop-restart - force mode:
# ensure the suite DOESN'T WAIT for local jobs to complete before stopping
Expand Down
2 changes: 1 addition & 1 deletion tests/restart/42-auto-restart-ping-pong.t
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ for ear in $(seq 1 "${EARS}"); do
# test the restart procedure
FILE=$(cylc cat-log "${SUITE_NAME}" -m p |xargs readlink -f)
log_scan2 "${TEST_NAME_BASE}-${ear}-restart" "${FILE}" 20 1 \
"Suite starting: server=$(ssh "${JOKERS}" hostname -f)"
"Suite server: url=https://$(ssh "${JOKERS}" hostname -f)"
sleep 2
done

Expand Down

0 comments on commit bbba630

Please sign in to comment.