From 1fa4f029d76556c215b181ba1ae39a35e02402f3 Mon Sep 17 00:00:00 2001 From: Oliver Sanders Date: Mon, 28 Oct 2019 11:44:54 +0000 Subject: [PATCH] retry status: fixes --- bin/cylc-submit | 13 ++++++------- cylc/flow/task_events_mgr.py | 25 ++++++++++++++++++++++--- tests/retries/03-upgrade.t | 9 ++++++--- tests/retries/03-upgrade/suite.rc | 4 ++-- 4 files changed, 36 insertions(+), 15 deletions(-) diff --git a/bin/cylc-submit b/bin/cylc-submit index cc61af73713..0caedf238a8 100755 --- a/bin/cylc-submit +++ b/bin/cylc-submit @@ -59,6 +59,7 @@ from cylc.flow.task_proxy import TaskProxy from cylc.flow.task_state import TASK_STATUS_SUBMIT_FAILED from cylc.flow.templatevars import load_template_vars from cylc.flow.terminal import cli_function +from cylc.flow.xtrigger_mgr import XtriggerManager def get_option_parser(): @@ -117,25 +118,23 @@ def main(parser, options, suite, *task_ids): extract_resources( get_suite_srv_dir(suite), ['etc/job.sh']) - pool = SubProcPool() + proc_pool = SubProcPool() owner = get_user() job_pool = JobPool(suite, owner) db_mgr = SuiteDatabaseManager() - bdcst_mgr = BroadcastMgr(db_mgr), + bdcst_mgr = BroadcastMgr(db_mgr) task_job_mgr = TaskJobManager( suite, - pool, + proc_pool, db_mgr, TaskEventsManager( suite, - pool, + proc_pool, db_mgr, bdcst_mgr, XtriggerManager( suite, - owner, - bdcst_mgr, - proc_pool + owner ), job_pool ), diff --git a/cylc/flow/task_events_mgr.py b/cylc/flow/task_events_mgr.py index a562aef2540..d20f9965285 100644 --- a/cylc/flow/task_events_mgr.py +++ b/cylc/flow/task_events_mgr.py @@ -489,9 +489,28 @@ def _process_message_check( itask, itask.state, self.FLAG_RECEIVED_IGNORED, message, event_time, submit_num, itask.submit_num) return False - if itask.state(TASK_STATUS_WAITING): - # Ignore polled messages if waiting - # (this includes "retrying" states) + + if ( + itask.state(TASK_STATUS_WAITING) + and + ( + ( + # task has automatically submit-retried at least once + TimerFlags.SUBMISSION_RETRY in itask.try_timers + and itask.try_timers[ + TimerFlags.SUBMISSION_RETRY].num > 0 + ) + or + ( + # task has automatically execn-retried at least once + TimerFlags.EXECUTION_RETRY in itask.try_timers + and itask.try_timers[ + TimerFlags.EXECUTION_RETRY].num > 0 + ) + ) + + ): + # Ignore polled messages if task is already in retrying statuses LOG.warning( logfmt, itask, itask.state, self.FLAG_POLLED_IGNORED, message, diff --git a/tests/retries/03-upgrade.t b/tests/retries/03-upgrade.t index 847487270bf..3e6c75038e0 100644 --- a/tests/retries/03-upgrade.t +++ b/tests/retries/03-upgrade.t @@ -19,7 +19,7 @@ # Test the upgrade of the old *retrying states to the new xtrigger based # retry mechanism. . "$(dirname "$0")/test_header" -set_test_number 7 +set_test_number 10 install_suite # install the cylc7 restart database @@ -35,8 +35,11 @@ log_scan "${TEST_NAME_BASE}-retries" "${FILE}" 30 0.5 \ '\[b.1\] -submit-num=02' \ '\[b.1\] status=running: (received)failed/EXIT.*job(02)' \ '\[b.1\] -job(02) failed, retrying in PT2S' \ - 'xtrigger satisfied: cylc_retry_b.1' + 'xtrigger satisfied: cylc_retry_b.1' \ + '\[b.1\] -submit-num=03' \ + '\[b.1] status=running: (received)succeeded' \ + '\[c.1] status=running: (received)succeeded' -poll_suite_stopped +purge_suite "${SUITE_NAME}" exit diff --git a/tests/retries/03-upgrade/suite.rc b/tests/retries/03-upgrade/suite.rc index 3da90c2b7d9..33da6a39e2b 100644 --- a/tests/retries/03-upgrade/suite.rc +++ b/tests/retries/03-upgrade/suite.rc @@ -14,6 +14,6 @@ [runtime] [[b]] # fail four times then pass - script = test "$CYLC_TASK_SUBMIT_NUMBER" -ge 5; + script = test "$CYLC_TASK_SUBMIT_NUMBER" -ge 3; [[[job]]] - execution retry delays = 3*PT2S + execution retry delays = 2*PT2S