diff --git a/cylc/flow/scheduler.py b/cylc/flow/scheduler.py
index 27d5d89dab5..d9d9c2bec9f 100644
--- a/cylc/flow/scheduler.py
+++ b/cylc/flow/scheduler.py
@@ -94,7 +94,6 @@
from cylc.flow.task_state import (
TASK_STATUSES_ACTIVE,
TASK_STATUSES_NEVER_ACTIVE,
- TASK_STATUSES_SUCCESS,
TASK_STATUS_FAILED)
from cylc.flow.templatevars import load_template_vars
from cylc.flow import __version__ as CYLC_VERSION
@@ -1524,7 +1523,7 @@ async def update_data_structure(self):
updated_nodes = set(updated_tasks).union(
self.pool.get_pool_change_tasks())
if (
- has_updated or
+ updated_nodes or
self.data_store_mgr.updates_pending or
self.job_pool.updates_pending
):
@@ -1586,10 +1585,8 @@ def check_suite_stalled(self):
return
self.is_stalled = self.pool.is_stalled()
if self.is_stalled:
- message = 'suite stalled'
- LOG.warning(message)
- self.run_event_handlers(self.EVENT_STALLED, message)
- self.pool.report_stalled_task_deps()
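+ # (is_stalled() has already logged the unhandled failed tasks)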
+ self.run_event_handlers(self.EVENT_STALLED, 'suite stalled')
+ self.pool.report_unmet_deps()
if self._get_events_conf('abort on stalled'):
raise SchedulerError('Abort on suite stalled is set')
# Start suite timeout timer
@@ -1671,6 +1668,9 @@ async def shutdown(self, reason):
self.proc_pool.process()
if self.pool is not None:
+ if not self.is_stalled:
+ # (else already reported)
+ self.pool.report_unmet_deps()
self.pool.warn_stop_orphans()
try:
self.suite_db_mgr.put_task_event_timers(self.task_events_mgr)
@@ -1746,30 +1746,18 @@ def stop_clock_done(self):
return False
def check_auto_shutdown(self):
- """Check if we should do a normal automatic shutdown."""
+ """Check if we should do an automatic shutdown: main pool empty."""
if not self.can_auto_stop:
return False
- can_shutdown = True
- for itask in self.pool.get_all_tasks():
- if self.pool.stop_point is None:
- # Don't if any unsucceeded task exists.
- if not itask.state(*TASK_STATUSES_SUCCESS):
- can_shutdown = False
- break
- elif (
- itask.point <= self.pool.stop_point
- and not itask.state(*TASK_STATUSES_SUCCESS)
- ):
- # Don't if any unsucceeded task exists < stop point...
- if itask.identity not in self.pool.stuck_future_tasks:
- # ...unless it has a future trigger extending > stop point.
- can_shutdown = False
- break
- if can_shutdown and self.pool.stop_point:
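+ # Partially satisfied tasks are held back in the runahead pool (see
+ # release_runahead_tasks), so an empty main pool after release means
+ # there is nothing left to run.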
+ self.pool.release_runahead_tasks()
+ if self.pool.get_tasks():
+ return False
+ # can shut down
+ if self.pool.stop_point:
self.options.stopcp = None
self.pool.stop_point = None
self.suite_db_mgr.delete_suite_stop_cycle_point()
- return can_shutdown
+ return True
def hold_suite(self, point=None):
"""Hold all tasks in suite."""
diff --git a/cylc/flow/task_pool.py b/cylc/flow/task_pool.py
index 62a0c1947f0..5656dbca7e8 100644
--- a/cylc/flow/task_pool.py
+++ b/cylc/flow/task_pool.py
@@ -40,7 +40,7 @@
from cylc.flow.task_proxy import TaskProxy
from cylc.flow.task_state import (
TASK_STATUSES_ACTIVE,
- TASK_STATUSES_NOT_STALLED,
+ TASK_STATUSES_FAILURE,
TASK_STATUS_WAITING,
TASK_STATUS_EXPIRED,
TASK_STATUS_QUEUED,
@@ -176,7 +176,6 @@ def __init__(self, config, suite_db_mgr, task_events_mgr, job_pool):
self.is_held = False
self.hold_point = None
- self.stuck_future_tasks = []
self.abs_outputs_done = set()
self.stop_task_id = None
@@ -241,7 +240,11 @@ def add_to_runahead_pool(self, itask, is_new=True):
return itask
def release_runahead_tasks(self):
- """Restrict the number of active cycle points.
+ """Release tasks from the runahead pool to the main pool.
+
+ This serves to:
+ - restrict the number of active cycle points
+ - keep partially-satisfied waiting tasks out of the n=0 active pool
Compute runahead limit, and release tasks to the main pool if they are
below that point (and <= the stop point, if there is a stop point).
@@ -347,6 +350,9 @@ def release_runahead_tasks(self):
for point, itask_id_map in self.runahead_pool.copy().items():
if point <= latest_allowed_point:
for itask in itask_id_map.copy().values():
+ if itask.is_task_prereqs_not_done():
+ # Only release if all prerequisites are satisfied.
+ continue
self.release_runahead_task(itask)
released = True
return released
@@ -728,15 +734,6 @@ def get_ready_tasks(self):
return ready_tasks
- def task_has_future_trigger_overrun(self, itask):
- """Check for future triggers extending beyond the final cycle."""
- if not self.stop_point:
- return False
- for pct in itask.state.prerequisites_get_target_points():
- if pct > self.stop_point:
- return True
- return False
-
def get_min_point(self):
"""Return the minimum cycle point currently in the pool."""
cycles = list(self.pool)
@@ -914,16 +911,31 @@ def can_stop(self, stop_mode):
def warn_stop_orphans(self):
"""Log (warning) orphaned tasks on suite stop."""
+ orphans = []
+ orphans_kill_failed = []
for itask in self.get_tasks():
- if (
- itask.state(*TASK_STATUSES_ACTIVE)
- and itask.state.kill_failed
- ):
- LOG.warning("%s: orphaned task (%s, kill failed)" % (
- itask.identity, itask.state.status))
- elif itask.state(*TASK_STATUSES_ACTIVE):
- LOG.warning("%s: orphaned task (%s)" % (
- itask.identity, itask.state.status))
+ if itask.state(*TASK_STATUSES_ACTIVE):
+ if itask.state.kill_failed:
+ orphans_kill_failed.append(itask)
+ else:
+ orphans.append(itask)
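+ # Report kill-failed orphans separately so they stand out in the log.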
+ if orphans_kill_failed:
+ LOG.warning(
+ "Orphaned task jobs (kill failed):\n"
+ + "\n".join(
+ f"* {itask.identity} ({itask.state.status})"
+ for itask in orphans_kill_failed
+ )
+ )
+ if orphans:
+ LOG.warning(
+ "Orphaned task jobs:\n"
+ + "\n".join(
+ f"* {itask.identity} ({itask.state.status})"
+ for itask in orphans
+ )
+ )
+
for key1, point, name, submit_num in self.task_events_mgr.event_timers:
LOG.warning("%s/%s/%s: incomplete task event handler %s" % (
point, name, submit_num, key1))
@@ -931,60 +943,45 @@ def warn_stop_orphans(self):
def is_stalled(self):
"""Return True if the suite is stalled.
- A suite is stalled when:
- * It is not held.
- * It has no active tasks.
- * It has waiting tasks with unmet prerequisites
- (ignoring clock triggers).
+ A suite is stalled if it is not held and the active pool contains only
+ unhandled failed tasks.
"""
if self.is_held:
return False
- can_be_stalled = False
+ unhandled_failed = []
for itask in self.get_tasks():
- if (
- self.stop_point
- and itask.point > self.stop_point
- or itask.state(
- TASK_STATUS_SUCCEEDED,
- TASK_STATUS_EXPIRED,
- )
- ):
- # Ignore: Task beyond stop point.
- # Ignore: Succeeded and expired tasks.
- continue
- if itask.state(*TASK_STATUSES_NOT_STALLED):
- # Pool contains active tasks (or held active tasks)
- # Return "not stalled" immediately.
- return False
- if (
- itask.state(TASK_STATUS_WAITING)
- and itask.state.prerequisites_all_satisfied()
- ):
- # Waiting tasks with all prerequisites satisfied,
- # probably waiting for clock trigger only.
- # This task can be considered active.
- # Return "not stalled" immediately.
+ if itask.state(*TASK_STATUSES_FAILURE):
+ unhandled_failed.append(itask)
+ else:
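+ # Any non-failed task in the pool means the suite can still progress.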
return False
- # We should be left with (submission) failed tasks and
- # waiting tasks with unsatisfied prerequisites.
- can_be_stalled = True
- return can_be_stalled
+ if unhandled_failed:
+ LOG.warning(
+ "Suite stalled with unhandled failed tasks:\n"
+ + "\n".join(
+ f"* {itask.identity} ({itask.state.status})"
+ for itask in unhandled_failed
+ )
+ )
+ return True
+ else:
+ return False
- def report_stalled_task_deps(self):
- """Log unmet dependencies on stalled."""
+ def report_unmet_deps(self):
+ """Log unmet dependencies on stall or shutdown."""
prereqs_map = {}
- for itask in self.get_tasks():
- if (
- itask.state(TASK_STATUS_WAITING)
- and itask.state.prerequisites_are_not_all_satisfied()
- ):
- prereqs_map[itask.identity] = []
- for prereq_str, is_met in itask.state.prerequisites_dump():
- if not is_met:
- prereqs_map[itask.identity].append(prereq_str)
+ # Partially satisfied tasks are hidden in the runahead pool.
+ for itask in self.get_rh_tasks():
+ prereqs_map[itask.identity] = []
+ for prereq_str, is_met in itask.state.prerequisites_dump():
+ if not is_met:
+ prereqs_map[itask.identity].append(prereq_str)
# prune tree to ignore items that are elsewhere in it
for id_, prereqs in list(prereqs_map.copy().items()):
+ if not prereqs:
+ # (runahead pool tasks with no unmet prerequisites)
+ del prereqs_map[id_]
+ continue
for prereq in prereqs:
prereq_strs = prereq.split()
if prereq_strs[0] == "LABEL:":
@@ -998,10 +995,16 @@ def report_stalled_task_deps(self):
del prereqs_map[id_]
break
- for id_, prereqs in prereqs_map.items():
- LOG.warning("Unmet prerequisites for %s:" % id_)
- for prereq in prereqs:
- LOG.warning(" * %s" % prereq)
+ if prereqs_map:
+ LOG.warning(
+ "Some partially satisfied prerequisites left over:\n"
+ + "\n".join(
+ f"{id_} is waiting on:"
+ + "\n".join(
+ f"\n* {prereq}" for prereq in prereqs
+ ) for id_, prereqs in prereqs_map.items()
+ )
+ )
def set_hold_point(self, point):
"""Set the point after which tasks must be held."""
@@ -1217,13 +1220,16 @@ def spawn_task(self, name, point, flow_label=None, reflow=True,
"[%s] -holding (beyond suite hold point) %s",
itask, self.hold_point)
itask.state.reset(is_held=True)
- elif (self.stop_point and itask.point <= self.stop_point and
- self.task_has_future_trigger_overrun(itask)):
- # Record tasks waiting on a future trigger beyond the stop point.
- # (We ignore these waiting tasks when considering shutdown).
- LOG.info("[%s] -holding (future trigger beyond stop point)", itask)
- self.stuck_future_tasks.append(itask.identity)
- elif (self.is_held
+ if self.stop_point and itask.point <= self.stop_point:
+ future_trigger_overrun = False
+ for pct in itask.state.prerequisites_get_target_points():
+ if pct > self.stop_point:
+ future_trigger_overrun = True
+ break
+ if future_trigger_overrun:
+ LOG.warning("[%s] -won't run: depends on a "
+ "task beyond the stop point", itask)
+ if (self.is_held
and itask.state(TASK_STATUS_WAITING, is_held=False)):
# Hold newly-spawned tasks in a held suite (e.g. due to manual
# triggering of a held task).
diff --git a/cylc/flow/task_state.py b/cylc/flow/task_state.py
index 3cf5ee0ee18..c07585c22ae 100644
--- a/cylc/flow/task_state.py
+++ b/cylc/flow/task_state.py
@@ -147,21 +147,12 @@
TASK_STATUS_READY,
])
-# Task statuses that are to be externally active
-TASK_STATUSES_TO_BE_ACTIVE = set([
- TASK_STATUS_QUEUED,
- TASK_STATUS_READY,
-])
-
# Task statuses that are externally active
TASK_STATUSES_ACTIVE = set([
TASK_STATUS_SUBMITTED,
TASK_STATUS_RUNNING,
])
-# Task statuses in which tasks cannot be considered stalled
-TASK_STATUSES_NOT_STALLED = TASK_STATUSES_ACTIVE | TASK_STATUSES_TO_BE_ACTIVE
-
# Task statuses that can be manually triggered.
TASK_STATUSES_TRIGGERABLE = set([
TASK_STATUS_WAITING,
diff --git a/tests/flakyfunctional/cylc-take-checkpoints/00-basic.t b/tests/flakyfunctional/cylc-take-checkpoints/00-basic.t
deleted file mode 100755
index abf73eec632..00000000000
--- a/tests/flakyfunctional/cylc-take-checkpoints/00-basic.t
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/usr/bin/env bash
-# THIS FILE IS PART OF THE CYLC SUITE ENGINE.
-# Copyright (C) NIWA & British Crown (Met Office) & Contributors.
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-#-------------------------------------------------------------------------------
-# Test checkpoint basic
-. "$(dirname "$0")/test_header"
-
-date-remove() {
- sed 's/[0-9]\+\(-[0-9]\{2\}\)\{2\}T[0-9]\{2\}\(:[0-9]\{2\}\)\{2\}Z/DATE/'
-}
-
-set_test_number 4
-
-install_suite "${TEST_NAME_BASE}" "${TEST_NAME_BASE}"
-
-run_ok "${TEST_NAME_BASE}-validate" cylc validate "${SUITE_NAME}"
-
-# Suite reloads+inserts new task to mess up prerequisites - suite should stall
-suite_run_ok "${TEST_NAME_BASE}-run" \
- cylc run --debug --no-detach --reference-test "${SUITE_NAME}"
-cylc ls-checkpoints "${SUITE_NAME}" | date-remove >'cylc-ls-checkpoints.out'
-contains_ok 'cylc-ls-checkpoints.out' <<'__OUT__'
-#######################################################################
-# CHECKPOINT ID (ID|TIME|EVENT)
-1|DATE|snappy
-0|DATE|latest
-__OUT__
-
-cylc ls-checkpoints "${SUITE_NAME}" 1 | date-remove >'cylc-ls-checkpoints-1.out'
-contains_ok 'cylc-ls-checkpoints-1.out' <<'__OUT__'
-#######################################################################
-# CHECKPOINT ID (ID|TIME|EVENT)
-1|DATE|snappy
-
-# SUITE PARAMS (KEY|VALUE)
-
-# TASK POOL (CYCLE|NAME|STATUS|IS_HELD)
-2017|t1|running|0
-__OUT__
-
-purge_suite "${SUITE_NAME}"
-exit
diff --git a/tests/flakyfunctional/cylc-take-checkpoints/00-basic/flow.cylc b/tests/flakyfunctional/cylc-take-checkpoints/00-basic/flow.cylc
deleted file mode 100644
index e57721a0e68..00000000000
--- a/tests/flakyfunctional/cylc-take-checkpoints/00-basic/flow.cylc
+++ /dev/null
@@ -1,26 +0,0 @@
-#!jinja2
-[cylc]
- UTC mode=True
- cycle point format = %Y
- [[events]]
- abort on stalled = True
- abort on inactivity = True
- inactivity = P1M
-[scheduling]
- initial cycle point = 2016
- final cycle point = 2020
- [[graph]]
- P1Y=t1[-P1Y] => t1
-[runtime]
- [[t1]]
- script = """
-if [[ "${CYLC_TASK_CYCLE_POINT}" == '2017' ]]; then
- wait "${CYLC_TASK_MESSAGE_STARTED_PID}" 2>/dev/null || true
- sleep 2 # state of current task should be recorded after 2 seconds
- cylc checkpoint "${CYLC_SUITE_NAME}" 'snappy'
- cylc__job__poll_grep_suite_log -F "Command succeeded: take_checkpoints(snappy)"
- sleep 2 # checkpoint should be recorded after 2 seconds
-fi
-"""
- [[[job]]]
- execution time limit = PT50S
diff --git a/tests/flakyfunctional/cylc-take-checkpoints/00-basic/reference.log b/tests/flakyfunctional/cylc-take-checkpoints/00-basic/reference.log
deleted file mode 100644
index e49908e5492..00000000000
--- a/tests/flakyfunctional/cylc-take-checkpoints/00-basic/reference.log
+++ /dev/null
@@ -1,7 +0,0 @@
-2016-10-10T14:01:04Z INFO - Initial point: 2016
-2016-10-10T14:01:04Z INFO - Final point: 2020
-2016-10-10T14:01:05Z INFO - [t1.2016] -triggered off []
-2016-10-10T14:01:05Z INFO - [t1.2017] -triggered off ['t1.2016']
-2016-10-10T14:01:05Z INFO - [t1.2018] -triggered off ['t1.2017']
-2016-10-10T14:01:08Z INFO - [t1.2019] -triggered off ['t1.2018']
-2016-10-10T14:01:11Z INFO - [t1.2020] -triggered off ['t1.2019']
diff --git a/tests/flakyfunctional/cylc-take-checkpoints/test_header b/tests/flakyfunctional/cylc-take-checkpoints/test_header
deleted file mode 120000
index 0126592858e..00000000000
--- a/tests/flakyfunctional/cylc-take-checkpoints/test_header
+++ /dev/null
@@ -1 +0,0 @@
-../../functional/lib/bash/test_header
\ No newline at end of file
diff --git a/tests/flakyfunctional/events/40-stall-despite-clock-trig.t b/tests/flakyfunctional/events/40-stall-despite-clock-trig.t
index 67c5777ffcb..daf2ec27b9e 100755
--- a/tests/flakyfunctional/events/40-stall-despite-clock-trig.t
+++ b/tests/flakyfunctional/events/40-stall-despite-clock-trig.t
@@ -21,14 +21,11 @@ set_test_number 3
install_suite "${TEST_NAME_BASE}" "${TEST_NAME_BASE}"
run_ok "${TEST_NAME_BASE}-validate" cylc validate "${SUITE_NAME}"
-# Saw evidence in a failed test that timeout 60 isn't long enough under load?:
-run_fail "${TEST_NAME_BASE}-run" \
- timeout 120 cylc run --debug --no-detach "${SUITE_NAME}"
-sed -n 's/^.* WARNING - //p' "${SUITE_RUN_DIR}/log/suite/log" \
- >"${SUITE_RUN_DIR}/log/suite/log.edited"
-contains_ok "${SUITE_RUN_DIR}/log/suite/log.edited" <<__OUT__
-suite stalled
-__OUT__
+
+TEST_NAME="${TEST_NAME_BASE}-run"
+suite_run_fail "${TEST_NAME}" cylc run --debug --no-detach "${SUITE_NAME}"
+
+grep_ok "suite stalled" "${TEST_NAME}.stderr"
purge_suite "${SUITE_NAME}"
exit
diff --git a/tests/flakyfunctional/events/40-stall-despite-clock-trig/flow.cylc b/tests/flakyfunctional/events/40-stall-despite-clock-trig/flow.cylc
index 3ece887ca1f..66e3d583c8e 100644
--- a/tests/flakyfunctional/events/40-stall-despite-clock-trig/flow.cylc
+++ b/tests/flakyfunctional/events/40-stall-despite-clock-trig/flow.cylc
@@ -1,4 +1,8 @@
-# Stall with t3 waiting due to unhandled failure of t2
+# Stall due to unhandled failure of t2
+# TODO: I think this test can be removed. Since SoD it only tests that a suite
+# can stall due to unhandled failed tasks, which is tested elsewhere. It was
+# probably meant to test that stall was not affected by the clock trigger on
+# waiting t1 in the next cycle under SoS.
[cylc]
UTC mode = True
cycle point format = %Y%m%d
@@ -11,7 +15,6 @@
[[special tasks]]
clock-trigger = t1(P0D)
[[graph]]
- # Stall with t2 failed, no waiting tasks.
P1D=t3[-P1D] => t1 => t2 => t3
[runtime]
[[t1]]
diff --git a/tests/flakyfunctional/restart/21-task-elapsed.t b/tests/flakyfunctional/restart/21-task-elapsed.t
index 0f0acd7257b..77596c7b4f7 100755
--- a/tests/flakyfunctional/restart/21-task-elapsed.t
+++ b/tests/flakyfunctional/restart/21-task-elapsed.t
@@ -15,7 +15,6 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#-------------------------------------------------------------------------------
-# Test restart from a checkpoint before a reload
. "$(dirname "$0")/test_header"
set_test_number 8
install_suite "${TEST_NAME_BASE}" "${TEST_NAME_BASE}"
diff --git a/tests/functional/events/26-suite-stalled-dump-prereq.t b/tests/functional/events/26-suite-stalled-dump-prereq.t
index 21e8672ccc3..429768e741d 100755
--- a/tests/functional/events/26-suite-stalled-dump-prereq.t
+++ b/tests/functional/events/26-suite-stalled-dump-prereq.t
@@ -17,19 +17,28 @@
#-------------------------------------------------------------------------------
# Test suite event handler, dump unmet prereqs on stall
. "$(dirname "$0")/test_header"
-set_test_number 5
+set_test_number 8
install_suite "${TEST_NAME_BASE}" "${TEST_NAME_BASE}"
run_ok "${TEST_NAME_BASE}-validate" \
cylc validate "${SUITE_NAME}"
+
suite_run_fail "${TEST_NAME_BASE}-run" \
cylc run --reference-test --debug --no-detach "${SUITE_NAME}"
-grep_ok "Abort on suite stalled is set" \
- "${TEST_NAME_BASE}-run.stderr"
-grep_ok "WARNING - Unmet prerequisites for foo.20100101T0600Z:" \
- "${TEST_NAME_BASE}-run.stderr"
-grep_ok "WARNING - \\* bar.20100101T0000Z succeeded" \
- "${TEST_NAME_BASE}-run.stderr"
+
+grep_ok "Abort on suite stalled is set" "${TEST_NAME_BASE}-run.stderr"
+
+grep_ok "WARNING - Suite stalled with unhandled failed tasks:" \
+ "${TEST_NAME_BASE}-run.stderr"
+grep_ok "\* bar.20100101T0000Z (failed)" \
+ "${TEST_NAME_BASE}-run.stderr"
+
+grep_ok "WARNING - Some partially satisfied prerequisites left over:" \
+ "${TEST_NAME_BASE}-run.stderr"
+grep_ok "foo.20100101T0600Z is waiting on:" \
+ "${TEST_NAME_BASE}-run.stderr"
+grep_ok "\* bar.20100101T0000Z succeeded" \
+ "${TEST_NAME_BASE}-run.stderr"
purge_suite "${SUITE_NAME}"
exit
diff --git a/tests/functional/events/26-suite-stalled-dump-prereq/flow.cylc b/tests/functional/events/26-suite-stalled-dump-prereq/flow.cylc
index 5e418725c94..3c8230591cf 100644
--- a/tests/functional/events/26-suite-stalled-dump-prereq/flow.cylc
+++ b/tests/functional/events/26-suite-stalled-dump-prereq/flow.cylc
@@ -7,7 +7,7 @@
[scheduling]
initial cycle point = 20100101T0000Z
[[graph]]
- # will abort on stalled with failed bar, waiting foo, at T00
+ # will abort on stalled with unhandled failed bar
T00, T06, T12, T18 = foo[-PT6H] & bar[-PT6H] => foo => bar => qux
T12 = qux[-PT6H] => baz
[runtime]
diff --git a/tests/functional/events/27-suite-stalled-dump-prereq-fam.t b/tests/functional/events/27-suite-stalled-dump-prereq-fam.t
index 4563036cc62..aff3b20228d 100755
--- a/tests/functional/events/27-suite-stalled-dump-prereq-fam.t
+++ b/tests/functional/events/27-suite-stalled-dump-prereq-fam.t
@@ -17,22 +17,37 @@
#-------------------------------------------------------------------------------
# Test suite event handler, dump unmet prereqs on stall
. "$(dirname "$0")/test_header"
-set_test_number 7
+set_test_number 12
install_suite "${TEST_NAME_BASE}" "${TEST_NAME_BASE}"
+
run_ok "${TEST_NAME_BASE}-validate" \
cylc validate "${SUITE_NAME}"
+
suite_run_fail "${TEST_NAME_BASE}-run" \
cylc run --reference-test --debug --no-detach "${SUITE_NAME}"
-grep_ok "Abort on suite stalled is set" \
+
+grep_ok "Abort on suite stalled is set" "${TEST_NAME_BASE}-run.stderr"
+
+grep_ok "WARNING - Suite stalled with unhandled failed tasks:" \
+ "${TEST_NAME_BASE}-run.stderr"
+grep_ok "\* foo.1 (failed)" \
"${TEST_NAME_BASE}-run.stderr"
-grep_ok "WARNING - Unmet prerequisites for f_1.1:" \
+
+grep_ok "WARNING - Some partially satisfied prerequisites left over:" \
+ "${TEST_NAME_BASE}-run.stderr"
+grep_ok "f_1.1 is waiting on:" \
+ "${TEST_NAME_BASE}-run.stderr"
+grep_ok "\* foo.1 succeeded" \
"${TEST_NAME_BASE}-run.stderr"
-grep_ok "WARNING - Unmet prerequisites for f_3.1:" \
+grep_ok "f_2.1 is waiting on:" \
"${TEST_NAME_BASE}-run.stderr"
-grep_ok "WARNING - Unmet prerequisites for f_2.1" \
+grep_ok "\* foo.1 succeeded" \
"${TEST_NAME_BASE}-run.stderr"
-grep_ok "WARNING - \\* foo.1 succeeded" \
+grep_ok "f_3.1 is waiting on:" \
"${TEST_NAME_BASE}-run.stderr"
+grep_ok "\* foo.1 succeeded" \
+ "${TEST_NAME_BASE}-run.stderr"
+
purge_suite "${SUITE_NAME}"
exit
diff --git a/tests/functional/events/27-suite-stalled-dump-prereq-fam/flow.cylc b/tests/functional/events/27-suite-stalled-dump-prereq-fam/flow.cylc
index d23faa16e39..0626d58f4ed 100644
--- a/tests/functional/events/27-suite-stalled-dump-prereq-fam/flow.cylc
+++ b/tests/functional/events/27-suite-stalled-dump-prereq-fam/flow.cylc
@@ -6,8 +6,7 @@
expected task failures = foo.1
[scheduling]
[[graph]]
- # Goo added to spawn waiting FAM and thereby cause a stall with
- # unsatisfied waiting tasks.
+ # will abort on stalled with unhandled failed foo
R1 = """foo & goo => FAM
FAM:succeed-any => bar"""
[runtime]
diff --git a/tests/functional/hold-release/02-hold-on-spawn.t b/tests/functional/hold-release/02-hold-on-spawn.t
index 76903ec8a65..cbbc2e7c49f 100755
--- a/tests/functional/hold-release/02-hold-on-spawn.t
+++ b/tests/functional/hold-release/02-hold-on-spawn.t
@@ -30,7 +30,7 @@ __FLOW_CONFIG__
suite_run_ok "${TEST_NAME_BASE}-run" cylc run --hold "${SUITE_NAME}"
cylc release "${SUITE_NAME}" foo.1
-# foo.1 should run and spawn bar.1 as waiting
+# foo.1 should run and spawn bar.1 as waiting and held
poll_grep_suite_log 'spawned bar\.1'
diff --git a/tests/functional/restart/22-hold/flow.cylc b/tests/functional/restart/22-hold/flow.cylc
index a89643f7141..d044cb8c9a1 100644
--- a/tests/functional/restart/22-hold/flow.cylc
+++ b/tests/functional/restart/22-hold/flow.cylc
@@ -18,7 +18,7 @@
[[t1]]
script = """
if [[ "${CYLC_TASK_CYCLE_POINT}" == '2016' ]]; then
- cylc__job__poll_grep_suite_log -F '[t2.2016] -released to the task pool'
+ cylc__job__poll_grep_suite_log -F 'spawned t2.2016'
cylc hold "${CYLC_SUITE_NAME}" t2.2016
cylc stop "${CYLC_SUITE_NAME}"
else
diff --git a/tests/functional/runahead/03-check-default-future.t b/tests/functional/runahead/03-check-default-future.t
index 3928761ab1a..4081ff5428a 100644
--- a/tests/functional/runahead/03-check-default-future.t
+++ b/tests/functional/runahead/03-check-default-future.t
@@ -37,6 +37,6 @@ run_ok "${TEST_NAME}" sqlite3 "${DB}" \
cmp_ok "${TEST_NAME}.stdout" <<< "20100101T0400Z"
# i.e. should have spawned 5 cycle points from initial T00 (wibble not spawned)
#-------------------------------------------------------------------------------
-grep_ok 'Suite shutting down - Abort on suite stalled is set' "${SUITE_RUN_DIR}/log/suite/log"
+grep_ok 'Suite shutting down - Abort on suite inactivity is set' "${SUITE_RUN_DIR}/log/suite/log"
#-------------------------------------------------------------------------------
purge_suite "${SUITE_NAME}"
diff --git a/tests/functional/runahead/05-check-default-future-2.t b/tests/functional/runahead/05-check-default-future-2.t
index 66740d05cc1..33340a1a706 100644
--- a/tests/functional/runahead/05-check-default-future-2.t
+++ b/tests/functional/runahead/05-check-default-future-2.t
@@ -36,10 +36,10 @@ run_ok "${TEST_NAME}" sqlite3 "${DB}" \
"select max(cycle) from task_states where name=='foo' and status=='failed'"
cmp_ok "${TEST_NAME}.stdout" <<< "20100101T1000Z"
# i.e. should have spawned 5 cycle points from initial T00, and then raised
-# this by PT6H due to fact that wibble spawned
+# this by PT6H due to the fact that wibble spawned.
#-------------------------------------------------------------------------------
-TEST_NAME=${TEST_NAME_BASE}-check-stalled
+TEST_NAME=${TEST_NAME_BASE}-check-aborted
LOG="$RUN_DIR/${SUITE_NAME}/log/suite/log"
-grep_ok 'Suite shutting down - Abort on suite stalled is set' "${LOG}"
+grep_ok 'Suite shutting down - Abort on suite inactivity is set' "${LOG}"
#-------------------------------------------------------------------------------
purge_suite "${SUITE_NAME}"
diff --git a/tests/functional/runahead/06-release-update.t b/tests/functional/runahead/06-release-update.t
index 503c8ab8c02..8687358fa98 100644
--- a/tests/functional/runahead/06-release-update.t
+++ b/tests/functional/runahead/06-release-update.t
@@ -15,7 +15,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#-------------------------------------------------------------------------------
-# Test that the state summary is updated when runahead tasks are released.
+# Test that the datastore is updated when runahead tasks are released.
# GitHub #1981
. "$(dirname "$0")/test_header"
set_test_number 3
@@ -27,11 +27,13 @@ CYLC_RUN_PID="$!"
poll_suite_running
YYYY="$(date +%Y)"
NEXT1=$(( YYYY + 1 ))
-poll_grep_suite_log -F "[bar.${NEXT1}] -released to the task pool"
+poll_grep_suite_log -F "spawned bar.${NEXT1}"
+# sleep a little to allow the datastore to update (`cylc dump` sees the
+# datastore). TODO: can we avoid this flaky sleep somehow?
+sleep 10
cylc dump -t "${SUITE_NAME}" | awk '{print $1 $2 $3}' >'log'
cmp_ok 'log' - <<__END__
-bar,$NEXT1,waiting,
foo,$NEXT1,waiting,
__END__
diff --git a/tests/functional/runahead/default-future/flow.cylc b/tests/functional/runahead/default-future/flow.cylc
index 9c4650f4fca..abd5ef57691 100644
--- a/tests/functional/runahead/default-future/flow.cylc
+++ b/tests/functional/runahead/default-future/flow.cylc
@@ -2,31 +2,31 @@
[cylc]
UTC mode = True
[[events]]
- abort on stalled = True
- timeout = PT30S
- abort on timeout = True
+ inactivity = PT10S
+ abort on inactivity = True
[scheduling]
initial cycle point = 20100101T00
final cycle point = 20100105T00
+ [[xtriggers]]
+ never = wall_clock(P100Y)
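+ # (fires 100 years after each cycle point, i.e. never during the test,
+ # so bar is never satisfied and stays behind to hold back runahead)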
[[graph]]
- # oops makes bar spawn as waiting, to hold back runahead
- PT1H = "foo & oops => bar"
+ R1 = spawner
+ PT1H = """
+ @never => bar
+ foo
+ """
# If wibble gets into the pool, it will demand a +PT6H raise
# of the 'runahead limit'.
{{ FUTURE_TRIGGER_START_POINT }}/PT6H = """
- baz[+PT6H] => wibble
- baz
+ foo[+PT6H] => wibble
"""
[runtime]
- [[foo]]
- script = """
- if [[ "$CYLC_TASK_CYCLE_POINT" == "20100101T0000Z" ]]; then
- # SoD: spawn wibble (it's not sitting there waiting like in SoS)
- cylc set-outputs $CYLC_SUITE_NAME baz.20100101T0800Z
-fi
-false"""
- [[bar,baz,wibble]]
+ [[root]]
script = true
-
-[visualization]
- number of cycle points = 10
+ [[spawner]]
+ script = """
+ # spawn wibble
+ cylc set-outputs $CYLC_SUITE_NAME foo.20100101T0800Z
+ """
+ [[foo]]
+ script = false
diff --git a/tests/functional/runahead/no_final/flow.cylc b/tests/functional/runahead/no_final/flow.cylc
index ca4e3466f8d..d412cd129d2 100644
--- a/tests/functional/runahead/no_final/flow.cylc
+++ b/tests/functional/runahead/no_final/flow.cylc
@@ -2,15 +2,18 @@
[cylc]
cycle point time zone = Z
[[events]]
- abort on stalled = True
+ abort on inactivity = True
+ inactivity = PT10S
[scheduling]
runahead limit = P4
initial cycle point = 20100101T00
+ [[xtriggers]]
+ never = wall_clock(P100Y)
[[graph]]
- # oops makes bar spawn as waiting, to hold back the runahead
- PT6H = "foo & oops => bar"
+ PT6H = """
+ foo
+ @never => bar
+ """
[runtime]
[[foo]]
script = false
- [[bar]]
- script = true
diff --git a/tests/functional/shutdown/09-now2.t b/tests/functional/shutdown/09-now2.t
index 4534f2a3cd4..6a51ac09c1c 100755
--- a/tests/functional/shutdown/09-now2.t
+++ b/tests/functional/shutdown/09-now2.t
@@ -18,14 +18,15 @@
# Test "cylc stop --now --now".
. "$(dirname "$0")/test_header"
-set_test_number 8
+set_test_number 9
install_suite "${TEST_NAME_BASE}" "${TEST_NAME_BASE}"
run_ok "${TEST_NAME_BASE}-validate" cylc validate "${SUITE_NAME}"
suite_run_ok "${TEST_NAME_BASE}-run" cylc run --no-detach "${SUITE_NAME}"
LOGD="$RUN_DIR/${SUITE_NAME}/log"
grep_ok 'INFO - Suite shutting down - REQUEST(NOW-NOW)' "${LOGD}/suite/log"
-grep_ok 'WARNING - t1.1: orphaned task (running)' "${LOGD}/suite/log"
+grep_ok 'WARNING - Orphaned task jobs' "${LOGD}/suite/log"
+grep_ok '\* t1.1 (running)' "${LOGD}/suite/log"
JLOGD="${LOGD}/job/1/t1/01"
# Check that t1.1 event handler runs
run_fail "${TEST_NAME_BASE}-activity-log-succeeded" \