diff --git a/CHANGES.md b/CHANGES.md index 59014d6fa27..2ec5c1ee6ec 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -28,6 +28,17 @@ creating a new release entry be sure to copy & paste the span tag with the `actions:bind` attribute, which is used by a regex to find the text to be updated. Only the first match gets replaced, so it's fine to leave the old ones in. --> + +------------------------------------------------------------------------------- +## __cylc-8.0.2 (Released YYYY-MM-DD)__ + +Maintenance release. + +### Fixes + +[#5067](https://github.com/cylc/cylc-flow/pull/5067) - Datastore fix for +taskdefs removed before restart. + ------------------------------------------------------------------------------- ## __cylc-8.0.1 (Released 2022-08-16)__ diff --git a/cylc/flow/data_store_mgr.py b/cylc/flow/data_store_mgr.py index c54cacc8b3d..a53e9cdca4f 100644 --- a/cylc/flow/data_store_mgr.py +++ b/cylc/flow/data_store_mgr.py @@ -818,12 +818,15 @@ def generate_ghost_task(self, tp_id, itask, is_parent=False): if is_orphan: self.generate_orphan_task(itask) - # Most the time the definition node will be in the store, - # so use try/except. + # Most of the time the definition node will be in the store. try: task_def = self.data[self.workflow_id][TASKS][t_id] except KeyError: - task_def = self.added[TASKS][t_id] + try: + task_def = self.added[TASKS][t_id] + except KeyError: + # Task removed from workflow definition. + return False update_time = time() tp_stamp = f'{tp_id}@{update_time}' diff --git a/tests/functional/restart/58-removed-task.t b/tests/functional/restart/58-removed-task.t new file mode 100755 index 00000000000..808273f55a9 --- /dev/null +++ b/tests/functional/restart/58-removed-task.t @@ -0,0 +1,48 @@ +#!/bin/bash +# THIS FILE IS PART OF THE CYLC WORKFLOW ENGINE. +# Copyright (C) NIWA & British Crown (Met Office) & Contributors. 
+# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +#------------------------------------------------------------------------------- + +# GitHub 5067: if a task is removed from the graph after shutdown, it should not +# cause an error at restart. If it was a failed incomplete task, however, it +# should still be polled and logged at restart. + +. "$(dirname "$0")/test_header" + +set_test_number 7 + +install_workflow "${TEST_NAME_BASE}" "${TEST_NAME_BASE}" + +run_ok "${TEST_NAME_BASE}-validate" cylc validate --set="INCL_B_C=True" "${WORKFLOW_NAME}" +run_ok "${TEST_NAME_BASE}-validate" cylc validate --set="INCL_B_C=False" "${WORKFLOW_NAME}" + +TEST_NAME="${TEST_NAME_BASE}-run" +workflow_run_ok "${TEST_NAME}" cylc play -n "${WORKFLOW_NAME}" + +# Restart with removed tasks should not cause an error. +# It should shut down cleanly after orphaned task a and incomplete failed task +# b are polled (even though b has been removed from the graph) and a finishes +# (after checking the poll results). +TEST_NAME="${TEST_NAME_BASE}-restart" +workflow_run_ok "${TEST_NAME}" cylc play --set="INCL_B_C=False" -n "${WORKFLOW_NAME}" + +grep_workflow_log_ok "grep-3" "\[1/a running job:01 flows:1\] (polled)started" +grep_workflow_log_ok "grep-4" "\[1/b failed job:01 flows:1\] (polled)failed" + +# Failed (but not incomplete) task c should not have been polled. 
+grep_fail "\[1/c failed job:01 flows:1\] (polled)failed" "${WORKFLOW_RUN_DIR}/log/scheduler/log" + +purge diff --git a/tests/functional/restart/58-removed-task/flow.cylc b/tests/functional/restart/58-removed-task/flow.cylc new file mode 100644 index 00000000000..94c5cf27b24 --- /dev/null +++ b/tests/functional/restart/58-removed-task/flow.cylc @@ -0,0 +1,32 @@ +#!Jinja2 + +# Task a shuts the scheduler down cleanly with --now after b and c have failed. +# On restart, a waits for a and b to be polled before finishing (otherwise we +# could shut down before poll results come in). + +{% set INCL_B_C = INCL_B_C | default(True) %} +[scheduler] + [[events]] + stall timeout = PT0S + abort on stall timeout = True + inactivity timeout = PT30S + abort on inactivity timeout = True +[scheduling] + [[graph]] + R1 = """ + a + {% if INCL_B_C %} + b & c? + {% endif %} + """ +[runtime] + [[a]] + script = """ + cylc__job__poll_grep_workflow_log "1/b .*failed" + cylc__job__poll_grep_workflow_log "1/c .*failed" + cylc stop --now $CYLC_WORKFLOW_ID + cylc__job__poll_grep_workflow_log "1/a .*(polled)started" + cylc__job__poll_grep_workflow_log "1/b .*(polled)failed" + """ + [[b, c]] + script = "false"