From 58fe9aab5df859163e9ac94016e80a0f76ec2443 Mon Sep 17 00:00:00 2001 From: Matt Shin Date: Wed, 4 Sep 2019 19:28:29 +0100 Subject: [PATCH] Add poll suite log function to job.sh This is a temporary measure to address the repeated pattern in various test suites where we have to repeatedly poll the suite log for events. --- cylc/flow/etc/job.sh | 13 ++++++++ flakytests/cylc-poll/03-poll-all/suite.rc | 11 +++---- flakytests/cylc-reset/02-output-1/suite.rc | 9 ++---- flakytests/cylc-reset/03-output-2/suite.rc | 9 ++---- .../cylc-take-checkpoints/00-basic/suite.rc | 7 +---- flakytests/hold-release/13-ready-restart.t | 7 ++--- .../13-ready-restart/bin/my-log-grepper | 6 ---- .../hold-release/13-ready-restart/suite.rc | 3 +- flakytests/hold-release/14-hold-kill/suite.rc | 15 ++++------ .../hold-release/15-hold-after/suite.rc | 7 ++--- .../20-reset-waiting-output/suite.rc | 24 +++++---------- flakytests/restart/19-checkpoint/suite.rc | 17 ++++------- tests/authentication/08-shared-fs.t | 2 +- tests/broadcast/07-timeout/suite.rc | 6 +--- tests/cylc-cat-log/05-remote-tail.t | 6 +--- .../cylc-poll/05-poll-multi-messages/suite.rc | 19 ++++-------- tests/database/04-lock-recover/suite.rc | 4 +-- tests/hold-release/00-suite/suite.rc | 6 +--- tests/hold-release/02-hold-on-spawn/suite.rc | 6 +--- .../11-retrying/bin/my-log-grepper | 6 ---- tests/hold-release/11-retrying/suite.rc | 9 ++++-- .../12-hold-then-retry/bin/my-log-grepper | 6 ---- .../hold-release/12-hold-then-retry/suite.rc | 12 ++++---- .../hold-release/17-hold-after-point/suite.rc | 6 +--- .../19-no-reset-prereq-on-waiting/suite.rc | 6 +--- tests/hold-release/hold-family/suite.rc | 6 +--- tests/hold-release/hold-task/suite.rc | 6 +--- tests/hold-release/release-family/suite.rc | 6 +--- tests/hold-release/release-task/suite.rc | 6 +--- tests/job-file-trap/00-sigusr1.t | 30 ++++--------------- tests/reload/01-startup/suite.rc | 4 +-- tests/reload/03-queues/suite.rc | 5 +--- tests/reload/05-graphing-simple/suite.rc | 4 
###############################################################################
# Poll the suite log until a pattern appears in it, for up to a minute.
# Globals:
#   CYLC_SUITE_LOG_DIR (read) - directory that holds the suite "log" file.
# Arguments:
#   "$@" - options and pattern handed verbatim to "grep", e.g. -F 'some text'.
# Outputs:
#   STDOUT - the matching log line(s), as printed by "grep", once found.
#   STDERR - an "ERROR: ..." diagnostic on bad usage or time out.
# Returns:
#   0 as soon as "grep" finds a match.
#   On bad usage or time out this function calls "exit 1" (it does not
#   "return"), so the calling shell is terminated.
cylc__job__poll_grep_suite_log() {
    # With no pattern, grep would take the log path as its pattern and read
    # STDIN instead of the log, so fail at once rather than hang or poll.
    if (($# == 0)); then
        echo 'ERROR: poll requires a grep pattern' >&2
        exit 1
    fi
    # Abort immediately (non-interactive shells exit on ":?") instead of
    # polling the bogus path "/log" for a whole minute.
    local LOG_FILE="${CYLC_SUITE_LOG_DIR:?must be set to poll the suite log}/log"
    # Declare and assign separately so "local" does not mask a failure.
    local TIMEOUT
    TIMEOUT="$(($(date +%s) + 60))"  # wait 1 minute
    # "-s" hides "no such file" errors: the log may not exist yet.
    while ! grep -s "$@" "${LOG_FILE}"; do
        sleep 1
        if (($(date +%s) > TIMEOUT)); then
            echo "ERROR: poll timed out: grep -s $* ${LOG_FILE}" >&2
            exit 1
        fi
    done
}
# Arguments: diff --git a/flakytests/cylc-poll/03-poll-all/suite.rc b/flakytests/cylc-poll/03-poll-all/suite.rc index 02ad82e84fc..c51904e6433 100644 --- a/flakytests/cylc-poll/03-poll-all/suite.rc +++ b/flakytests/cylc-poll/03-poll-all/suite.rc @@ -41,13 +41,10 @@ exit 0 script = """ cylc poll "${CYLC_SUITE_NAME}" -pat1="[submit_hold.${CYLC_TASK_CYCLE_POINT}] -ready => submitted" -pat2="[run_kill.${CYLC_TASK_CYCLE_POINT}] -suiciding" -log="${CYLC_SUITE_LOG_DIR}/log" -while (($(grep -c -F -e "${pat1}" -e "${pat2}" "${log}") != 2)) -do - sleep 1 -done +cylc__job__poll_grep_suite_log -F \ + "[submit_hold.${CYLC_TASK_CYCLE_POINT}] -ready => submitted" +cylc__job__poll_grep_suite_log -F \ + "[run_kill.${CYLC_TASK_CYCLE_POINT}] -suiciding" st_file="${CYLC_SUITE_RUN_DIR}/log/job/${CYLC_TASK_CYCLE_POINT}/submit_hold/NN/job.status" pkill -g "$(awk -F= '$1 == "CYLC_BATCH_SYS_JOB_ID" {print $2}' "${st_file}")" diff --git a/flakytests/cylc-reset/02-output-1/suite.rc b/flakytests/cylc-reset/02-output-1/suite.rc index b795eb7b44d..55ef89fc8f6 100644 --- a/flakytests/cylc-reset/02-output-1/suite.rc +++ b/flakytests/cylc-reset/02-output-1/suite.rc @@ -20,15 +20,10 @@ t1:hello & t2:greet => t4 greet = Greet World [[t3]] script = """ -LOG="${CYLC_SUITE_LOG_DIR}/log" cylc reset --debug --output=hello "${CYLC_SUITE_NAME}" 't1.1' -while ! grep -qF '[t1.1] -reset output to complete: hello' "${LOG}"; do - sleep 1 # make sure reset completes -done +cylc__job__poll_grep_suite_log -F '[t1.1] -reset output to complete: hello' cylc reset --debug --output='Greet World' "${CYLC_SUITE_NAME}" 't2.1' -while ! 
grep -qF '[t2.1] -reset output to complete: Greet World' "${LOG}"; do - sleep 1 # make sure reset completes -done +cylc__job__poll_grep_suite_log -F '[t2.1] -reset output to complete: Greet World' """ [[[job]]] execution time limit = PT30S diff --git a/flakytests/cylc-reset/03-output-2/suite.rc b/flakytests/cylc-reset/03-output-2/suite.rc index 8074de1731c..02ff9dc00a3 100644 --- a/flakytests/cylc-reset/03-output-2/suite.rc +++ b/flakytests/cylc-reset/03-output-2/suite.rc @@ -15,14 +15,9 @@ greet = Greet World [[t2]] script = """ -LOG="${CYLC_SUITE_LOG_DIR}/log" cylc reset --output='!hello' --output='!Greet World' "${CYLC_SUITE_NAME}" 't1.1' -while ! grep -qF -e '[t1.1] -reset output to incomplete: hello' "${LOG}"; do - sleep 1 # make sure reset completes -done -while ! grep -qF '[t1.1] -reset output to incomplete: Greet World' "${LOG}"; do - sleep 1 # make sure reset completes -done +cylc__job__poll_grep_suite_log -F '[t1.1] -reset output to incomplete: hello' +cylc__job__poll_grep_suite_log -F '[t1.1] -reset output to incomplete: Greet World' cylc show "${CYLC_SUITE_NAME}" 't1.1' >"${CYLC_SUITE_RUN_DIR}/cylc-show.out" """ [[[job]]] diff --git a/flakytests/cylc-take-checkpoints/00-basic/suite.rc b/flakytests/cylc-take-checkpoints/00-basic/suite.rc index 91d83ea32a0..e57721a0e68 100644 --- a/flakytests/cylc-take-checkpoints/00-basic/suite.rc +++ b/flakytests/cylc-take-checkpoints/00-basic/suite.rc @@ -18,12 +18,7 @@ if [[ "${CYLC_TASK_CYCLE_POINT}" == '2017' ]]; then wait "${CYLC_TASK_MESSAGE_STARTED_PID}" 2>/dev/null || true sleep 2 # state of current task should be recorded after 2 seconds cylc checkpoint "${CYLC_SUITE_NAME}" 'snappy' - LOG="${CYLC_SUITE_LOG_DIR}/log" - while ! 
grep -qF "INFO - Command succeeded: take_checkpoints(snappy)" \ - "${LOG}" - do - sleep 1 # make sure take_checkpoints command completes - done + cylc__job__poll_grep_suite_log -F "Command succeeded: take_checkpoints(snappy)" sleep 2 # checkpoint should be recorded after 2 seconds fi """ diff --git a/flakytests/hold-release/13-ready-restart.t b/flakytests/hold-release/13-ready-restart.t index b6896fe61bf..c05f394c307 100755 --- a/flakytests/hold-release/13-ready-restart.t +++ b/flakytests/hold-release/13-ready-restart.t @@ -18,7 +18,7 @@ # Test restart with a "ready" task. See GitHub #958 (update: and #2610). . "$(dirname "$0")/test_header" -set_test_number 4 +set_test_number 3 install_suite "${TEST_NAME_BASE}" "${TEST_NAME_BASE}" run_ok "${TEST_NAME_BASE}-validate" cylc validate "${SUITE_NAME}" @@ -29,8 +29,7 @@ export PATH="${TEST_DIR}/${SUITE_NAME}/bin:$PATH" LOG="$(find "${CYLC_SUITE_LOG_DIR}/" -type f -name 'log.*' | sort | head -n 1)" run_ok "${TEST_NAME_BASE}-restart" timeout 1m my-file-poll "${LOG}" # foo-1 should run when the suite is released -run_ok "${TEST_NAME_BASE}-foo-1" \ - timeout 1m my-log-grepper 'foo-1\.1.*succeeded' -timeout 1m my-log-grepper 'Suite shutting down' +poll_grep_suite_log 'foo-1\.1.*succeeded' +poll_suite_stopped purge_suite "${SUITE_NAME}" exit diff --git a/flakytests/hold-release/13-ready-restart/bin/my-log-grepper b/flakytests/hold-release/13-ready-restart/bin/my-log-grepper deleted file mode 100755 index 9d7fcc818e7..00000000000 --- a/flakytests/hold-release/13-ready-restart/bin/my-log-grepper +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -eu -while ! 
grep -q "$@" "${CYLC_SUITE_LOG_DIR}/log"; do - sleep 1 -done -exit diff --git a/flakytests/hold-release/13-ready-restart/suite.rc b/flakytests/hold-release/13-ready-restart/suite.rc index 707ce8d3eba..214b807bc36 100644 --- a/flakytests/hold-release/13-ready-restart/suite.rc +++ b/flakytests/hold-release/13-ready-restart/suite.rc @@ -38,7 +38,8 @@ bar # Restart the suite on hold. cylc restart --hold "${CYLC_SUITE_NAME}" - timeout 1m my-log-grepper 'Held on start-up (no tasks will be submitted)' + cylc__job__poll_grep_suite_log \ + 'Held on start-up (no tasks will be submitted)' # Modify the job submission command template for "foo-1". cylc broadcast "${CYLC_SUITE_NAME}" \ diff --git a/flakytests/hold-release/14-hold-kill/suite.rc b/flakytests/hold-release/14-hold-kill/suite.rc index 9c7a18726a5..029148ab86a 100644 --- a/flakytests/hold-release/14-hold-kill/suite.rc +++ b/flakytests/hold-release/14-hold-kill/suite.rc @@ -9,16 +9,11 @@ script = """ echo '# killing "sleeper"' cylc kill "${CYLC_SUITE_NAME}" "sleeper.1" - LOG="${CYLC_SUITE_LOG_DIR}/log" - while ! grep -q -F '[sleeper.1] -running => running (held)' "${LOG}" - do - sleep 1 - done - while ! 
grep -q -F '[sleeper.1] -running (held) => retrying (held)' "${LOG}" - do - sleep 1 - done - sleep 10 # sleep should still be held after 10 seconds + cylc__job__poll_grep_suite_log -F \ + '[sleeper.1] -running => running (held)' + cylc__job__poll_grep_suite_log -F \ + -F '[sleeper.1] -running (held) => retrying (held)' + sleep 10 # sleep, should still be held after 10 seconds cylc dump -s -t "${CYLC_SUITE_NAME}" >'cylc-dump.out' diff -u 'cylc-dump.out' - <<'__OUT__' 1, killer, running, spawned, unheld diff --git a/flakytests/hold-release/15-hold-after/suite.rc b/flakytests/hold-release/15-hold-after/suite.rc index 726d36e2cc8..11c9033c6b1 100644 --- a/flakytests/hold-release/15-hold-after/suite.rc +++ b/flakytests/hold-release/15-hold-after/suite.rc @@ -22,11 +22,8 @@ script = cylc hold --after '20140101T12' "${CYLC_SUITE_NAME}" [[stopper]] script = """ - while ! grep -qF '[bar.20140102T0000Z] -waiting => waiting (held)' \ - "${CYLC_SUITE_LOG_DIR}/log" - do - sleep 1 - done + cylc__job__poll_grep_suite_log -F \ + '[bar.20140102T0000Z] -waiting => waiting (held)' cylc stop "${CYLC_SUITE_NAME}" """ [[[job]]] diff --git a/flakytests/hold-release/20-reset-waiting-output/suite.rc b/flakytests/hold-release/20-reset-waiting-output/suite.rc index bc2add96cbb..79370f31dcb 100644 --- a/flakytests/hold-release/20-reset-waiting-output/suite.rc +++ b/flakytests/hold-release/20-reset-waiting-output/suite.rc @@ -11,24 +11,14 @@ [[t1]] script = """ cylc hold "${CYLC_SUITE_NAME}" - LOG="${CYLC_SUITE_LOG_DIR}/log" - while ! grep -qF 'INFO - Command succeeded: hold_suite()' "${LOG}"; do - sleep 1 # make sure hold completes - done + cylc__job__poll_grep_suite_log -F \ + 'INFO - Command succeeded: hold_suite()' cylc reset --state='succeeded' "${CYLC_SUITE_NAME}" 't2.1' - while ! 
grep -q \ - "INFO - Command succeeded: reset_task_states(\\['t2.1'\\],.*state=succeeded" \ - "${LOG}" - do - sleep 1 # make sure reset succeeded completes - done + cylc__job__poll_grep_suite_log \ + "INFO - Command succeeded: reset_task_states(\\['t2.1'\\],.*state=succeeded" cylc reset --state='waiting' "${CYLC_SUITE_NAME}" 't2.1' - while ! grep -q \ - "INFO - Command succeeded: reset_task_states(\\['t2.1'\\],.*state=waiting" \ - "${LOG}" - do - sleep 1 # make sure reset waiting completes - done + cylc__job__poll_grep_suite_log \ + "INFO - Command succeeded: reset_task_states(\\['t2.1'\\],.*state=waiting" cylc release "${CYLC_SUITE_NAME}" """ [[[job]]] @@ -37,7 +27,7 @@ failed handler = cylc release '%(suite)s' [[t2]] - script = sleep 10; touch "${CYLC_SUITE_RUN_DIR}/t2.done" + script = touch "${CYLC_SUITE_RUN_DIR}/t2.done" [[t3]] # This will fail if t3.1 starts together with t2.1 script = test -e "${CYLC_SUITE_RUN_DIR}/t2.done" diff --git a/flakytests/restart/19-checkpoint/suite.rc b/flakytests/restart/19-checkpoint/suite.rc index 886deb03eae..decac90e61a 100644 --- a/flakytests/restart/19-checkpoint/suite.rc +++ b/flakytests/restart/19-checkpoint/suite.rc @@ -17,22 +17,17 @@ script = """ wait "${CYLC_TASK_MESSAGE_STARTED_PID}" 2>/dev/null || true if [[ "${CYLC_TASK_CYCLE_POINT}" == '2017' ]]; then - LOG="${CYLC_SUITE_LOG_DIR}/log" - while ! grep -qF '[t1.2017] status=submitted: (received)started' "${LOG}"; do - sleep 1 # make sure started message is recorded in suite - done - sleep 2 + cylc__job__poll_grep_suite_log -F \ + '[t1.2017] status=submitted: (received)started' + sleep 2 # make sure status change recorded in DB cylc broadcast "${CYLC_SUITE_NAME}" -p '2017' -n 't1' --set='script=true' cylc hold "${CYLC_SUITE_NAME}" - while ! 
grep -qF 'INFO - Command succeeded: hold_suite()' "${LOG}"; do - sleep 1 # make sure hold completes - done + cylc__job__poll_grep_suite_log -F \ + 'INFO - Command succeeded: hold_suite()' sleep 2 (cd "${CYLC_SUITE_DEF_PATH}"; cp -p 'suite2.rc' 'suite.rc') cylc reload "${CYLC_SUITE_NAME}" - while ! grep -q 'Reload completed' "${LOG}"; do - sleep 1 # make sure reload completes - done + cylc__job__poll_grep_suite_log 'Reload completed' cylc insert "${CYLC_SUITE_NAME}" 't2.2017' while ! cylc show "${CYLC_SUITE_NAME}" 't2.2017' 1>'/dev/null' 2>&1; do sleep 1 # make sure insert completes diff --git a/tests/authentication/08-shared-fs.t b/tests/authentication/08-shared-fs.t index 150b515156e..c9cd003a725 100755 --- a/tests/authentication/08-shared-fs.t +++ b/tests/authentication/08-shared-fs.t @@ -48,7 +48,7 @@ SUITE_PID="$!" SUITE_LOG="${SUITE_RUN_DIR}/log/suite/log" # Note: double poll existence of suite log on suite host and then localhost to # avoid any issues with unstable mounting of the shared file system. -poll ssh -oBatchMode=yes -n '${CYLC_TEST_HOST}' test -e "${SUITE_LOG}" +poll ssh -oBatchMode=yes -n "${CYLC_TEST_HOST}" test -e "${SUITE_LOG}" poll_grep_suite_log -F '[t1.19700101T0000Z] -submitted => running' poll_grep_suite_log -F '[t1.19700101T0000Z] -running => failed' diff --git a/tests/broadcast/07-timeout/suite.rc b/tests/broadcast/07-timeout/suite.rc index ce446730984..974de1baf23 100644 --- a/tests/broadcast/07-timeout/suite.rc +++ b/tests/broadcast/07-timeout/suite.rc @@ -17,11 +17,7 @@ """ [[timeout]] script = """ -while ! 
grep -qF "[${CYLC_TASK_ID}] -execution timeout after PT1S" \ - "${CYLC_SUITE_LOG_DIR}/log" -do - sleep 1 -done +cylc__job__poll_grep_suite_log -F "[${CYLC_TASK_ID}] -execution timeout after PT1S" """ [[[events]]] execution timeout = PT1M diff --git a/tests/cylc-cat-log/05-remote-tail.t b/tests/cylc-cat-log/05-remote-tail.t index 78ff6213469..38eecf42abc 100755 --- a/tests/cylc-cat-log/05-remote-tail.t +++ b/tests/cylc-cat-log/05-remote-tail.t @@ -42,11 +42,7 @@ $SCP "${PWD}/bin/my-tailer.sh" \ # Run detached. suite_run_ok "${TEST_NAME_BASE}-run" cylc run "${SUITE_NAME}" #------------------------------------------------------------------------------- -while ! grep -q -F '[foo.1] status=submitted: (received)started' \ - "${SUITE_RUN_DIR}/log/suite/log" -do - sleep 1 -done +poll_grep_suite_log -F '[foo.1] status=submitted: (received)started' # cylc cat-log -m 't' tail-follows a file, so needs to be killed. # Send interrupt signal to tail command after 15 seconds. TEST_NAME="${TEST_NAME_BASE}-cat-log" diff --git a/tests/cylc-poll/05-poll-multi-messages/suite.rc b/tests/cylc-poll/05-poll-multi-messages/suite.rc index 615310eae73..59c490bfdbe 100644 --- a/tests/cylc-poll/05-poll-multi-messages/suite.rc +++ b/tests/cylc-poll/05-poll-multi-messages/suite.rc @@ -18,13 +18,8 @@ wait echo "CYLC_MESSAGE=$(date +%FT%H:%M:%SZ)|INFO|hello1" echo "CYLC_MESSAGE=$(date +%FT%H:%M:%SZ)|INFO|hello2" } >>"${CYLC_TASK_LOG_ROOT}.status" -LOG="${CYLC_SUITE_LOG_DIR}/log" -while ! grep -qF '[speaker1.1] status=running: (polled)hello1' "${LOG}"; do - sleep 1 -done -while ! 
grep -qF '[speaker1.1] status=running: (polled)hello2' "${LOG}"; do - sleep 1 -done +cylc__job__poll_grep_suite_log -F '[speaker1.1] status=running: (polled)hello1' +cylc__job__poll_grep_suite_log -F '[speaker1.1] status=running: (polled)hello2' """ [[[outputs]]] hello1 = "hello1" @@ -35,13 +30,9 @@ done wait # Simulate "cylc task message", messages written to status file but failed to # get sent back to the suite -{ - echo "CYLC_MESSAGE=$(date +%FT%H:%M:%SZ)|INFO|greet" -} >>"${CYLC_TASK_LOG_ROOT}.status" -LOG="${CYLC_SUITE_LOG_DIR}/log" -while ! grep -qF '[speaker2.1] status=running: (polled)greet' "${LOG}"; do - sleep 1 -done +echo "CYLC_MESSAGE=$(date +%FT%H:%M:%SZ)|INFO|greet" \ + >>"${CYLC_TASK_LOG_ROOT}.status" +cylc__job__poll_grep_suite_log -F '[speaker2.1] status=running: (polled)greet' """ [[[outputs]]] greet = "greet" diff --git a/tests/database/04-lock-recover/suite.rc b/tests/database/04-lock-recover/suite.rc index 65434bff594..b4fd14ab90e 100644 --- a/tests/database/04-lock-recover/suite.rc +++ b/tests/database/04-lock-recover/suite.rc @@ -17,7 +17,5 @@ TRUES:succeed-all => done inherit = TRUES [[done]] script = """ -while ! grep -F -q 'db: recovered' "${CYLC_SUITE_LOG_DIR}/log"; do - sleep 1 -done +cylc__job__poll_grep_suite_log -F 'db: recovered' """ diff --git a/tests/hold-release/00-suite/suite.rc b/tests/hold-release/00-suite/suite.rc index 50147a0d51c..18a6481835e 100644 --- a/tests/hold-release/00-suite/suite.rc +++ b/tests/hold-release/00-suite/suite.rc @@ -22,11 +22,7 @@ script = """ wait cylc hold "${CYLC_SUITE_NAME}" -while ! 
grep -qF 'INFO - Command succeeded: hold_suite()' \ - "${CYLC_SUITE_LOG_DIR}/log" -do - sleep 1 -done +cylc__job__poll_grep_suite_log -F 'INFO - Command succeeded: hold_suite()' cylc release "${CYLC_SUITE_NAME}" """ [[foo,bar]] diff --git a/tests/hold-release/02-hold-on-spawn/suite.rc b/tests/hold-release/02-hold-on-spawn/suite.rc index 3d11a3c6ccb..86f9b2a421f 100644 --- a/tests/hold-release/02-hold-on-spawn/suite.rc +++ b/tests/hold-release/02-hold-on-spawn/suite.rc @@ -16,11 +16,7 @@ script = """ wait cylc hold "${CYLC_SUITE_NAME}" -while ! grep -qF 'INFO - Command succeeded: hold_suite()' \ - "${CYLC_SUITE_LOG_DIR}/log" -do - sleep 1 -done +cylc__job__poll_grep_suite_log -F 'INFO - Command succeeded: hold_suite()' # Release all tasks in the first cycle point. cylc release "${CYLC_SUITE_NAME}" '*.20141009T0000Z' """ diff --git a/tests/hold-release/11-retrying/bin/my-log-grepper b/tests/hold-release/11-retrying/bin/my-log-grepper deleted file mode 100755 index 8cc4885fec3..00000000000 --- a/tests/hold-release/11-retrying/bin/my-log-grepper +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -eu -while ! 
grep -q -F "$@" "${CYLC_SUITE_LOG_DIR}/log"; do - sleep 1 -done -exit diff --git a/tests/hold-release/11-retrying/suite.rc b/tests/hold-release/11-retrying/suite.rc index 3e4237c75b3..849a3a8ddc4 100644 --- a/tests/hold-release/11-retrying/suite.rc +++ b/tests/hold-release/11-retrying/suite.rc @@ -17,11 +17,14 @@ t-retry-able => t-analyse execution retry delays = PT15S, 2*PT1S [[t-hold-release]] script = """ -timeout 30s my-log-grepper '[t-retry-able.1] -job(01) failed, retrying in PT15S' +cylc__job__poll_grep_suite_log -F \ + '[t-retry-able.1] -job(01) failed, retrying in PT15S' cylc hold "${CYLC_SUITE_NAME}" 't-retry-able.1' -timeout 30s my-log-grepper '[t-retry-able.1] -retrying => retrying (held)' +cylc__job__poll_grep_suite_log -F \ + '[t-retry-able.1] -retrying => retrying (held)' cylc release "${CYLC_SUITE_NAME}" 't-retry-able.1' -timeout 30s my-log-grepper '[t-retry-able.1] -retrying (held) => retrying' +cylc__job__poll_grep_suite_log -F \ + '[t-retry-able.1] -retrying (held) => retrying' """ [[t-analyse]] script = """ diff --git a/tests/hold-release/12-hold-then-retry/bin/my-log-grepper b/tests/hold-release/12-hold-then-retry/bin/my-log-grepper deleted file mode 100755 index 8cc4885fec3..00000000000 --- a/tests/hold-release/12-hold-then-retry/bin/my-log-grepper +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -set -eu -while ! 
grep -q -F "$@" "${CYLC_SUITE_LOG_DIR}/log"; do - sleep 1 -done -exit diff --git a/tests/hold-release/12-hold-then-retry/suite.rc b/tests/hold-release/12-hold-then-retry/suite.rc index 81db695be82..d87a81dffd0 100644 --- a/tests/hold-release/12-hold-then-retry/suite.rc +++ b/tests/hold-release/12-hold-then-retry/suite.rc @@ -19,18 +19,20 @@ t-submit-retry-able:submit => t-hold script = """ # Hold the suite cylc hold "${CYLC_SUITE_NAME}" - timeout 15 my-log-grepper 'Command succeeded: hold_suite' + cylc__job__poll_grep_suite_log -F 'Command succeeded: hold_suite' # Poll t-submit-retry-able, should return submit-fail cylc poll "${CYLC_SUITE_NAME}" 't-submit-retry-able' # Allow t-retry-able to continue rm -f "${CYLC_SUITE_RUN_DIR}/file" - timeout 15 my-log-grepper '[t-retry-able.1] -running (held) => retrying (held)' - timeout 15 my-log-grepper \ + cylc__job__poll_grep_suite_log -F \ + '[t-retry-able.1] -running (held) => retrying (held)' + cylc__job__poll_grep_suite_log -F \ '[t-submit-retry-able.1] -submitted (held) => submit-retrying (held)' # Release the suite cylc release "${CYLC_SUITE_NAME}" - timeout 15 my-log-grepper '[t-retry-able.1] -retrying (held) => retrying' - timeout 15 my-log-grepper \ + cylc__job__poll_grep_suite_log -F \ + '[t-retry-able.1] -retrying (held) => retrying' + cylc__job__poll_grep_suite_log -F \ '[t-submit-retry-able.1] -submit-retrying (held) => submit-retrying' """ [[t-retry-able]] diff --git a/tests/hold-release/17-hold-after-point/suite.rc b/tests/hold-release/17-hold-after-point/suite.rc index 1fa81a23041..183fbe8f169 100644 --- a/tests/hold-release/17-hold-after-point/suite.rc +++ b/tests/hold-release/17-hold-after-point/suite.rc @@ -16,11 +16,7 @@ [runtime] [[stopper]] script = """ -while ! 
grep -qF 'holding (beyond suite hold point) 20100102T00Z' \ - "${CYLC_SUITE_LOG_DIR}/log" -do - sleep 1 -done +cylc__job__poll_grep_suite_log -F 'holding (beyond suite hold point) 20100102T00Z' cylc stop "${CYLC_SUITE_NAME}" """ [[foo]] diff --git a/tests/hold-release/19-no-reset-prereq-on-waiting/suite.rc b/tests/hold-release/19-no-reset-prereq-on-waiting/suite.rc index 865cd473583..7794ed00b43 100644 --- a/tests/hold-release/19-no-reset-prereq-on-waiting/suite.rc +++ b/tests/hold-release/19-no-reset-prereq-on-waiting/suite.rc @@ -19,11 +19,7 @@ t1[-P1Y] => t1 [[holder]] script = """ cylc hold "${CYLC_SUITE_NAME}" 't1.2016' -while ! grep -qF '[t1.20160101T0000Z] -waiting => waiting (held)' \ - "${CYLC_SUITE_LOG_DIR}/log" -do - sleep 1 -done +cylc__job__poll_grep_suite_log -F '[t1.20160101T0000Z] -waiting => waiting (held)' """ [[releaser]] script = wait; cylc release "${CYLC_SUITE_NAME}" 't1.2016' diff --git a/tests/hold-release/hold-family/suite.rc b/tests/hold-release/hold-family/suite.rc index 333d613f882..e748b0507de 100644 --- a/tests/hold-release/hold-family/suite.rc +++ b/tests/hold-release/hold-family/suite.rc @@ -17,11 +17,7 @@ point of the suite.""" script = """ wait cylc hold "$CYLC_SUITE_NAME" '{{HOLD_MATCH}}.20141009T0000Z' -while ! grep -qF "[foo.20141009T0000Z] -waiting => waiting (held)" \ - "${CYLC_SUITE_LOG_DIR}/log" -do - sleep 1 -done +cylc__job__poll_grep_suite_log -F "[foo.20141009T0000Z] -waiting => waiting (held)" """ [[STUFF]] [[STOP]] diff --git a/tests/hold-release/hold-task/suite.rc b/tests/hold-release/hold-task/suite.rc index d2f94397d44..115accfbf0f 100644 --- a/tests/hold-release/hold-task/suite.rc +++ b/tests/hold-release/hold-task/suite.rc @@ -16,11 +16,7 @@ point of the suite.""" [[holdrelease]] script = """ cylc hold "$CYLC_SUITE_NAME" '{{HOLD_MATCH}}.20141009T0000Z' -while ! 
grep -qF "[foo.20141009T0000Z] -waiting => waiting (held)" \ - "${CYLC_SUITE_LOG_DIR}/log" -do - sleep 1 -done +cylc__job__poll_grep_suite_log -F "[foo.20141009T0000Z] -waiting => waiting (held)" """ [[foo]] script = true diff --git a/tests/hold-release/release-family/suite.rc b/tests/hold-release/release-family/suite.rc index 5bcfc72c34a..dd472e47f82 100644 --- a/tests/hold-release/release-family/suite.rc +++ b/tests/hold-release/release-family/suite.rc @@ -17,11 +17,7 @@ first cycle point of the suite..""" script = """ wait cylc hold "${CYLC_SUITE_NAME}" -while ! grep -qF 'INFO - Command succeeded: hold_suite()' \ - "${CYLC_SUITE_LOG_DIR}/log" -do - sleep 1 -done +cylc__job__poll_grep_suite_log -F 'INFO - Command succeeded: hold_suite()' # Release a family in the first cycle point. cylc release "$CYLC_SUITE_NAME" '{{RELEASE_MATCH}}.20141009T0000Z' """ diff --git a/tests/hold-release/release-task/suite.rc b/tests/hold-release/release-task/suite.rc index 984a87f52c9..f776dbdad53 100644 --- a/tests/hold-release/release-task/suite.rc +++ b/tests/hold-release/release-task/suite.rc @@ -17,11 +17,7 @@ first cycle point of the suite.""" script = """ wait cylc hold "${CYLC_SUITE_NAME}" -while ! grep -qF 'INFO - Command succeeded: hold_suite()' \ - "${CYLC_SUITE_LOG_DIR}/log" -do - sleep 1 -done +cylc__job__poll_grep_suite_log -F 'INFO - Command succeeded: hold_suite()' # Release tasks in the first cycle point. cylc release "$CYLC_SUITE_NAME" '{{RELEASE_MATCH}}.20141009T0000Z' """ diff --git a/tests/job-file-trap/00-sigusr1.t b/tests/job-file-trap/00-sigusr1.t index 132f24e570a..c6fd941a403 100755 --- a/tests/job-file-trap/00-sigusr1.t +++ b/tests/job-file-trap/00-sigusr1.t @@ -41,42 +41,22 @@ run_tests() { sleep 1 done run_fail "${TEST_NAME_BASE}-t1-status" grep -q '^CYLC_JOB' "${T1_STATUS_FILE}" - TIMEOUT=$(($(date +%s) + 120)) - while ! 
grep -q 'vacated/USR1' "${SUITE_RUN_DIR}/log/suite/log" \ - && ((TIMEOUT > $(date +%s))) - do - sleep 1 - done - TIMEOUT=$(($(date +%s) + 10)) - - while ! sqlite3 "${SUITE_RUN_DIR}/log/db" \ - 'SELECT status FROM task_states WHERE name=="t1";' \ - >"${TEST_NAME}-db-t1" 2>'/dev/null' \ - && ((TIMEOUT > $(date +%s))) - do - sleep 1 - done + poll_grep_suite_log 'vacated/USR1' + poll sqlite3 "${SUITE_RUN_DIR}/log/db" \ + 'SELECT status FROM task_states WHERE name=="t1";' \ + >"${TEST_NAME}-db-t1" 2>'/dev/null' grep_ok "^\(submitted\|running\)$" "${TEST_NAME}-db-t1" # Start the job again and see what happens mkdir -p "${SUITE_RUN_DIR}/work/1/t1/" touch "${SUITE_RUN_DIR}/work/1/t1/file" # Allow t1 to complete "${SUITE_RUN_DIR}/log/job/1/t1/01/job" <'/dev/null' >'/dev/null' 2>&1 & # Wait for suite to complete - TIMEOUT=$(($(date +%s) + 120)) - while [[ -f "${SUITE_RUN_DIR}/.service/contact" ]] && ((TIMEOUT > $(date +%s))); do - sleep 1 - done + poll_suite_stopped # Test t1 status in DB sqlite3 "${SUITE_RUN_DIR}/log/db" \ 'SELECT status FROM task_states WHERE name=="t1";' >"${TEST_NAME}-db-t1" cmp_ok "${TEST_NAME}-db-t1" - <<<'succeeded' # Test reference - TIMEOUT=$(($(date +%s) + 120)) - while ! grep -q 'DONE' "${SUITE_RUN_DIR}/log/suite/log" \ - && ((TIMEOUT > $(date +%s))) - do - sleep 1 - done grep_ok 'SUITE REFERENCE TEST PASSED' "${SUITE_RUN_DIR}/log/suite/log" purge_suite "${SUITE_NAME}" exit diff --git a/tests/reload/01-startup/suite.rc b/tests/reload/01-startup/suite.rc index c4d4f187e31..d618acf4f66 100644 --- a/tests/reload/01-startup/suite.rc +++ b/tests/reload/01-startup/suite.rc @@ -13,7 +13,5 @@ [[b]] script = """ cylc reload "${CYLC_SUITE_NAME}" -f -while ! 
grep -qF 'Reload completed' "${CYLC_SUITE_LOG_DIR}/log"; do - sleep 1 -done +cylc__job__poll_grep_suite_log -F 'Reload completed' """ diff --git a/tests/reload/03-queues/suite.rc b/tests/reload/03-queues/suite.rc index 40fbf7ea826..7aa4077bc86 100644 --- a/tests/reload/03-queues/suite.rc +++ b/tests/reload/03-queues/suite.rc @@ -21,10 +21,7 @@ perl -pi -e 's/(limit = )5( # marker)/\1 3 \2/' $CYLC_SUITE_DEF_PATH/suite.rc # reload cylc reload -f "${CYLC_SUITE_NAME}" -while ! grep -qF 'Reload completed' "${CYLC_SUITE_LOG_DIR}/log" -do - sleep 1 -done +cylc__job__poll_grep_suite_log 'Reload completed' """ [[monitor]] script = """ diff --git a/tests/reload/05-graphing-simple/suite.rc b/tests/reload/05-graphing-simple/suite.rc index 19b8e9d20f7..247904d0bf6 100644 --- a/tests/reload/05-graphing-simple/suite.rc +++ b/tests/reload/05-graphing-simple/suite.rc @@ -12,9 +12,7 @@ perl -pi -e 's/(R1 = reloader => )bar => foo( # marker)/\1foo => bar\2/' $CYLC_SUITE_DEF_PATH/suite.rc # reload cylc reload -f "${CYLC_SUITE_NAME}" -while ! grep -qF 'Reload completed' "${CYLC_SUITE_LOG_DIR}/log"; do - sleep 1 -done +cylc__job__poll_grep_suite_log -F 'Reload completed' """ [[foo, bar]] script = true diff --git a/tests/reload/06-graphing-fam/suite.rc b/tests/reload/06-graphing-fam/suite.rc index e210ab271ab..4dd187e7752 100644 --- a/tests/reload/06-graphing-fam/suite.rc +++ b/tests/reload/06-graphing-fam/suite.rc @@ -16,9 +16,7 @@ perl -pi -e 's/(reloader => )BAR( # marker1)/\1FOO\2/' $CYLC_SUITE_DEF_PATH/suit perl -pi -e 's/( )BAR:finish-all => FOO( # marker2)/\1FOO:finish-all => BAR\2/' $CYLC_SUITE_DEF_PATH/suite.rc # reload cylc reload -f "${CYLC_SUITE_NAME}" -while ! 
grep -qF 'Reload completed' "${CYLC_SUITE_LOG_DIR}/log"; do - sleep 1 -done +cylc__job__poll_grep_suite_log -F 'Reload completed' """ [[FOO, BAR]] script = true diff --git a/tests/reload/07-final-cycle/suite.rc b/tests/reload/07-final-cycle/suite.rc index 9c31f031a17..1ec3f3e42aa 100644 --- a/tests/reload/07-final-cycle/suite.rc +++ b/tests/reload/07-final-cycle/suite.rc @@ -18,9 +18,7 @@ perl -pi -e 's/(final cycle point = )20100102T00( # marker)/\1 20100101T12\2/' $CYLC_SUITE_DEF_PATH/suite.rc # reload cylc reload -f $CYLC_SUITE_NAME -while ! grep -qF 'Reload completed' "${CYLC_SUITE_LOG_DIR}/log"; do - sleep 1 -done +cylc__job__poll_grep_suite_log -F 'Reload completed' """ [[a]] script = true diff --git a/tests/reload/08-cycle/suite.rc b/tests/reload/08-cycle/suite.rc index 2cc9092b4ac..859291cd5cc 100644 --- a/tests/reload/08-cycle/suite.rc +++ b/tests/reload/08-cycle/suite.rc @@ -18,9 +18,7 @@ sed -i 's/T00,T12 = a\[-PT12H\]/T00,T06,T12,T18 = a[-PT6H]/' "${CYLC_SUITE_DEF_PATH}/suite.rc" # reload cylc reload -f "${CYLC_SUITE_NAME}" -while ! grep -qF 'Reload completed' "${CYLC_SUITE_LOG_DIR}/log"; do - sleep 1 -done +cylc__job__poll_grep_suite_log -F 'Reload completed' """ [[a]] script = true diff --git a/tests/reload/11-retrying/suite.rc b/tests/reload/11-retrying/suite.rc index c4a1e33465a..a90b3c4c85e 100644 --- a/tests/reload/11-retrying/suite.rc +++ b/tests/reload/11-retrying/suite.rc @@ -20,13 +20,9 @@ fi execution retry delays = PT0S [[reloader]] script = """ -while ! grep -qF '[retrier.1] -running (held) => retrying (held)' "${CYLC_SUITE_LOG_DIR}/log" -do - sleep 1 -done +cylc__job__poll_grep_suite_log -F '[retrier.1] -running (held) => retrying (held)' cylc reload "${CYLC_SUITE_NAME}" -while ! grep -qF 'Reload completed.' 
"${CYLC_SUITE_LOG_DIR}/log"; do - sleep 1 -done +cylc reload -f "${CYLC_SUITE_NAME}" +cylc__job__poll_grep_suite_log -F 'Reload completed' cylc release "${CYLC_SUITE_NAME}" 'retrier.1' """ diff --git a/tests/reload/12-remove-task/suite.rc b/tests/reload/12-remove-task/suite.rc index 544997d23a6..a1a7bee6c36 100644 --- a/tests/reload/12-remove-task/suite.rc +++ b/tests/reload/12-remove-task/suite.rc @@ -10,10 +10,7 @@ script = """ sed -i "s/remove_me =>//g" $CYLC_SUITE_DEF_PATH/suite.rc cylc reload $CYLC_SUITE_NAME - while ! grep -q 'Reload completed' "${CYLC_SUITE_LOG_DIR}/log" - do - sleep 1 # make sure reload completes - done + cylc__job__poll_grep_suite_log -F 'Reload completed' """ [[remove_me]] script = false diff --git a/tests/reload/14-waiting/suite.rc b/tests/reload/14-waiting/suite.rc index 804d56e7967..3cf401029a4 100644 --- a/tests/reload/14-waiting/suite.rc +++ b/tests/reload/14-waiting/suite.rc @@ -21,10 +21,6 @@ done [[reloader]] script = """ cylc reload "${CYLC_SUITE_NAME}" -while ! grep -q '\[waiter\.1\] -reloaded task definition' \ - "${CYLC_SUITE_LOG_DIR}/log" -do - sleep 1 -done +cylc__job__poll_grep_suite_log '\[waiter\.1\] -reloaded task definition' rm -f "${CYLC_SUITE_WORK_DIR}/1/sleeping-waiter/file" """ diff --git a/tests/reload/19-remote-kill/suite.rc b/tests/reload/19-remote-kill/suite.rc index 5701b7d478d..ce522b84005 100644 --- a/tests/reload/19-remote-kill/suite.rc +++ b/tests/reload/19-remote-kill/suite.rc @@ -13,14 +13,9 @@ script=""" wait "${CYLC_TASK_MESSAGE_STARTED_PID}" 2>/dev/null || true cylc reload "${CYLC_SUITE_NAME}" -LOG="${CYLC_SUITE_LOG_DIR}/log" -while ! grep -q 'Reload completed' "${LOG}"; do - sleep 1 # make sure reload completes -done +cylc__job__poll_grep_suite_log -F 'Reload completed' cylc kill "${CYLC_SUITE_NAME}" 'foo.1' -while ! 
grep -qF '[foo.1] -job(01) killed' "${LOG}"; do - sleep 1 # make sure reload completes -done +cylc__job__poll_grep_suite_log -F '[foo.1] -job(01) killed' """ [[[job]]] execution time limit = PT1M diff --git a/tests/reload/20-stop-point/suite.rc b/tests/reload/20-stop-point/suite.rc index c42954a3dd5..e16fd70dbb4 100644 --- a/tests/reload/20-stop-point/suite.rc +++ b/tests/reload/20-stop-point/suite.rc @@ -17,10 +17,7 @@ script=""" wait "${CYLC_TASK_MESSAGE_STARTED_PID}" 2>/dev/null || true cylc reload "${CYLC_SUITE_NAME}" -LOG="${CYLC_SUITE_LOG_DIR}/log" -while ! grep -q 'Reload completed' "${LOG}"; do - sleep 1 # make sure reload completes -done +cylc__job__poll_grep_suite_log -F 'Reload completed' """ [[[job]]] execution time limit = PT1M diff --git a/tests/restart/04-running.t b/tests/restart/04-running.t index 86efe9f4448..b9a15981412 100755 --- a/tests/restart/04-running.t +++ b/tests/restart/04-running.t @@ -32,12 +32,7 @@ init_suite "${TEST_NAME_BASE}" <<'__SUITERC__' script = """ wait cylc stop --now "${CYLC_SUITE_NAME}" - while ! grep -q 'Run: (re)start=1' "${CYLC_SUITE_LOG_DIR}/log"; do - sleep 1 - done - while [[ ! -s "${CYLC_SUITE_RUN_DIR}/.service/contact" ]]; do - sleep 1 - done + cylc__job__poll_grep_suite_log -F 'Run: (re)start=1' # Should be good to send succeeded message at this point """ __SUITERC__ diff --git a/tests/restart/reload/suite.rc b/tests/restart/reload/suite.rc index f400ec02dab..5fe5c477f97 100644 --- a/tests/restart/reload/suite.rc +++ b/tests/restart/reload/suite.rc @@ -23,8 +23,6 @@ which should run to completion on restarting.""" script = """ if [[ "$CYLC_TASK_CYCLE_POINT" == "$CYLC_SUITE_INITIAL_CYCLE_POINT" ]]; then cylc reload "${CYLC_SUITE_NAME}" - while ! 
grep -qF 'Reload completed' "${CYLC_SUITE_LOG_DIR}/log"; do - sleep 1 - done + cylc__job__poll_grep_suite_log -F 'Reload completed' cylc stop "${CYLC_SUITE_NAME}" fi""" diff --git a/tests/triggering/19-and-suicide/suite.rc b/tests/triggering/19-and-suicide/suite.rc index 459f4a832bc..0d1dbba8a75 100644 --- a/tests/triggering/19-and-suicide/suite.rc +++ b/tests/triggering/19-and-suicide/suite.rc @@ -14,11 +14,7 @@ [[t0]] # https://github.com/cylc/cylc-flow/issues/2655 # "t2.1" should not suicide on "t1.1:failed" - script = """ -while ! grep -q '\[t1\.1\].*failed' "${CYLC_SUITE_LOG_DIR}/log"; do - sleep 1 -done -""" + script = cylc__job__poll_grep_suite_log '\[t1\.1\].*failed' [[t1]] script = false [[t2]]