forked from os-autoinst/scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathopenqa-investigate
executable file
·186 lines (169 loc) · 9.42 KB
/
openqa-investigate
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#!/bin/bash
# Usage
# echo jobnumber | openqa-investigate
# http://redsymbol.net/articles/unofficial-bash-strict-mode/
set -euo pipefail
# shellcheck source=/dev/null
. "$(dirname "$0")"/_common
host="${host:-"openqa.opensuse.org"}"
scheme="${scheme:-"https"}"
host_url="$scheme://$host"
dry_run="${dry_run:-"0"}"
verbose="${verbose:-"false"}"
prio_add="${prio_add:-"100"}"
exclude_name_regex="${exclude_name_regex:-":investigate:"}"
exclude_no_group="${exclude_no_group:-"true"}"
# exclude_group_regex checks a combined string "<parent job group name> / <job group name>"
exclude_group_regex="${exclude_group_regex:-"Development.*/ "}"
client_args=(api --header 'User-Agent: openqa-investigate (https://github.com/os-autoinst/scripts)' --host "$host_url")
jq_output_limit="${jq_output_limit:-15}"
curl_args=(--user-agent "openqa-investigate")
echoerr() { echo "$@" >&2; }
clone() {
local origin id name_suffix refspec job_data unsupported_cluster_jobs name base_prio clone_settings casedir repo out url clone_id
origin=${1:?"Need 'origin'"}
id=${2:?"Need 'id'"}
name_suffix=${3+":$3"}
refspec=${4+$4}
job_data=$(openqa-cli "${client_args[@]}" --json jobs/"$id")
# shellcheck disable=SC2181
[[ $? != 0 ]] && echoerr "unable to query job data for $id: $job_data" && return 1
unsupported_cluster_jobs=$(echo "$job_data" | runjq -r '(.job.children["Parallel"] | length) + (.job.parents["Parallel"] | length) + (.job.children["Directly chained"] | length) + (.job.parents["Directly chained"] | length)') || return $?
[[ $unsupported_cluster_jobs != 0 ]] \
&& echoerr "unable to clone job $id: it is part of a parallel or directly chained cluster (not supported)" && return 2
name="$(echo "$job_data" | runjq -r '.job.test'):investigate$name_suffix" || return $?
base_prio=$(echo "$job_data" | runjq -r '.job.priority') || return $?
clone_settings=("TEST=$name" '_GROUP_ID=0' 'BUILD=')
if [[ $refspec ]]; then
casedir=$(echo "$job_data" | runjq -r '.job.settings.CASEDIR') || return $?
[[ $casedir == null ]] && casedir=''
repo=${casedir:-'https://github.com/os-autoinst/os-autoinst-distri-opensuse.git'}
clone_settings+=("CASEDIR=${repo%#*}#${refspec}")
fi
[[ -n ${*:5} ]] && clone_settings+=("${@:5}")
# clear "PUBLISH_" settings to avoid overriding production assets
# shellcheck disable=SC2207
clone_settings+=($(echo "$job_data" | runjq -r '.job.settings | keys[] | select (startswith("PUBLISH_")) | . + "="')) || return $?
clone_settings+=("OPENQA_INVESTIGATE_ORIGIN=$host_url/t$origin")
out=$($clone_call "$host_url/tests/$id" "${clone_settings[@]}")
[[ $dry_run = 1 ]] && echo "$out"
# output is in $out, should change that text to a markdown list entry
url=$(echo "$out" | sed -n 's/^Created job.*-> //p')
echo "* **$name**: $url"
clone_id=${out/:*/}; clone_id=${clone_id/*#/}
"${client_call[@]}" --json --data "{\"priority\": $((base_prio + prio_add))}" -X PUT jobs/"$clone_id" >/dev/null
}
trigger_jobs() {
id="${1:?"Need 'job_id'"}"
# for 1. current job/build + current test -> check if reproducible/sporadic
clone "$id" "$id" 'retry' '' "${@:2}"
job_url="$host_url/tests/$id"
investigation=$(runcurl "${curl_args[@]}" -s "$job_url"/investigation_ajax) || return $?
last_good_exists=$(echo "$investigation" | runjq -r '.last_good') || return $?
if [[ "$last_good_exists" = "null" || "$last_good_exists" = "not found" ]]; then
echo "No last good recorded, skipping regression investigation jobs" && return 1
fi
last_good=$(echo "$investigation" | runjq -r '.last_good.text') || return $?
[[ ! $last_good =~ ^[0-9]+$ ]] && echo ".last_good.text not found: investigation for test $id returned '$investigation'" >&2 && return 1
# for 2. current job/build + last good test (+ last good needles) ->
# check for test (+needles) regression
test_log=$(echo "$investigation" | runjq -r '.test_log') || return $?
if echo "$test_log" | grep -q "No.*changes recorded"; then
echo "$test_log. Skipping test regression investigation job."
last_good_tests=''
else
vars_last_good=$(runcurl "${curl_args[@]}" -s "$host_url/tests/$last_good"/file/vars.json) || return $?
last_good_tests=$(echo "$vars_last_good" | runjq -r '.TEST_GIT_HASH') || return $?
# here we could apply the same approach for needles, not only tests
# With https://github.com/os-autoinst/os-autoinst/pull/1358 we could
# theoretically use TEST_GIT_REFSPEC but this would act on the shared
# test case dir within either the common openQA folder or the common
# worker cache and hence influence other tests.
# So better we use CASEDIR with a git refspec, only slightly less
# efficient and also needing to know which git repo to use
#refspec_arg="TEST_GIT_REFSPEC=$last_good_tests"
refspec_arg=$last_good_tests
clone "$id" "$id" "last_good_tests:$last_good_tests" "$refspec_arg" "${@:2}"
fi
# 3. last good job/build + current test -> check for product regression
if ! echo "$investigation" | grep -q '\<BUILD\>'; then
echo "Current job has same build as last good, product regression unlikely. Skipping product regression investigation job."
last_good_build=''
else
vars_last_good=${vars_last_good:-$(runcurl "${curl_args[@]}" -s "$host_url/tests/$last_good"/file/vars.json)} || return $?
last_good_build=$(echo "$vars_last_good" | runjq -r '.BUILD') || return $?
# here we clone with unspecified test refspec, i.e. this could be a
# more recent tests version. As an alternative we could explicitly
# checkout the git version from "first bad"
clone "$id" "$last_good" "last_good_build:$last_good_build" '' "${@:2}"
fi
# 4. last good job/build + last good test -> check for other problem
# sources, e.g. infrastructure
if [[ -z $last_good_tests ]]; then
echo "No test regression expected. Not triggered 'good build+test' as it would be the same as 3., good build + current test"
elif [[ -z $last_good_build ]]; then
echo "No product regression expected. Not triggered 'good build+test' as it would be the same as 2., current build + good test"
else
clone "$id" "$last_good" "last_good_tests_and_build:$last_good_tests+$last_good_build" "$refspec_arg" "${@:2}"
fi
}
# crosscheck
# 1. current job/build + current test -> check if reproducible/sporadic
# 2. current job/build + last good test (+ last good needles) -> check for
# test (+needles) regression
# 3. last good job/build + current test -> check for product regression
# 4. last good job/build + last good test -> check for other problem
# sources, e.g. infrastructure
investigate() {
id="${1##*/}"
job_data=$(openqa-cli "${client_args[@]}" --json jobs/"$id")
# shellcheck disable=SC2181
[[ $? != 0 ]] && echoerr "unable to query job data for $id: $job_data" && return 1
old_name="$(echo "$job_data" | runjq -r '.job.test')" || return $?
if [[ "$old_name" =~ ":investigate:" ]]; then
[[ ! "$old_name" =~ investigate:retry$ ]] && echo "Job is ':investigate:' already, skipping investigation" && return 0
investigate_origin="$(echo "$job_data" | runjq -r '.job.settings.OPENQA_INVESTIGATE_ORIGIN')" || return 1
origin_job_id=${investigate_origin#$host_url/t}
# meanwhile the original job might have been deleted already, handle
# gracefully
out=$("${client_call[@]}" -X POST jobs/"$origin_job_id"/comments text="Investigate retry job: $host_url/t$id failed, likely not a sporadic test" 2>/dev/null) || \
if [[ $(echo "$out" | runjq .error_status) != "404" ]]; then
echoerr "Unexpected error encountered when posting comments: '$out'"
return 2
fi
return 0
fi
[[ "$old_name" =~ $exclude_name_regex ]] && echo "Job name '$old_name' matches \$exclude_name_regex '$exclude_name_regex', skipping investigation" && return 0
group="$(echo "$job_data" | runjq -r '.job.parent_group + " / " + .job.group')" || return $?
[[ "$group" = " / " ]] && [[ "$exclude_no_group" = "true" ]] && echo "Job w/o job group, \$exclude_no_group is set, skipping investigation" && return 0
[[ "$group" =~ $exclude_group_regex ]] && echo "Job group '$group' matches \$exclude_group_regex '$exclude_group_regex', skipping investigation" && return 0
# Optionally we can find "first failed", could extend openQA investigation
# method instead for we are just working based on supplied job which can
# have more, ambiguous potential changes that we need to bisect on
out=$(trigger_jobs "$id" "${@:2}")
$verbose && echo "$0, id: '$id', out: '$out'"
[[ $out ]] || return 0
comment="Automatic investigation jobs:
$out"
"${client_call[@]}" -X POST jobs/"$id"/comments text="$comment"
}
main() {
client_prefix=''
[ "$dry_run" = "1" ] && client_prefix="echo"
set +u
if [[ -z "$client_call" ]]; then
client_call=(openqa-cli "${client_args[@]}")
if [[ -n "$client_prefix" ]]; then
client_call=("$client_prefix" "${client_call[@]}")
fi
fi
set -u
clone_call="${clone_call:-"$client_prefix openqa-clone-job --skip-chained-deps --within-instance"}"
error_count=0
# shellcheck disable=SC2013
for i in $(cat - | sed 's/ .*$//'); do
investigate "$i" "$@" || ((error_count++)) ||:
done
exit "$error_count"
}
main "$@"