-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
main.go
489 lines (457 loc) · 16.9 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
// Copyright 2016 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
// Command github-post parses the JSON-formatted output from a Go test session,
// as generated by either 'go test -json' or './pkg.test | go tool test2json -t',
// and posts issues for any failed tests to GitHub. If there are no failed
// tests, it assumes that there was a build error and posts the entire log to
// GitHub.
package main
import (
"bufio"
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"log"
"os"
"os/exec"
"regexp"
"sort"
"strconv"
"strings"
"time"
"github.com/cockroachdb/cockroach/pkg/cmd/internal/issues"
"github.com/cockroachdb/errors"
)
const (
pkgEnv = "PKG"
)
func main() {
ctx := context.Background()
f := func(ctx context.Context, title, packageName, testName, testMessage, authorEmail string) error {
log.Printf("filing issue with title: %s", title)
req := issues.PostRequest{
// TODO(tbg): actually use this as a template and not a hard-coded
// string.
TitleTemplate: title,
BodyTemplate: issues.UnitTestFailureBody,
PackageName: packageName,
TestName: testName,
Message: testMessage,
Artifacts: "",
AuthorEmail: authorEmail,
}
return issues.Post(ctx, req)
}
if err := listFailures(ctx, os.Stdin, f); err != nil {
log.Fatal(err)
}
}
// This struct is described in the test2json documentation.
// https://golang.org/cmd/test2json/
type testEvent struct {
Action string
Package string
Test string
Output string
Time time.Time // encodes as an RFC3339-format string
Elapsed float64 // seconds
}
type scopedTest struct {
pkg string
name string
}
func scoped(te testEvent) scopedTest {
if te.Package == "" {
return scopedTest{pkg: mustPkgFromEnv(), name: te.Test}
}
return scopedTest{pkg: te.Package, name: te.Test}
}
func mustPkgFromEnv() string {
packageName := maybePkgFromEnv()
if packageName == "" {
panic(errors.Errorf("package name environment variable %s is not set", pkgEnv))
}
return packageName
}
func maybePkgFromEnv() string {
packageName, ok := os.LookupEnv(pkgEnv)
if !ok {
return ""
}
return packageName
}
func trimPkg(pkg string) string {
return strings.TrimPrefix(pkg, issues.CockroachPkgPrefix)
}
func listFailures(
ctx context.Context,
input io.Reader,
f func(ctx context.Context, title, packageName, testName, testMessage, authorEmail string) error,
) error {
// Tests that took less than this are not even considered for slow test
// reporting. This is so that we protect against large number of
// programatically-generated subtests.
const shortTestFilterSecs float64 = 0.5
var timeoutMsg = "panic: test timed out after"
var packageOutput bytes.Buffer
// map from test name to list of events (each log line is an event, plus
// start and pass/fail events).
// Tests/events are "outstanding" until we see a final pass/fail event.
// Because of the way the go test runner prints output, in case a subtest times
// out or panics, we don't get a pass/fail event for sibling and ancestor
// tests. Those tests will remain "outstanding" and will be ignored for the
// purpose of issue reporting.
outstandingOutput := make(map[scopedTest][]testEvent)
failures := make(map[scopedTest][]testEvent)
var slowPassEvents []testEvent
var slowFailEvents []testEvent
// init is true for the preamble of the input before the first "run" test
// event.
init := true
// trustTimestamps will be set if we don't find a marker suggesting that the
// input comes from a stress run. In that case, stress prints all its output
// at once (for a captured failed test run), so the test2json timestamps are
// meaningless.
trustTimestamps := true
// elapsedTotalSec accumulates the time spent in all tests, passing or
// failing. In case the input comes from a stress run, this will be used to
// deduce the duration of a timed out test.
var elapsedTotalSec float64
// Will be set if the last test timed out.
var timedOutCulprit scopedTest
var timedOutEvent testEvent
var curTestStart time.Time
var last scopedTest
var lastEvent testEvent
scanner := bufio.NewScanner(input)
for scanner.Scan() {
var te testEvent
{
line := scanner.Text() // has no EOL marker
if len(line) <= 2 || line[0] != '{' || line[len(line)-1] != '}' {
// This line is not test2json output, skip it. This can happen if
// whatever feeds our input has some extra non-JSON lines such as
// would happen with `make` invocations.
continue
}
if err := json.Unmarshal([]byte(line), &te); err != nil {
return errors.Wrapf(err, "unable to parse %q", line)
}
}
lastEvent = te
if te.Test != "" {
init = false
}
if init && strings.Contains(te.Output, "-exec 'stress '") {
trustTimestamps = false
}
if timedOutCulprit.name == "" && te.Elapsed > 0 {
// We don't count subtests as those are counted in the parent.
if split := strings.SplitN(te.Test, "/", 2); len(split) == 1 {
elapsedTotalSec += te.Elapsed
}
}
if timedOutCulprit.name == te.Test && te.Elapsed != 0 {
te.Elapsed = timedOutEvent.Elapsed
}
// Events for the overall package test do not set Test.
if len(te.Test) > 0 {
switch te.Action {
case "run":
last = scoped(te)
if trustTimestamps {
curTestStart = te.Time
}
case "output":
key := scoped(te)
outstandingOutput[key] = append(outstandingOutput[key], te)
if strings.Contains(te.Output, timeoutMsg) {
timedOutCulprit = key
// Fill in the Elapsed field for a timeout event.
// As of go1.11, the Elapsed field is bogus for fail events for timed
// out tests, so we do our own computation.
// See https://github.com/golang/go/issues/27568
//
// Also, if the input is coming from stress, there will not even be a
// fail event for the test, so the Elapsed field computed here will be
// useful.
if trustTimestamps {
te.Elapsed = te.Time.Sub(curTestStart).Seconds()
} else {
// If we don't trust the timestamps, then we compute the test's
// duration by subtracting all the durations that we've seen so far
// (which we do trust to some extent). Note that this is not
// entirely accurate, since there's no information about the
// duration about sibling subtests which may have run. And further
// note that it doesn't work well at all for small timeouts because
// the resolution that the test durations have is just tens of
// milliseconds, so many quick tests are rounded of to a duration of
// 0.
re := regexp.MustCompile(`panic: test timed out after (\d*(?:\.\d*)?)(.)`)
matches := re.FindStringSubmatch(te.Output)
if matches == nil {
log.Printf("failed to parse timeout message: %s", te.Output)
te.Elapsed = -1
} else {
dur, err := strconv.ParseFloat(matches[1], 64)
if err != nil {
log.Fatal(err)
}
if matches[2] == "m" {
// minutes to seconds
dur *= 60
} else if matches[2] != "s" {
log.Fatalf("unexpected time unit in: %s", te.Output)
}
te.Elapsed = dur - elapsedTotalSec
}
}
timedOutEvent = te
}
case "pass", "skip":
if timedOutCulprit.name != "" {
panic(fmt.Sprintf("detected test timeout but test seems to have passed (%+v)", te))
}
delete(outstandingOutput, scoped(te))
if te.Elapsed > shortTestFilterSecs {
// We ignore subtests; their time contributes to the parent's.
if !strings.Contains(te.Test, "/") {
slowPassEvents = append(slowPassEvents, te)
}
}
case "fail":
key := scoped(te)
// Record slow tests. We ignore subtests; their time contributes to the
// parent's. Except the timed out (sub)test, for which the parent (if
// any) is not going to appear in the report because there's not going
// to be a pass/fail event for it.
if !strings.Contains(te.Test, "/") || timedOutCulprit == key {
slowFailEvents = append(slowFailEvents, te)
}
// Move the test to the failures collection unless the test timed out.
// We have special reporting for timeouts below.
if timedOutCulprit != key {
failures[key] = outstandingOutput[key]
}
delete(outstandingOutput, key)
}
} else if te.Action == "output" {
// Output was outside the context of a test. This consists mostly of the
// preamble and epilogue that Make outputs, but also any log messages that
// are printed by a test binary's main function.
packageOutput.WriteString(te.Output)
}
}
// On timeout, we might or might not have gotten a fail event for the timed
// out test (we seem to get one when processing output from a test binary run,
// but not when processing the output of `stress`, which adds some lines at
// the end). If we haven't gotten a fail event, the test's output is still
// outstanding and the test is not registered in the slowFailEvents
// collection. The timeout handling code below relies on slowFailEvents not
// being empty though, so we'll process the test here.
if timedOutCulprit.name != "" {
if _, ok := outstandingOutput[timedOutCulprit]; ok {
slowFailEvents = append(slowFailEvents, timedOutEvent)
delete(outstandingOutput, timedOutCulprit)
}
} else {
// If we haven't received a final event for the last test, then a
// panic/log.Fatal must have happened. Consider it failed.
// Note that because of https://github.com/golang/go/issues/27582 there
// might be other outstanding tests; we ignore those.
if _, ok := outstandingOutput[last]; ok {
log.Printf("found outstanding output. Considering last test failed: %s", last)
failures[last] = outstandingOutput[last]
}
}
// test2json always puts a fail event last unless it sees a big pass message
// from the test output.
if lastEvent.Action == "fail" && len(failures) == 0 && timedOutCulprit.name == "" {
// If we couldn't find a failing Go test, assume that a failure occurred
// before running Go and post an issue about that.
const unknown = "(unknown)"
packageName := maybePkgFromEnv()
if packageName == "" {
packageName = "unknown"
}
trimmedPkgName := trimPkg(packageName)
title := fmt.Sprintf("%s: package failed", trimmedPkgName)
if err := f(
ctx, title, packageName, unknown, packageOutput.String(), "", /* authorEmail */
); err != nil {
return errors.Wrap(err, "failed to post issue")
}
} else {
for test, testEvents := range failures {
if split := strings.SplitN(test.name, "/", 2); len(split) == 2 {
parentTest, subTest := scopedTest{pkg: test.pkg, name: split[0]}, scopedTest{pkg: test.pkg, name: split[1]}
log.Printf("consolidating failed subtest %q into parent test %q", subTest.name, parentTest.name)
failures[parentTest] = append(failures[parentTest], testEvents...)
delete(failures, test)
} else {
log.Printf("failed parent test %q", test)
}
}
// Sort the failed tests to make the unit tests for this script deterministic.
var failedTestNames []scopedTest
for name := range failures {
failedTestNames = append(failedTestNames, name)
}
sort.Slice(failedTestNames, func(i, j int) bool {
return fmt.Sprint(failedTestNames[i]) < fmt.Sprint(failedTestNames[j])
})
for _, test := range failedTestNames {
testEvents := failures[test]
authorEmail, err := getAuthorEmail(ctx, test.pkg, test.name)
if err != nil {
log.Printf("unable to determine test author email: %s\n", err)
}
var outputs []string
for _, testEvent := range testEvents {
outputs = append(outputs, testEvent.Output)
}
message := strings.Join(outputs, "")
title := fmt.Sprintf("%s: %s failed", trimPkg(test.pkg), test.name)
if err := f(ctx, title, test.pkg, test.name, message, authorEmail); err != nil {
return errors.Wrap(err, "failed to post issue")
}
}
}
// Sort slow tests descendingly by duration.
sort.Slice(slowPassEvents, func(i, j int) bool {
return slowPassEvents[i].Elapsed > slowPassEvents[j].Elapsed
})
sort.Slice(slowFailEvents, func(i, j int) bool {
return slowFailEvents[i].Elapsed > slowFailEvents[j].Elapsed
})
report := genSlowTestsReport(slowPassEvents, slowFailEvents)
if err := writeSlowTestsReport(report); err != nil {
log.Printf("failed to create slow tests report: %s", err)
}
// If the run timed out, file an issue. A couple of cases:
// 1) If the test that was running when the package timed out is the longest
// test, then we blame it. The common case is the test deadlocking - it would
// have run forever.
// 2) Otherwise, we don't blame anybody in particular. We file a generic issue
// listing the package name containing the report of long-running tests.
if timedOutCulprit.name != "" {
slowest := slowFailEvents[0]
if len(slowPassEvents) > 0 && slowPassEvents[0].Elapsed > slowest.Elapsed {
slowest = slowPassEvents[0]
}
if timedOutCulprit == scoped(slowest) {
// The test that was running when the timeout hit is the one that ran for
// the longest time.
authorEmail, err := getAuthorEmail(ctx, timedOutCulprit.pkg, timedOutCulprit.name)
if err != nil {
log.Printf("unable to determine test author email: %s\n", err)
}
title := fmt.Sprintf("%s: %s timed out", trimPkg(timedOutCulprit.pkg), timedOutCulprit.name)
log.Printf("timeout culprit found: %s\n", timedOutCulprit.name)
if err := f(ctx, title, timedOutCulprit.pkg, timedOutCulprit.name, report, authorEmail); err != nil {
return errors.Wrap(err, "failed to post issue")
}
} else {
packageName := maybePkgFromEnv()
if packageName == "" {
packageName = "unknown"
}
trimmedPkgName := trimPkg(packageName)
title := fmt.Sprintf("%s: package timed out", trimmedPkgName)
// Andrei gets these reports for now, but don't think I'll fix anything
// you fools.
// TODO(andrei): Figure out how to assign to the on-call engineer. Maybe
// get their name from the Slack channel?
log.Printf("timeout culprit not found\n")
if err := f(
ctx, title, packageName, "(unknown)" /* testName */, report, "andreimatei1@gmail.com",
); err != nil {
return errors.Wrap(err, "failed to post issue")
}
}
}
return nil
}
func genSlowTestsReport(slowPassingTests, slowFailingTests []testEvent) string {
var b strings.Builder
b.WriteString("Slow failing tests:\n")
for i, te := range slowFailingTests {
if i == 20 {
break
}
fmt.Fprintf(&b, "%s - %.2fs\n", te.Test, te.Elapsed)
}
if len(slowFailingTests) == 0 {
fmt.Fprint(&b, "<none>\n")
}
b.WriteString("\nSlow passing tests:\n")
for i, te := range slowPassingTests {
if i == 20 {
break
}
fmt.Fprintf(&b, "%s - %.2fs\n", te.Test, te.Elapsed)
}
if len(slowPassingTests) == 0 {
fmt.Fprint(&b, "<none>\n")
}
return b.String()
}
func writeSlowTestsReport(report string) error {
return ioutil.WriteFile("artifacts/slow-tests-report.txt", []byte(report), 0644)
}
func getAuthorEmail(ctx context.Context, packageName, testName string) (string, error) {
// Search the source code for the email address of the last committer to touch
// the first line of the source code that contains testName. Then, ask GitHub
// for the GitHub username of the user with that email address by searching
// commits in cockroachdb/cockroach for commits authored by the address.
subtests := strings.Split(testName, "/")
testName = subtests[0]
packageName = strings.TrimPrefix(packageName, "github.com/cockroachdb/cockroach/")
cmd := exec.Command(`/bin/bash`, `-c`,
fmt.Sprintf(`git grep -n "func %s" $(git rev-parse --show-toplevel)/%s/*_test.go`,
testName, packageName))
// This command returns output such as:
// ../ccl/storageccl/export_test.go:31:func TestExportCmd(t *testing.T) {
out, err := cmd.CombinedOutput()
if err != nil {
return "", errors.Errorf("couldn't find test %s in %s: %s %s",
testName, packageName, err, string(out))
}
re := regexp.MustCompile(`(.*):(.*):`)
// The first 2 :-delimited fields are the filename and line number.
matches := re.FindSubmatch(out)
if matches == nil {
return "", errors.Errorf("couldn't find filename/line number for test %s in %s: %s",
testName, packageName, string(out))
}
filename := matches[1]
linenum := matches[2]
// Now run git blame.
cmd = exec.Command(`/bin/bash`, `-c`,
fmt.Sprintf(`git blame --porcelain -L%s,+1 %s | grep author-mail`,
linenum, filename))
// This command returns output such as:
// author-mail <jordan@cockroachlabs.com>
out, err = cmd.CombinedOutput()
if err != nil {
return "", errors.Errorf("couldn't find author of test %s in %s: %s %s",
testName, packageName, err, string(out))
}
re = regexp.MustCompile("author-mail <(.*)>")
matches = re.FindSubmatch(out)
if matches == nil {
return "", errors.Errorf("couldn't find author email of test %s in %s: %s",
testName, packageName, string(out))
}
return string(matches[1]), nil
}