Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release-23.2: roachtest: no longer show other errors if a test times out #115467

Merged
merged 1 commit into from
Dec 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions pkg/cmd/roachtest/test_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,13 @@ type testImpl struct {
// referencing 0+ errors. failure captures all the errors
failures []failure

// failuresSuppressed, when true, stops further failures from being added to mu.failures.
failuresSuppressed bool

// numFailures is the number of failures that have been reported via addFailure.
// This can deviate from len(failures) if failures have been suppressed.
numFailures int

// status is a map from goroutine id to status set by that goroutine. A
// special goroutine is indicated by runnerID; that one provides the test's
// "main status".
Expand Down Expand Up @@ -393,13 +400,16 @@ func (t *testImpl) addFailure(depth int, format string, args ...interface{}) {
t.mu.Lock()
defer t.mu.Unlock()

t.mu.failures = append(t.mu.failures, reportFailure)
if !t.mu.failuresSuppressed {
t.mu.failures = append(t.mu.failures, reportFailure)
}

var b strings.Builder
formatFailure(&b, reportFailure)
msg := b.String()

failureNum := len(t.mu.failures)
t.mu.numFailures++
failureNum := t.mu.numFailures
failureLog := fmt.Sprintf("failure_%d", failureNum)
t.L().Printf("test failure #%d: full stack retained in %s.log: %s", failureNum, failureLog, msg)
// Also dump the verbose error (incl. all stack traces) to a log file, in case
Expand All @@ -425,6 +435,16 @@ func (t *testImpl) addFailure(depth int, format string, args ...interface{}) {
t.mu.output = append(t.mu.output, '\n')
}

// suppressFailures turns on failure suppression for the remainder of the
// test. Once the flag is set, addFailure stops appending to t.mu.failures, so
// later failures are not surfaced (e.g. in github posting) alongside the ones
// already captured. Each later failure is still counted and still written to
// its own failure_N.log file. Intended for when the root cause is already
// known (e.g. a timeout) and follow-on errors would only add noise.
func (t *testImpl) suppressFailures() {
	t.mu.Lock()
	t.mu.failuresSuppressed = true
	t.mu.Unlock()
}

// We take the "squashed" error that contains information of all the errors for each failure.
func formatFailure(b *strings.Builder, reportFailures ...failure) {
for i, failure := range reportFailures {
Expand All @@ -450,7 +470,7 @@ func (t *testImpl) Failed() bool {
}

// failedRLocked reports whether at least one failure has been recorded for
// this test. It checks the numFailures counter rather than len(failures):
// addFailure increments the counter for every failure, but stops appending to
// failures once suppression is on, so the slice length can undercount.
// NOTE(review): the RLocked suffix suggests the caller must already hold t.mu
// (at least for reading) — confirm at call sites.
func (t *testImpl) failedRLocked() bool {
	return t.mu.numFailures > 0
}

func (t *testImpl) firstFailure() failure {
Expand Down
3 changes: 3 additions & 0 deletions pkg/cmd/roachtest/test_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -1139,6 +1139,9 @@ func (r *testRunner) runTest(
// NB: We're adding the timeout failure intentionally without cancelling the context
// to capture as much state as possible during artifact collection.
t.addFailure(0, "test timed out (%s)", timeout)
// We suppress other failures from being surfaced to the top, as the timeout is always going
// to be the main error and subsequent errors (e.g. context cancelled) add noise.
t.suppressFailures()
timedOut = true
}

Expand Down