Skip to content

Commit

Permalink
roachtest: route runtime assertion build timeouts to test-eng
Browse files Browse the repository at this point in the history
As we determine the stability of runtime assertions, temporarily
route all timeout failures to test-eng to reduce noise for other
teams.
  • Loading branch information
DarrylWong committed Aug 12, 2024
1 parent 1a723bb commit 5886268
Show file tree
Hide file tree
Showing 6 changed files with 196 additions and 68 deletions.
58 changes: 43 additions & 15 deletions pkg/cmd/roachtest/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,11 @@ func generateHelpCommand(
}
}

func failuresAsErrorWithOwnership(failures []failure) *registry.ErrorWithOwnership {
var transientError rperrors.TransientError
func failuresAsErrorWithOwnership(
failures []failure, isRuntimeAssertionsBuild bool,
) *registry.ErrorWithOwnership {
var err registry.ErrorWithOwnership
var transientError rperrors.TransientError
if failuresMatchingError(failures, &transientError) {
err = registry.ErrorWithOwner(
registry.OwnerTestEng, transientError,
Expand All @@ -95,6 +97,19 @@ func failuresAsErrorWithOwnership(failures []failure) *registry.ErrorWithOwnersh
return &err
}

// Temporarily route all runtime assertion timeouts to test-eng while
// we gauge the frequency they occur and adjust test timeouts accordingly.
// TODO(darryl): once we are more confident in the stability of runtime
// assertions we can remove this.
var timeoutError registry.TimeoutError
if isRuntimeAssertionsBuild && failuresMatchingError(failures, &timeoutError) {
err = registry.ErrorWithOwner(
registry.OwnerTestEng, timeoutError,
)

return &err
}

if errWithOwner := failuresSpecifyOwner(failures); errWithOwner != nil {
return errWithOwner
}
Expand Down Expand Up @@ -161,6 +176,7 @@ func (g *githubIssues) createPostRequest(
failures []failure,
message string,
sideEyeTimeoutSnapshotURL string,
runtimeAssertionsBuild bool,
metamorphicBuild bool,
coverageBuild bool,
) (issues.PostRequest, error) {
Expand Down Expand Up @@ -191,14 +207,15 @@ func (g *githubIssues) createPostRequest(
// If we find a failure that was labeled as a roachprod transient
// error, redirect that to Test Eng with the corresponding label as
// title override.
errWithOwner := failuresAsErrorWithOwnership(failures)
errWithOwner := failuresAsErrorWithOwnership(failures, runtimeAssertionsBuild)
if errWithOwner != nil {
handleErrorWithOwnership(*errWithOwner)
}

// Issues posted from roachtest are identifiable as such, and they are also release blockers
// (this label may be removed by a human upon closer investigation).
const infraFlakeLabel = "X-infra-flake"
const runtimeAssertionsLabel = "B-runtime-assertions-enabled"
const metamorphicLabel = "B-metamorphic-enabled"
const coverageLabel = "B-coverage-enabled"
labels := []string{"O-roachtest"}
Expand All @@ -208,10 +225,13 @@ func (g *githubIssues) createPostRequest(
labels = append(labels, issues.TestFailureLabel)
if !spec.NonReleaseBlocker {
// TODO(radu): remove this check once these build types are stabilized.
if !metamorphicBuild && !coverageBuild {
if !metamorphicBuild && !coverageBuild && !runtimeAssertionsBuild {
labels = append(labels, issues.ReleaseBlockerLabel)
}
}
if runtimeAssertionsBuild {
labels = append(labels, runtimeAssertionsLabel)
}
if metamorphicBuild {
labels = append(labels, metamorphicLabel)
}
Expand Down Expand Up @@ -247,11 +267,12 @@ func (g *githubIssues) createPostRequest(
artifacts := fmt.Sprintf("/%s", testName)

clusterParams := map[string]string{
roachtestPrefix("cloud"): roachtestflags.Cloud.String(),
roachtestPrefix("cpu"): fmt.Sprintf("%d", spec.Cluster.CPUs),
roachtestPrefix("ssd"): fmt.Sprintf("%d", spec.Cluster.SSDs),
roachtestPrefix("metamorphicBuild"): fmt.Sprintf("%t", metamorphicBuild),
roachtestPrefix("coverageBuild"): fmt.Sprintf("%t", coverageBuild),
roachtestPrefix("cloud"): roachtestflags.Cloud.String(),
roachtestPrefix("cpu"): fmt.Sprintf("%d", spec.Cluster.CPUs),
roachtestPrefix("ssd"): fmt.Sprintf("%d", spec.Cluster.SSDs),
roachtestPrefix("runtimeAssertionsBuild"): fmt.Sprintf("%t", runtimeAssertionsBuild),
roachtestPrefix("metamorphicBuild"): fmt.Sprintf("%t", metamorphicBuild),
roachtestPrefix("coverageBuild"): fmt.Sprintf("%t", coverageBuild),
}
// Emit CPU architecture only if it was specified; otherwise, it's captured below, assuming cluster was created.
if spec.Cluster.Arch != "" {
Expand Down Expand Up @@ -292,6 +313,12 @@ func (g *githubIssues) createPostRequest(
"non-metamorphic run, there should be a similar issue without the "+metamorphicLabel+" label. If there "+
"isn't one, it is possible that this failure is caused by a metamorphic constant.")
}
if runtimeAssertionsBuild {
topLevelNotes = append(topLevelNotes,
"This build has runtime assertions enabled. If the same failure was hit in a "+
"non-cockroachEA run, there should be a similar issue without the "+runtimeAssertionsLabel+" label. If there "+
"isn't one, it is possible that this failure hit an assertion or is related to the assertions overhead.")
}

sideEyeMsg := ""
if sideEyeTimeoutSnapshotURL != "" {
Expand All @@ -305,7 +332,7 @@ func (g *githubIssues) createPostRequest(
TestName: issueName,
Labels: labels,
// Keep issues separate unless the if these labels don't match.
AdoptIssueLabelMatchSet: []string{infraFlakeLabel, coverageLabel, metamorphicLabel},
AdoptIssueLabelMatchSet: []string{infraFlakeLabel, coverageLabel, metamorphicLabel, runtimeAssertionsLabel},
TopLevelNotes: topLevelNotes,
Message: issueMessage,
Artifacts: artifacts,
Expand All @@ -325,19 +352,20 @@ func (g *githubIssues) MaybePost(
return nil, nil
}

var metamorphicBuild bool
var runtimeAssertionsBuild bool
switch t.spec.CockroachBinary {
case registry.StandardCockroach:
metamorphicBuild = false
runtimeAssertionsBuild = false
case registry.RuntimeAssertionsCockroach:
metamorphicBuild = true
runtimeAssertionsBuild = true
default:
metamorphicBuild = tests.UsingRuntimeAssertions(t)
runtimeAssertionsBuild = tests.UsingRuntimeAssertions(t)
}
postRequest, err := g.createPostRequest(
t.Name(), t.start, t.end, t.spec, t.failures(),
message, sideEyeTimeoutSnapshotURL,
metamorphicBuild, t.goCoverEnabled)
runtimeAssertionsBuild, t.metamorphicConstantsEnabled, t.goCoverEnabled)

if err != nil {
return nil, err
}
Expand Down
153 changes: 102 additions & 51 deletions pkg/cmd/roachtest/github_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ func TestCreatePostRequest(t *testing.T) {
clusterCreationFailed bool
loadTeamsFailed bool
localSSD bool
runtimeAssertionsBuild bool
metamorphicBuild bool
coverageBuild bool
extraLabels []string
Expand All @@ -161,15 +162,16 @@ func TestCreatePostRequest(t *testing.T) {
expectedTeam: "@cockroachdb/unowned",
expectedName: testName,
expectedParams: prefixAll(map[string]string{
"cloud": "gce",
"encrypted": "false",
"fs": "ext4",
"ssd": "0",
"cpu": "4",
"arch": "amd64",
"localSSD": "false",
"metamorphicBuild": "false",
"coverageBuild": "false",
"cloud": "gce",
"encrypted": "false",
"fs": "ext4",
"ssd": "0",
"cpu": "4",
"arch": "amd64",
"localSSD": "false",
"runtimeAssertionsBuild": "false",
"metamorphicBuild": "false",
"coverageBuild": "false",
}),
},
// 2.
Expand All @@ -186,15 +188,16 @@ func TestCreatePostRequest(t *testing.T) {
expectedName: "cluster_creation",
expectedMessagePrefix: testName + " failed",
expectedParams: prefixAll(map[string]string{
"cloud": "gce",
"encrypted": "false",
"fs": "ext4",
"ssd": "0",
"cpu": "4",
"arch": "arm64",
"localSSD": "true",
"metamorphicBuild": "true",
"coverageBuild": "false",
"cloud": "gce",
"encrypted": "false",
"fs": "ext4",
"ssd": "0",
"cpu": "4",
"arch": "arm64",
"localSSD": "true",
"runtimeAssertionsBuild": "false",
"metamorphicBuild": "true",
"coverageBuild": "false",
}),
},
// 3. Assert that release-blocker label doesn't exist when
Expand All @@ -210,11 +213,12 @@ func TestCreatePostRequest(t *testing.T) {
expectedName: "ssh_problem",
expectedMessagePrefix: testName + " failed",
expectedParams: prefixAll(map[string]string{
"cloud": "gce",
"ssd": "0",
"cpu": "4",
"metamorphicBuild": "false",
"coverageBuild": "false",
"cloud": "gce",
"ssd": "0",
"cpu": "4",
"runtimeAssertionsBuild": "false",
"metamorphicBuild": "false",
"coverageBuild": "false",
}),
},
// 4. Simulate failure loading TEAMS.yaml
Expand Down Expand Up @@ -243,15 +247,16 @@ func TestCreatePostRequest(t *testing.T) {
expectedTeam: "@cockroachdb/unowned",
expectedName: testName,
expectedParams: prefixAll(map[string]string{
"cloud": "gce",
"encrypted": "false",
"fs": "ext4",
"ssd": "0",
"cpu": "4",
"arch": "amd64",
"localSSD": "false",
"metamorphicBuild": "false",
"coverageBuild": "false",
"cloud": "gce",
"encrypted": "false",
"fs": "ext4",
"ssd": "0",
"cpu": "4",
"arch": "amd64",
"localSSD": "false",
"runtimeAssertionsBuild": "false",
"metamorphicBuild": "false",
"coverageBuild": "false",
}),
},
// 7. Verify that release-blocker label is not applied on metamorphic builds
Expand All @@ -264,15 +269,16 @@ func TestCreatePostRequest(t *testing.T) {
expectedTeam: "@cockroachdb/unowned",
expectedName: testName,
expectedParams: prefixAll(map[string]string{
"cloud": "gce",
"encrypted": "false",
"fs": "ext4",
"ssd": "0",
"cpu": "4",
"arch": "amd64",
"localSSD": "false",
"metamorphicBuild": "true",
"coverageBuild": "false",
"cloud": "gce",
"encrypted": "false",
"fs": "ext4",
"ssd": "0",
"cpu": "4",
"arch": "amd64",
"localSSD": "false",
"runtimeAssertionsBuild": "false",
"metamorphicBuild": "true",
"coverageBuild": "false",
}),
},
// 8. Verify that release-blocker label is not applied on coverage builds (for
Expand All @@ -286,15 +292,16 @@ func TestCreatePostRequest(t *testing.T) {
expectedName: testName,
expectedLabels: []string{"C-test-failure", "B-coverage-enabled", "foo-label"},
expectedParams: prefixAll(map[string]string{
"cloud": "gce",
"encrypted": "false",
"fs": "ext4",
"ssd": "0",
"cpu": "4",
"arch": "amd64",
"localSSD": "false",
"metamorphicBuild": "false",
"coverageBuild": "true",
"cloud": "gce",
"encrypted": "false",
"fs": "ext4",
"ssd": "0",
"cpu": "4",
"arch": "amd64",
"localSSD": "false",
"runtimeAssertionsBuild": "false",
"metamorphicBuild": "false",
"coverageBuild": "true",
}),
},
// 9. Verify preemption failure are routed to test-eng and marked as infra-flake, when the
Expand Down Expand Up @@ -398,6 +405,50 @@ func TestCreatePostRequest(t *testing.T) {
expectedTeam: "@cockroachdb/unowned",
sideEyeURL: "https://app.side-eye.io/snapshots/1",
},
// 17. Verify that runtime assertion builds get the runtime-assertions label.
{
extraLabels: []string{"foo-label"},
runtimeAssertionsBuild: true,
failures: []failure{createFailure(errors.New("random"))},
expectedPost: true,
expectedTeam: "@cockroachdb/unowned",
expectedName: testName,
expectedLabels: []string{"C-test-failure", "B-runtime-assertions-enabled", "foo-label"},
expectedParams: prefixAll(map[string]string{
"cloud": "gce",
"encrypted": "false",
"fs": "ext4",
"ssd": "0",
"cpu": "4",
"arch": "amd64",
"localSSD": "false",
"runtimeAssertionsBuild": "true",
"metamorphicBuild": "false",
"coverageBuild": "false",
}),
},
// 18. Verify that runtime assertion timeouts are routed to test-eng.
{
extraLabels: []string{"foo-label"},
runtimeAssertionsBuild: true,
failures: []failure{createFailure(registry.TimeoutFailure(1 * time.Hour))},
expectedPost: true,
expectedTeam: "@cockroachdb/test-eng",
expectedName: testName,
expectedLabels: []string{"T-testeng", "C-test-failure", "B-runtime-assertions-enabled", "foo-label"},
expectedParams: prefixAll(map[string]string{
"cloud": "gce",
"encrypted": "false",
"fs": "ext4",
"ssd": "0",
"cpu": "4",
"arch": "amd64",
"localSSD": "false",
"runtimeAssertionsBuild": "true",
"metamorphicBuild": "false",
"coverageBuild": "false",
}),
},
}

reg := makeTestRegistry()
Expand Down Expand Up @@ -446,7 +497,7 @@ func TestCreatePostRequest(t *testing.T) {

req, err := github.createPostRequest(
testName, ti.start, ti.end, testSpec, testCase.failures,
testCase.message, testCase.sideEyeURL, testCase.metamorphicBuild, testCase.coverageBuild,
testCase.message, testCase.sideEyeURL, testCase.runtimeAssertionsBuild, testCase.metamorphicBuild, testCase.coverageBuild,
)
if testCase.loadTeamsFailed {
// Assert that if TEAMS.yaml cannot be loaded then function errors.
Expand Down
14 changes: 14 additions & 0 deletions pkg/cmd/roachtest/registry/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ package registry
import (
"errors"
"fmt"
"time"
)

type (
Expand Down Expand Up @@ -65,3 +66,16 @@ func ErrorWithOwner(owner Owner, err error, opts ...errorOption) ErrorWithOwners

return result
}

// TimeoutError denotes a failure due to a test timing out.
type TimeoutError struct {
timeout time.Duration
}

func TimeoutFailure(timeout time.Duration) TimeoutError {
return TimeoutError{timeout: timeout}
}

func (te TimeoutError) Error() string {
return fmt.Sprintf("test timed out (%s)", te.timeout)
}
Loading

0 comments on commit 5886268

Please sign in to comment.