Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release-23.2: roachtest: randomly run with runtime assertions by default #113752

Merged
merged 2 commits into from
Nov 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,13 @@ case "$component" in
;;
roachtest)
# Roachtest binary.
bazel_args=(//pkg/cmd/roachtest)
# N.B. We always compile the roachtest binary with crdb_test so the same serialization
# on the wire is established with cockroach binaries built under crdb_test.
# E.g. KVNemesisSeq is used only under crdb_test builds. If the cockroach binary is
# built with crdb_test, it will expect this field to be sent by the roachtest runner.
# Note that the opposite is not true, and a cockroach binary built without crdb_test
# is still compatible with a roachtest binary built with it.
bazel_args=(//pkg/cmd/roachtest --crdb_test)
artifacts=("pkg/cmd/roachtest/roachtest_/roachtest:bin/roachtest.$os-$arch")
;;
*)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))"
source "$dir/teamcity-support.sh" # For $root
source "$dir/teamcity-bazel-support.sh" # For run_bazel

BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e AWS_ACCESS_KEY_ID -e AWS_ACCESS_KEY_ID_ASSUME_ROLE -e AWS_KMS_KEY_ARN_A -e AWS_KMS_KEY_ARN_B -e AWS_KMS_REGION_A -e AWS_KMS_REGION_B -e AWS_ROLE_ARN -e AWS_SECRET_ACCESS_KEY -e AWS_SECRET_ACCESS_KEY_ASSUME_ROLE -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY" \
BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e AWS_ACCESS_KEY_ID -e AWS_ACCESS_KEY_ID_ASSUME_ROLE -e AWS_KMS_KEY_ARN_A -e AWS_KMS_KEY_ARN_B -e AWS_KMS_REGION_A -e AWS_KMS_REGION_B -e AWS_ROLE_ARN -e AWS_SECRET_ACCESS_KEY -e AWS_SECRET_ACCESS_KEY_ASSUME_ROLE -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY -e COCKROACH_RANDOM_SEED -e ROACHTEST_ASSERTIONS_ENABLED_SEED" \
run_bazel build/teamcity/cockroach/nightlies/roachtest_nightly_impl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))"
source "$dir/teamcity-support.sh" # For $root
source "$dir/teamcity-bazel-support.sh" # For run_bazel

BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e AZURE_CLIENT_ID -e AZURE_CLIENT_SECRET -e AZURE_SUBSCRIPTION_ID -e AZURE_TENANT_ID -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY" \
BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e AZURE_CLIENT_ID -e AZURE_CLIENT_SECRET -e AZURE_SUBSCRIPTION_ID -e AZURE_TENANT_ID -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY -e COCKROACH_RANDOM_SEED -e ROACHTEST_ASSERTIONS_ENABLED_SEED" \
run_bazel build/teamcity/cockroach/nightlies/roachtest_nightly_impl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))"
source "$dir/teamcity-support.sh" # For $root
source "$dir/teamcity-bazel-support.sh" # For run_bazel

BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e GOOGLE_KMS_KEY_A -e GOOGLE_KMS_KEY_B -e GOOGLE_CREDENTIALS_ASSUME_ROLE -e GOOGLE_SERVICE_ACCOUNT -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY" \
BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e GOOGLE_KMS_KEY_A -e GOOGLE_KMS_KEY_B -e GOOGLE_CREDENTIALS_ASSUME_ROLE -e GOOGLE_SERVICE_ACCOUNT -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY -e COCKROACH_RANDOM_SEED -e ROACHTEST_ASSERTIONS_ENABLED_SEED" \
run_bazel build/teamcity/cockroach/nightlies/roachtest_nightly_impl.sh
54 changes: 49 additions & 5 deletions pkg/cmd/roachtest/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"fmt"
"io"
"io/fs"
"math/rand"
"net"
"net/url"
"os"
Expand Down Expand Up @@ -689,8 +690,13 @@ type clusterImpl struct {
// BAZEL_COVER_DIR will be set to this value when starting a node.
goCoverDir string

os string // OS of the cluster
arch vm.CPUArch // CPU architecture of the cluster
os string // OS of the cluster
arch vm.CPUArch // CPU architecture of the cluster
randomSeed struct {
mu syncutil.Mutex
seed *int64
}

// destroyState contains state related to the cluster's destruction.
destroyState destroyState
}
Expand Down Expand Up @@ -1994,10 +2000,16 @@ func (c *clusterImpl) StartE(

startOpts.RoachprodOpts.EncryptedStores = c.encAtRest

if !envExists(settings.Env, "COCKROACH_CRASH_ON_SPAN_USE_AFTER_FINISH") {
// Panic on span use-after-Finish, so we catch such bugs.
settings.Env = append(settings.Env, "COCKROACH_CRASH_ON_SPAN_USE_AFTER_FINISH=true")
setUnlessExists := func(name string, value interface{}) {
if !envExists(settings.Env, name) {
settings.Env = append(settings.Env, fmt.Sprintf("%s=%s", name, fmt.Sprint(value)))
}
}
// Panic on span use-after-Finish, so we catch such bugs.
setUnlessExists("COCKROACH_CRASH_ON_SPAN_USE_AFTER_FINISH", true)
// Set the same seed on every node, to be used by builds with
// runtime assertions enabled.
setUnlessExists("COCKROACH_RANDOM_SEED", c.cockroachRandomSeed())

// Needed for backward-compat on crdb_internal.ranges{_no_leases}.
// Remove in v23.2.
Expand Down Expand Up @@ -2062,6 +2074,38 @@ func (c *clusterImpl) RefetchCertsFromNode(ctx context.Context, node int) error
})
}

// SetRandomSeed sets the random seed to be used by the cluster. If
// not called, clusters generate a random seed from the global
// generator in the `rand` package. This function must be called
// before any nodes in the cluster start.
func (c *clusterImpl) SetRandomSeed(seed int64) {
c.randomSeed.seed = &seed
}

// cockroachRandomSeed returns the `COCKROACH_RANDOM_SEED` to be used
// by this cluster. The seed may have been previously set by a
// previous call to `StartE`, or by the user via `SetRandomSeed`. If
// not set, this function will generate a seed and return it.
func (c *clusterImpl) cockroachRandomSeed() int64 {
c.randomSeed.mu.Lock()
defer c.randomSeed.mu.Unlock()

// If the user provided a seed via environment variable, always use
// that, even if the test attempts to set a different seed.
if seedStr := os.Getenv(test.EnvAssertionsEnabledSeed); seedStr != "" {
seedOverride, err := strconv.ParseInt(seedStr, 0, 64)
if err != nil {
panic(fmt.Sprintf("error parsing %s: %s", test.EnvAssertionsEnabledSeed, err))
}
c.randomSeed.seed = &seedOverride
} else if c.randomSeed.seed == nil {
seed := rand.Int63()
c.randomSeed.seed = &seed
}

return *c.randomSeed.seed
}

// Start is like StartE() except that it will fatal the test on error.
func (c *clusterImpl) Start(
ctx context.Context,
Expand Down
4 changes: 4 additions & 0 deletions pkg/cmd/roachtest/cluster/cluster_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ type Cluster interface {
ctx context.Context, content, dest string, mode os.FileMode, opts ...option.Option,
) error

// SetRandomSeed allows tests to set their own random seed to be
// used by builds with runtime assertions enabled.
SetRandomSeed(seed int64)

// Starting and stopping CockroachDB.

StartE(ctx context.Context, l *logger.Logger, startOpts option.StartOpts, settings install.ClusterSettings, opts ...option.Option) error
Expand Down
6 changes: 5 additions & 1 deletion pkg/cmd/roachtest/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,11 @@ func (t testWrapper) Cockroach() string {
return "./dummy-path/to/cockroach"
}

func (t testWrapper) CockroachShort() string {
func (t testWrapper) StandardCockroach() string {
return "./dummy-path/to/cockroach"
}

func (t testWrapper) RuntimeAssertionsCockroach() string {
return "./dummy-path/to/cockroach-short"
}

Expand Down
11 changes: 7 additions & 4 deletions pkg/cmd/roachtest/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/tests"
"github.com/cockroachdb/cockroach/pkg/internal/team"
rperrors "github.com/cockroachdb/cockroach/pkg/roachprod/errors"
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
Expand Down Expand Up @@ -139,6 +140,7 @@ func (g *githubIssues) createPostRequest(
spec *registry.TestSpec,
firstFailure failure,
message string,
metamorphicBuild bool,
) (issues.PostRequest, error) {
var mention []string
var projColID int
Expand Down Expand Up @@ -205,9 +207,10 @@ func (g *githubIssues) createPostRequest(
artifacts := fmt.Sprintf("/%s", testName)

clusterParams := map[string]string{
roachtestPrefix("cloud"): spec.Cluster.Cloud,
roachtestPrefix("cpu"): fmt.Sprintf("%d", spec.Cluster.CPUs),
roachtestPrefix("ssd"): fmt.Sprintf("%d", spec.Cluster.SSDs),
roachtestPrefix("cloud"): spec.Cluster.Cloud,
roachtestPrefix("cpu"): fmt.Sprintf("%d", spec.Cluster.CPUs),
roachtestPrefix("ssd"): fmt.Sprintf("%d", spec.Cluster.SSDs),
roachtestPrefix("metamorphicBuild"): fmt.Sprintf("%t", metamorphicBuild),
}
// Emit CPU architecture only if it was specified; otherwise, it's captured below, assuming cluster was created.
if spec.Cluster.Arch != "" {
Expand Down Expand Up @@ -256,7 +259,7 @@ func (g *githubIssues) MaybePost(t *testImpl, l *logger.Logger, message string)
return nil
}

postRequest, err := g.createPostRequest(t.Name(), t.start, t.end, t.spec, t.firstFailure(), message)
postRequest, err := g.createPostRequest(t.Name(), t.start, t.end, t.spec, t.firstFailure(), message, tests.UsingRuntimeAssertions(t))
if err != nil {
return err
}
Expand Down
71 changes: 38 additions & 33 deletions pkg/cmd/roachtest/github_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ func TestCreatePostRequest(t *testing.T) {
clusterCreationFailed bool
loadTeamsFailed bool
localSSD bool
metamorphicBuild bool
extraLabels []string
arch vm.CPUArch
failure failure
Expand All @@ -140,55 +141,59 @@ func TestCreatePostRequest(t *testing.T) {
expectedSkipTestFailure bool
expectedParams map[string]string
}{
{true, false, false, false, nil, "", createFailure(errors.New("other")), true, false, false,
{true, false, false, false, false, nil, "", createFailure(errors.New("other")), true, false, false,
prefixAll(map[string]string{
"cloud": "gce",
"encrypted": "false",
"fs": "ext4",
"ssd": "0",
"cpu": "4",
"arch": "amd64",
"localSSD": "false",
"cloud": "gce",
"encrypted": "false",
"fs": "ext4",
"ssd": "0",
"cpu": "4",
"arch": "amd64",
"localSSD": "false",
"metamorphicBuild": "false",
}),
},
{true, false, false, true, nil, vm.ArchARM64, createFailure(errClusterProvisioningFailed), true, false, true,
{true, false, false, true, true, nil, vm.ArchARM64, createFailure(errClusterProvisioningFailed), true, false, true,
prefixAll(map[string]string{
"cloud": "gce",
"encrypted": "false",
"fs": "ext4",
"ssd": "0",
"cpu": "4",
"arch": "arm64",
"localSSD": "true",
"cloud": "gce",
"encrypted": "false",
"fs": "ext4",
"ssd": "0",
"cpu": "4",
"arch": "arm64",
"localSSD": "true",
"metamorphicBuild": "true",
}),
},
// Assert that release-blocker label doesn't exist when
// !nonReleaseBlocker and issue is an SSH flake. Also ensure that
// in the event of a failed cluster creation, nil `vmOptions` and
// `clusterImpl` are not dereferenced
{false, true, false, false, nil, "", createFailure(rperrors.ErrSSH255), true, false, true,
{false, true, false, false, false, nil, "", createFailure(rperrors.ErrSSH255), true, false, true,
prefixAll(map[string]string{
"cloud": "gce",
"ssd": "0",
"cpu": "4",
"cloud": "gce",
"ssd": "0",
"cpu": "4",
"metamorphicBuild": "false",
}),
},
//Simulate failure loading TEAMS.yaml
{true, false, true, false, nil, "", createFailure(errors.New("other")), false, false, false, nil},
{true, false, true, false, false, nil, "", createFailure(errors.New("other")), false, false, false, nil},
//Error during post test assertions
{true, false, false, false, nil, "", createFailure(errDuringPostAssertions), false, false, false, nil},
{true, false, false, false, false, nil, "", createFailure(errDuringPostAssertions), false, false, false, nil},
//Error during dns operation
{true, false, false, false, nil, "", createFailure(gce.ErrDNSOperation), true, false, true, nil},
{true, false, false, false, false, nil, "", createFailure(gce.ErrDNSOperation), true, false, true, nil},
// Assert that extra labels in the test spec are added to the issue.
{true, false, false, false, []string{"foo-label"}, "", createFailure(errors.New("other")), true, false, false,
{true, false, false, false, false, []string{"foo-label"}, "", createFailure(errors.New("other")), true, false, false,
prefixAll(map[string]string{
"cloud": "gce",
"encrypted": "false",
"fs": "ext4",
"ssd": "0",
"cpu": "4",
"arch": "amd64",
"localSSD": "false",
"cloud": "gce",
"encrypted": "false",
"fs": "ext4",
"ssd": "0",
"cpu": "4",
"arch": "amd64",
"localSSD": "false",
"metamorphicBuild": "false",
}),
},
}
Expand Down Expand Up @@ -239,10 +244,10 @@ func TestCreatePostRequest(t *testing.T) {

if c.loadTeamsFailed {
// Assert that if TEAMS.yaml cannot be loaded then function errors.
_, err := github.createPostRequest("github_test", ti.start, ti.end, testSpec, c.failure, "message")
_, err := github.createPostRequest("github_test", ti.start, ti.end, testSpec, c.failure, "message", c.metamorphicBuild)
assert.Error(t, err, "Expected an error in createPostRequest when loading teams fails, but got nil")
} else {
req, err := github.createPostRequest("github_test", ti.start, ti.end, testSpec, c.failure, "message")
req, err := github.createPostRequest("github_test", ti.start, ti.end, testSpec, c.failure, "message", c.metamorphicBuild)
assert.NoError(t, err, "Expected no error in createPostRequest")

r := &issues.Renderer{}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,10 @@ func UploadVersion(
newVersion *Version,
) (string, error) {
dstBinary := BinaryPathForVersion(t, newVersion)
srcBinary := t.Cockroach()
// Run with standard binary as older versions retrieved through roachprod stage
// are not currently available with crdb_test enabled.
// TODO(DarrylWong): Compile older versions with crdb_test flag.
srcBinary := t.StandardCockroach()

overrideBinary, isOverriden := t.VersionsBinaryOverride()[newVersion.String()]
if isOverriden {
Expand Down
9 changes: 9 additions & 0 deletions pkg/cmd/roachtest/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ package main
import (
"context"
"fmt"
"math/rand"
"net/http"
"os"
"os/signal"
Expand All @@ -29,6 +30,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/log/logconfig"
"github.com/cockroachdb/cockroach/pkg/util/log/logpb"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
"github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/errors"
Expand All @@ -41,6 +43,7 @@ var (

parallelism int
cpuQuota int
globalSeed int64

// Path to a local dir where the test logs and artifacts collected from
// cluster will be placed.
Expand Down Expand Up @@ -153,6 +156,9 @@ func addRunBenchCommonFlags(cmd *cobra.Command) {
cmd.Flags().BoolVar(
&forceCloudCompat, "force-cloud-compat", false, "Includes tests that are not marked as compatible with the cloud used")
addSuiteAndOwnerFlags(cmd)
cmd.Flags().Int64Var(
&globalSeed, "global-seed", randutil.NewPseudoSeed(),
"The global random seed used for all tests.")
}

func addRunFlags(runCmd *cobra.Command) {
Expand Down Expand Up @@ -180,6 +186,8 @@ func addBenchFlags(benchCmd *cobra.Command) {
// Assumes initRunFlagsBinariesAndLibraries was called.
func runTests(register func(registry.Registry), filter *registry.TestFilter) error {
r := makeTestRegistry(cloud, instanceType, zonesF, localSSDArg)
//lint:ignore SA1019 deprecated
rand.Seed(globalSeed)

// actual registering of tests
// TODO: don't register if we can't run on the specified registry cloud
Expand Down Expand Up @@ -257,6 +265,7 @@ func runTests(register func(registry.Registry), filter *registry.TestFilter) err
literalArtifactsDir: literalArtifactsDir,
runnerLogPath: runnerLogPath,
}
l.Printf("global random seed: %d", globalSeed)
go func() {
if err := http.ListenAndServe(
fmt.Sprintf(":%d", promPort),
Expand Down
20 changes: 16 additions & 4 deletions pkg/cmd/roachtest/test/test_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,25 @@ import (
// cluster.
const DefaultCockroachPath = "./cockroach-default"

// EnvAssertionsEnabledSeed is the name of the environment variable
// that, when set, causes roachtest to use a binary with runtime
// assertions enabled (if available), using the random seed contained
// in that environment variable.
var EnvAssertionsEnabledSeed = "ROACHTEST_ASSERTIONS_ENABLED_SEED"

// Test is the interface through which roachtests interact with the
// test harness.
type Test interface {
Cockroach() string // path to main cockroach binary
// CockroachShort returns the path to cockroach-short binary compiled with
// --crdb_test build tag, or an empty string if no such binary was given.
CockroachShort() string
// StandardCockroach returns path to main cockroach binary, compiled
// without runtime assertions.
StandardCockroach() string
// RuntimeAssertionsCockroach returns the path to cockroach-short
// binary compiled with --crdb_test build tag, or an empty string if
// no such binary was given.
RuntimeAssertionsCockroach() string
// Cockroach returns either StandardCockroach or RuntimeAssertionsCockroach,
// picked randomly.
Cockroach() string
Name() string
BuildVersion() *version.Version
IsBuildVersion(string) bool // "vXX.YY"
Expand Down
Loading