Skip to content

Commit

Permalink
roachtest: randomly run with runtime assertions by default
Browse files Browse the repository at this point in the history
This changes the semantics of `t.Cockroach()` to use a binary with
runtime assertions enabled by default. Performance tests (i.e., tests
that export to roachperf) will be able to continue using the standard
binary, without runtime assertions, as usual.

This commit also opts-out other tests that cannot easily be run with
runtime assertions enabled, most often due to timeouts.

Resolves cockroachdb#86678.
Informs cockroachdb#94986.

Epic: none

Release note: None
  • Loading branch information
renatolabs committed Mar 15, 2023
1 parent 125aa4a commit 1cb357f
Show file tree
Hide file tree
Showing 38 changed files with 292 additions and 99 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))"
source "$dir/teamcity-support.sh" # For $root
source "$dir/teamcity-bazel-support.sh" # For run_bazel

BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e AWS_ACCESS_KEY_ID -e AWS_ACCESS_KEY_ID_ASSUME_ROLE -e AWS_KMS_KEY_ARN_A -e AWS_KMS_KEY_ARN_B -e AWS_KMS_REGION_A -e AWS_KMS_REGION_B -e AWS_ROLE_ARN -e AWS_SECRET_ACCESS_KEY -e AWS_SECRET_ACCESS_KEY_ASSUME_ROLE -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL" \
BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e AWS_ACCESS_KEY_ID -e AWS_ACCESS_KEY_ID_ASSUME_ROLE -e AWS_KMS_KEY_ARN_A -e AWS_KMS_KEY_ARN_B -e AWS_KMS_REGION_A -e AWS_KMS_REGION_B -e AWS_ROLE_ARN -e AWS_SECRET_ACCESS_KEY -e AWS_SECRET_ACCESS_KEY_ASSUME_ROLE -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e COCKROACH_RANDOM_SEED -e ROACHTEST_ASSERTIONS_ENABLED_SEED" \
run_bazel build/teamcity/cockroach/nightlies/roachtest_nightly_impl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))"
source "$dir/teamcity-support.sh" # For $root
source "$dir/teamcity-bazel-support.sh" # For run_bazel

BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e GOOGLE_KMS_KEY_A -e GOOGLE_KMS_KEY_B -e GOOGLE_CREDENTIALS_ASSUME_ROLE -e GOOGLE_SERVICE_ACCOUNT -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL" \
BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e GOOGLE_KMS_KEY_A -e GOOGLE_KMS_KEY_B -e GOOGLE_CREDENTIALS_ASSUME_ROLE -e GOOGLE_SERVICE_ACCOUNT -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e COCKROACH_RANDOM_SEED -e ROACHTEST_ASSERTIONS_ENABLED_SEED" \
run_bazel build/teamcity/cockroach/nightlies/roachtest_nightly_impl.sh
50 changes: 47 additions & 3 deletions pkg/cmd/roachtest/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"fmt"
"io"
"io/fs"
"math/rand"
"net"
"net/url"
"os"
Expand Down Expand Up @@ -660,6 +661,11 @@ type clusterImpl struct {
expiration time.Time
encAtRest bool // use encryption at rest

randomSeed struct {
mu syncutil.Mutex
seed *int64
}

// destroyState contains state related to the cluster's destruction.
destroyState destroyState
}
Expand Down Expand Up @@ -1832,10 +1838,16 @@ func (c *clusterImpl) StartE(

startOpts.RoachprodOpts.EncryptedStores = c.encAtRest

if !envExists(settings.Env, "COCKROACH_CRASH_ON_SPAN_USE_AFTER_FINISH") {
// Panic on span use-after-Finish, so we catch such bugs.
settings.Env = append(settings.Env, "COCKROACH_CRASH_ON_SPAN_USE_AFTER_FINISH=true")
setUnlessExists := func(name string, value interface{}) {
if !envExists(settings.Env, name) {
settings.Env = append(settings.Env, fmt.Sprintf("%s=%s", name, fmt.Sprint(value)))
}
}
// Panic on span use-after-Finish, so we catch such bugs.
setUnlessExists("COCKROACH_CRASH_ON_SPAN_USE_AFTER_FINISH", true)
// Set the same seed on every node, to be used by builds with
// runtime assertions enabled.
setUnlessExists("COCKROACH_RANDOM_SEED", c.cockroachRandomSeed())

clusterSettingsOpts := []install.ClusterSettingOption{
install.TagOption(settings.Tag),
Expand Down Expand Up @@ -1884,6 +1896,38 @@ func (c *clusterImpl) RefetchCertsFromNode(ctx context.Context, node int) error
})
}

// SetRandomSeed sets the random seed to be used by the cluster. If
// not called, clusters generate a random seed from the global
// generator in the `rand` package. This function must be called
// before any nodes in the cluster start.
func (c *clusterImpl) SetRandomSeed(seed int64) {
c.randomSeed.seed = &seed
}

// cockroachRandomSeed returns the `COCKROACH_RANDOM_SEED` to be used
// by this cluster. The seed may have been previously set by a
// previous call to `StartE`, or by the user via `SetRandomSeed`. If
// not set, this function will generate a seed and return it.
func (c *clusterImpl) cockroachRandomSeed() int64 {
c.randomSeed.mu.Lock()
defer c.randomSeed.mu.Unlock()

// If the user provided a seed via environment variable, always use
// that, even if the test attempts to set a different seed.
if seedStr := os.Getenv(test.EnvAssertionsEnabledSeed); seedStr != "" {
seedOverride, err := strconv.ParseInt(seedStr, 0, 64)
if err != nil {
panic(fmt.Sprintf("error parsing %s: %s", test.EnvAssertionsEnabledSeed, err))
}
c.randomSeed.seed = &seedOverride
} else if c.randomSeed.seed == nil {
seed := rand.Int63()
c.randomSeed.seed = &seed
}

return *c.randomSeed.seed
}

// Start is like StartE() except that it will fatal the test on error.
func (c *clusterImpl) Start(
ctx context.Context,
Expand Down
4 changes: 4 additions & 0 deletions pkg/cmd/roachtest/cluster/cluster_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ type Cluster interface {
ctx context.Context, content, dest string, mode os.FileMode, opts ...option.Option,
) error

// SetRandomSeed allows tests to set their own random seed to be
// used by builds with runtime assertions enabled.
SetRandomSeed(seed int64)

// Starting and stopping CockroachDB.

StartE(ctx context.Context, l *logger.Logger, startOpts option.StartOpts, settings install.ClusterSettings, opts ...option.Option) error
Expand Down
6 changes: 5 additions & 1 deletion pkg/cmd/roachtest/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,11 @@ func (t testWrapper) Cockroach() string {
return "./dummy-path/to/cockroach"
}

func (t testWrapper) CockroachShort() string {
func (t testWrapper) StandardCockroach() string {
return "./dummy-path/to/cockroach"
}

func (t testWrapper) RuntimeAssertionsCockroach() string {
return "./dummy-path/to/cockroach-short"
}

Expand Down
6 changes: 5 additions & 1 deletion pkg/cmd/roachtest/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ func parseCreateOpts(flags *pflag.FlagSet, opts *vm.CreateOpts) {
}

func main() {
rand.Seed(timeutil.Now().UnixNano())
globalSeed := timeutil.Now().UnixNano()
rand.Seed(globalSeed)
username := os.Getenv("ROACHPROD_USER")
parallelism := 10
var cpuQuota int
Expand Down Expand Up @@ -232,6 +233,7 @@ runner itself.
return runTests(tests.RegisterTests, cliCfg{
args: args,
count: count,
globalSeed: globalSeed,
cpuQuota: cpuQuota,
runSkipped: runSkipped,
debugMode: debugModeFromOpts(),
Expand Down Expand Up @@ -370,6 +372,7 @@ runner itself.
type cliCfg struct {
args []string
count int
globalSeed int64
cpuQuota int
debugMode debugMode
runSkipped bool
Expand Down Expand Up @@ -450,6 +453,7 @@ func runTests(register func(registry.Registry), cfg cliCfg) error {
literalArtifactsDir: cfg.literalArtifactsDir,
runnerLogPath: runnerLogPath,
}
l.Printf("global random seed: %d", cfg.globalSeed)
go func() {
if err := http.ListenAndServe(
fmt.Sprintf(":%d", cfg.promPort),
Expand Down
20 changes: 16 additions & 4 deletions pkg/cmd/roachtest/test/test_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,25 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/version"
)

// EnvAssertionsEnabledSeed is the name of the environment variable
// that, when set, causes roachtest to use a binary with runtime
// assertions enabled (if available), using the random seed contained
// in that environment variable.
var EnvAssertionsEnabledSeed = "ROACHTEST_ASSERTIONS_ENABLED_SEED"

// Test is the interface through which roachtests interact with the
// test harness.
type Test interface {
Cockroach() string // path to main cockroach binary
// CockroachShort returns the path to cockroach-short binary compiled with
// --crdb_test build tag, or an empty string if no such binary was given.
CockroachShort() string
// StandardCockroach returns path to main cockroach binary, compiled
// without runtime assertions.
StandardCockroach() string
// RuntimeAssertionsCockroach returns the path to cockroach-short
// binary compiled with --crdb_test build tag, or an empty string if
// no such binary was given.
RuntimeAssertionsCockroach() string
// Cockroach returns either StandardCockroach or RuntimeAssertionsCockroach,
// picked randomly.
Cockroach() string
Name() string
BuildVersion() *version.Version
IsBuildVersion(string) bool // "vXX.YY"
Expand Down
46 changes: 42 additions & 4 deletions pkg/cmd/roachtest/test_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,14 @@ import (
"context"
"fmt"
"io"
"math/rand"
"os"
"strings"
"sync"
"time"

"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
Expand Down Expand Up @@ -54,8 +57,12 @@ type failure struct {
type testImpl struct {
spec *registry.TestSpec

cockroach string // path to main cockroach binary
cockroachShort string // path to cockroach-short binary compiled with --crdb_test build tag
cockroach string // path to main cockroach binary
cockroachShort string // path to cockroach-short binary compiled with --crdb_test build tag

randomCockroachOnce sync.Once
randomizedCockroach string // either `cockroach` or `cockroach-short`, picked randomly

deprecatedWorkload string // path to workload binary
debug bool // whether the test is in debug mode.
// buildVersion is the version of the Cockroach binary that the test will run
Expand Down Expand Up @@ -123,14 +130,45 @@ func (t *testImpl) BuildVersion() *version.Version {
return t.buildVersion
}

// Cockroach will return either `RuntimeAssertionsCockroach()` or
// `StandardCockroach()`, picked randomly. Once a random choice has
// been made, the same binary will be returned on every call to
// `Cockroach`, to avoid errors that may arise from binaries having a
// different value for metamorphic constants.
func (t *testImpl) Cockroach() string {
return t.cockroach
t.randomCockroachOnce.Do(func() {
assertionsEnabledProbability := 0.5
// If the user specified a custom seed to be used with runtime
// assertions, assume they want to run the test with assertions
// enabled, making it easier to reproduce issues.
if os.Getenv(test.EnvAssertionsEnabledSeed) != "" {
assertionsEnabledProbability = 1
}

if rand.Float64() < assertionsEnabledProbability {
// The build with runtime assertions should exist in every nightly
// CI build, but we can't assume it exists in every roachtest
// call.
if path := t.RuntimeAssertionsCockroach(); path != "" {
t.randomizedCockroach = path
} else {
t.l.Printf("WARNING: running without runtime assertions since the corresponding binary was not passed")
}
}
t.randomizedCockroach = t.StandardCockroach()
})

return t.randomizedCockroach
}

func (t *testImpl) CockroachShort() string {
func (t *testImpl) RuntimeAssertionsCockroach() string {
return t.cockroachShort
}

func (t *testImpl) StandardCockroach() string {
return t.cockroach
}

func (t *testImpl) DeprecatedWorkload() string {
return t.deprecatedWorkload
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import (
func registerMultiStoreOverload(r registry.Registry) {
runKV := func(ctx context.Context, t test.Test, c cluster.Cluster) {
nodes := c.Spec().NodeCount - 1
c.Put(ctx, t.Cockroach(), "./cockroach", c.Range(1, nodes))
c.Put(ctx, t.StandardCockroach(), "./cockroach", c.Range(1, nodes))
c.Put(ctx, t.DeprecatedWorkload(), "./workload", c.Node(nodes+1))
startOpts := option.DefaultStartOptsNoBackups()
startOpts.RoachprodOpts.StoreCount = 2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ func runMultiTenantFairness(
}

t.L().Printf("starting cockroach securely (<%s)", time.Minute)
c.Put(ctx, t.Cockroach(), "./cockroach")
c.Put(ctx, t.StandardCockroach(), "./cockroach")
c.Start(ctx, t.L(),
option.DefaultStartOptsNoBackups(),
install.MakeClusterSettings(install.SecureOption(true)),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func registerSnapshotOverload(r registry.Registry) {
t.Fatalf("expected at least 4 nodes, found %d", c.Spec().NodeCount)
}

c.Put(ctx, t.Cockroach(), "./cockroach", c.All())
c.Put(ctx, t.StandardCockroach(), "./cockroach", c.All())
crdbNodes := c.Spec().NodeCount - 1
workloadNode := crdbNodes + 1
for i := 1; i <= crdbNodes; i++ {
Expand Down
8 changes: 5 additions & 3 deletions pkg/cmd/roachtest/tests/allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,11 @@ func registerAllocator(r registry.Registry) {
},
})
r.Add(registry.TestSpec{
Name: `replicate/wide`,
Owner: registry.OwnerKV,
Timeout: 10 * time.Minute,
Name: `replicate/wide`,
Owner: registry.OwnerKV,
// Allow a longer running time to account for runs that use a
// cockroach build with runtime assertions enabled.
Timeout: 30 * time.Minute,
Cluster: r.MakeClusterSpec(9, spec.CPU(1)),
Run: runWideReplication,
})
Expand Down
33 changes: 24 additions & 9 deletions pkg/cmd/roachtest/tests/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,16 +88,29 @@ func destinationName(c cluster.Cluster) string {
}

func importBankDataSplit(
ctx context.Context, rows, ranges int, t test.Test, c cluster.Cluster,
ctx context.Context, rows, ranges int, t test.Test, c cluster.Cluster, perfRun bool,
) string {
dest := destinationName(c)

cockroach := t.Cockroach()
startOpts := option.DefaultStartOptsNoBackups()
if perfRun {
cockroach = t.StandardCockroach()
} else if usingRuntimeAssertions(t) {
// When running these imports tests with runtime assertions
// enabled, increase SQL's memory budget to avoid 'budget
// exceeded' failures.
startOpts.RoachprodOpts.ExtraArgs = append(
startOpts.RoachprodOpts.ExtraArgs,
"--max-sql-memory=50%",
)
}
c.Put(ctx, cockroach, "./cockroach")
c.Put(ctx, t.DeprecatedWorkload(), "./workload")
c.Put(ctx, t.Cockroach(), "./cockroach")

// NB: starting the cluster creates the logs dir as a side effect,
// needed below.
c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings())
c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings())
runImportBankDataSplit(ctx, rows, ranges, t, c)
return dest
}
Expand Down Expand Up @@ -174,8 +187,10 @@ func runImportBankDataSplit(ctx context.Context, rows, ranges int, t test.Test,
c.Run(ctx, c.Node(importNode), importBankCommand("./cockroach", rows, ranges, csvPort, importNode))
}

func importBankData(ctx context.Context, rows int, t test.Test, c cluster.Cluster) string {
return importBankDataSplit(ctx, rows, 0 /* ranges */, t, c)
func importBankData(
ctx context.Context, rows int, t test.Test, c cluster.Cluster, perfRun bool,
) string {
return importBankDataSplit(ctx, rows, 0 /* ranges */, t, c, perfRun)
}

func registerBackupNodeShutdown(r registry.Registry) {
Expand All @@ -190,7 +205,7 @@ func registerBackupNodeShutdown(r registry.Registry) {
// works so the job doesn't complete immediately.
rows = rows5GiB
}
return importBankData(ctx, rows, t, c)
return importBankData(ctx, rows, t, c, false /* perfRun */)
}

r.Add(registry.TestSpec{
Expand Down Expand Up @@ -372,7 +387,7 @@ func registerBackup(r registry.Registry) {
if c.IsLocal() {
rows = 100
}
dest := importBankData(ctx, rows, t, c)
dest := importBankData(ctx, rows, t, c, true /* perfRun */)
tick, perfBuf := initBulkJobPerfArtifacts("backup/2TB", 2*time.Hour)

m := c.NewMonitor(ctx)
Expand Down Expand Up @@ -421,7 +436,7 @@ func registerBackup(r registry.Registry) {

rows := 100

dest := importBankData(ctx, rows, t, c)
dest := importBankData(ctx, rows, t, c, false /* perfRun */)

var backupPath, kmsURI string
var err error
Expand Down Expand Up @@ -529,7 +544,7 @@ func registerBackup(r registry.Registry) {
if c.IsLocal() {
rows = 100
}
dest := importBankData(ctx, rows, t, c)
dest := importBankData(ctx, rows, t, c, false /* perfRun */)

conn := c.Conn(ctx, t.L(), 1)
m := c.NewMonitor(ctx)
Expand Down
Loading

0 comments on commit 1cb357f

Please sign in to comment.