Skip to content

Commit

Permalink
roachtest: randomly run with runtime assertions by default
Browse files Browse the repository at this point in the history
This changes the semantics of `t.Cockroach()` to use a binary with
runtime assertions enabled by default. Performance tests (indicated by
the benchmark TestSpec) will continue using the standard binary, without
runtime assertions, as usual.

This commit also opts-out other tests that cannot easily be run with
runtime assertions enabled, most often due to timeouts.

Resolves cockroachdb#86678.
Informs cockroachdb#94986.

Epic: none

Release note: None
  • Loading branch information
renatolabs authored and DarrylWong committed Oct 17, 2023
1 parent 46c9273 commit 325f77c
Show file tree
Hide file tree
Showing 25 changed files with 279 additions and 65 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))"
source "$dir/teamcity-support.sh" # For $root
source "$dir/teamcity-bazel-support.sh" # For run_bazel

BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e AWS_ACCESS_KEY_ID -e AWS_ACCESS_KEY_ID_ASSUME_ROLE -e AWS_KMS_KEY_ARN_A -e AWS_KMS_KEY_ARN_B -e AWS_KMS_REGION_A -e AWS_KMS_REGION_B -e AWS_ROLE_ARN -e AWS_SECRET_ACCESS_KEY -e AWS_SECRET_ACCESS_KEY_ASSUME_ROLE -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY" \
BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e AWS_ACCESS_KEY_ID -e AWS_ACCESS_KEY_ID_ASSUME_ROLE -e AWS_KMS_KEY_ARN_A -e AWS_KMS_KEY_ARN_B -e AWS_KMS_REGION_A -e AWS_KMS_REGION_B -e AWS_ROLE_ARN -e AWS_SECRET_ACCESS_KEY -e AWS_SECRET_ACCESS_KEY_ASSUME_ROLE -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY -e COCKROACH_RANDOM_SEED -e ROACHTEST_ASSERTIONS_ENABLED_SEED" \
run_bazel build/teamcity/cockroach/nightlies/roachtest_nightly_impl.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))"
source "$dir/teamcity-support.sh" # For $root
source "$dir/teamcity-bazel-support.sh" # For run_bazel

BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e GOOGLE_KMS_KEY_A -e GOOGLE_KMS_KEY_B -e GOOGLE_CREDENTIALS_ASSUME_ROLE -e GOOGLE_SERVICE_ACCOUNT -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY" \
BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e GOOGLE_KMS_KEY_A -e GOOGLE_KMS_KEY_B -e GOOGLE_CREDENTIALS_ASSUME_ROLE -e GOOGLE_SERVICE_ACCOUNT -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY -e COCKROACH_RANDOM_SEED -e ROACHTEST_ASSERTIONS_ENABLED_SEED" \
run_bazel build/teamcity/cockroach/nightlies/roachtest_nightly_impl.sh
54 changes: 49 additions & 5 deletions pkg/cmd/roachtest/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"fmt"
"io"
"io/fs"
"math/rand"
"net"
"net/url"
"os"
Expand Down Expand Up @@ -689,8 +690,13 @@ type clusterImpl struct {
// BAZEL_COVER_DIR will be set to this value when starting a node.
goCoverDir string

os string // OS of the cluster
arch vm.CPUArch // CPU architecture of the cluster
os string // OS of the cluster
arch vm.CPUArch // CPU architecture of the cluster
randomSeed struct {
mu syncutil.Mutex
seed *int64
}

// destroyState contains state related to the cluster's destruction.
destroyState destroyState
}
Expand Down Expand Up @@ -2003,10 +2009,16 @@ func (c *clusterImpl) StartE(

startOpts.RoachprodOpts.EncryptedStores = c.encAtRest

if !envExists(settings.Env, "COCKROACH_CRASH_ON_SPAN_USE_AFTER_FINISH") {
// Panic on span use-after-Finish, so we catch such bugs.
settings.Env = append(settings.Env, "COCKROACH_CRASH_ON_SPAN_USE_AFTER_FINISH=true")
setUnlessExists := func(name string, value interface{}) {
if !envExists(settings.Env, name) {
settings.Env = append(settings.Env, fmt.Sprintf("%s=%s", name, fmt.Sprint(value)))
}
}
// Panic on span use-after-Finish, so we catch such bugs.
setUnlessExists("COCKROACH_CRASH_ON_SPAN_USE_AFTER_FINISH", true)
// Set the same seed on every node, to be used by builds with
// runtime assertions enabled.
setUnlessExists("COCKROACH_RANDOM_SEED", c.cockroachRandomSeed())

// Needed for backward-compat on crdb_internal.ranges{_no_leases}.
// Remove in v23.2.
Expand Down Expand Up @@ -2071,6 +2083,38 @@ func (c *clusterImpl) RefetchCertsFromNode(ctx context.Context, node int) error
})
}

// SetRandomSeed sets the random seed to be used by the cluster. If
// not called, clusters generate a random seed from the global
// generator in the `rand` package. This function must be called
// before any nodes in the cluster start.
func (c *clusterImpl) SetRandomSeed(seed int64) {
c.randomSeed.seed = &seed
}

// cockroachRandomSeed returns the `COCKROACH_RANDOM_SEED` to be used
// by this cluster. The seed may have been previously set by a
// previous call to `StartE`, or by the user via `SetRandomSeed`. If
// not set, this function will generate a seed and return it.
func (c *clusterImpl) cockroachRandomSeed() int64 {
c.randomSeed.mu.Lock()
defer c.randomSeed.mu.Unlock()

// If the user provided a seed via environment variable, always use
// that, even if the test attempts to set a different seed.
if seedStr := os.Getenv(test.EnvAssertionsEnabledSeed); seedStr != "" {
seedOverride, err := strconv.ParseInt(seedStr, 0, 64)
if err != nil {
panic(fmt.Sprintf("error parsing %s: %s", test.EnvAssertionsEnabledSeed, err))
}
c.randomSeed.seed = &seedOverride
} else if c.randomSeed.seed == nil {
seed := rand.Int63()
c.randomSeed.seed = &seed
}

return *c.randomSeed.seed
}

// Start is like StartE() except that it will fatal the test on error.
func (c *clusterImpl) Start(
ctx context.Context,
Expand Down
4 changes: 4 additions & 0 deletions pkg/cmd/roachtest/cluster/cluster_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ type Cluster interface {
ctx context.Context, content, dest string, mode os.FileMode, opts ...option.Option,
) error

// SetRandomSeed allows tests to set their own random seed to be
// used by builds with runtime assertions enabled.
SetRandomSeed(seed int64)

// Starting and stopping CockroachDB.

StartE(ctx context.Context, l *logger.Logger, startOpts option.StartOpts, settings install.ClusterSettings, opts ...option.Option) error
Expand Down
6 changes: 5 additions & 1 deletion pkg/cmd/roachtest/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,11 @@ func (t testWrapper) Cockroach() string {
return "./dummy-path/to/cockroach"
}

func (t testWrapper) CockroachShort() string {
func (t testWrapper) StandardCockroach() string {
return "./dummy-path/to/cockroach"
}

func (t testWrapper) RuntimeAssertionsCockroach() string {
return "./dummy-path/to/cockroach-short"
}

Expand Down
9 changes: 9 additions & 0 deletions pkg/cmd/roachtest/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ package main
import (
"context"
"fmt"
"math/rand"
"net/http"
"os"
"os/signal"
Expand All @@ -29,6 +30,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/log/logconfig"
"github.com/cockroachdb/cockroach/pkg/util/log/logpb"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
"github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/errors"
Expand All @@ -41,6 +43,7 @@ var (

parallelism int
cpuQuota int
globalSeed int64

// Path to a local dir where the test logs and artifacts collected from
// cluster will be placed.
Expand Down Expand Up @@ -153,6 +156,9 @@ func addRunBenchCommonFlags(cmd *cobra.Command) {
cmd.Flags().BoolVar(
&forceCloudCompat, "force-cloud-compat", false, "Includes tests that are not marked as compatible with the cloud used")
addSuiteAndOwnerFlags(cmd)
cmd.Flags().Int64Var(
&globalSeed, "global-seed", randutil.NewPseudoSeed(),
"The global random seed used for all tests.")
}

func addRunFlags(runCmd *cobra.Command) {
Expand All @@ -179,6 +185,8 @@ func addBenchFlags(benchCmd *cobra.Command) {
// runTests is the main function for the run and bench commands.
// Assumes initRunFlagsBinariesAndLibraries was called.
func runTests(register func(registry.Registry), filter *registry.TestFilter) error {
//lint:ignore SA1019 deprecated
rand.Seed(globalSeed)
r := makeTestRegistry(cloud)

// actual registering of tests
Expand Down Expand Up @@ -257,6 +265,7 @@ func runTests(register func(registry.Registry), filter *registry.TestFilter) err
literalArtifactsDir: literalArtifactsDir,
runnerLogPath: runnerLogPath,
}
l.Printf("global random seed: %d", globalSeed)
go func() {
if err := http.ListenAndServe(
fmt.Sprintf(":%d", promPort),
Expand Down
20 changes: 16 additions & 4 deletions pkg/cmd/roachtest/test/test_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,25 @@ import (
// cluster.
const DefaultCockroachPath = "./cockroach-default"

// EnvAssertionsEnabledSeed is the name of the environment variable
// that, when set, causes roachtest to use a binary with runtime
// assertions enabled (if available), using the random seed contained
// in that environment variable.
var EnvAssertionsEnabledSeed = "ROACHTEST_ASSERTIONS_ENABLED_SEED"

// Test is the interface through which roachtests interact with the
// test harness.
type Test interface {
Cockroach() string // path to main cockroach binary
// CockroachShort returns the path to cockroach-short binary compiled with
// --crdb_test build tag, or an empty string if no such binary was given.
CockroachShort() string
// StandardCockroach returns path to main cockroach binary, compiled
// without runtime assertions.
StandardCockroach() string
// RuntimeAssertionsCockroach returns the path to cockroach-short
// binary compiled with --crdb_test build tag, or an empty string if
// no such binary was given.
RuntimeAssertionsCockroach() string
// Cockroach returns either StandardCockroach or RuntimeAssertionsCockroach,
// picked randomly.
Cockroach() string
Name() string
BuildVersion() *version.Version
IsBuildVersion(string) bool // "vXX.YY"
Expand Down
59 changes: 54 additions & 5 deletions pkg/cmd/roachtest/test_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,14 @@ import (
"context"
"fmt"
"io"
"math/rand"
"os"
"strings"
"sync"
"time"

"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
Expand Down Expand Up @@ -59,8 +62,12 @@ type failure struct {
type testImpl struct {
spec *registry.TestSpec

cockroach string // path to main cockroach binary
cockroachShort string // path to cockroach-short binary compiled with --crdb_test build tag
cockroach string // path to main cockroach binary
cockroachShort string // path to cockroach-short binary compiled with --crdb_test build tag

randomCockroachOnce sync.Once
randomizedCockroach string // either `cockroach` or `cockroach-short`, picked randomly

deprecatedWorkload string // path to workload binary
debug bool // whether the test is in debug mode.
// buildVersion is the version of the Cockroach binary that the test will run
Expand Down Expand Up @@ -131,15 +138,57 @@ func (t *testImpl) BuildVersion() *version.Version {
return t.buildVersion
}

// Cockroach returns the path to the cockroach binary.
// Cockroach will return either `RuntimeAssertionsCockroach()` or
// `StandardCockroach()`, picked randomly. Once a random choice has
// been made, the same binary will be returned on every call to
// `Cockroach`, to avoid errors that may arise from binaries having a
// different value for metamorphic constants.
func (t *testImpl) Cockroach() string {
return t.cockroach
// If the test is a benchmark test, we don't want to enable assertions
// as it will slow down performance.
if t.spec.Benchmark {
// TODO: remove this just a sanity check
t.l.Printf("Benchmark test, running with standard cockroach")
return t.StandardCockroach()
}
t.randomCockroachOnce.Do(func() {
assertionsEnabledProbability := 0.5
// If the user specified a custom seed to be used with runtime
// assertions, assume they want to run the test with assertions
// enabled, making it easier to reproduce issues.
if os.Getenv(test.EnvAssertionsEnabledSeed) != "" {
assertionsEnabledProbability = 1
}

if rand.Float64() < assertionsEnabledProbability {
// The build with runtime assertions should exist in every nightly
// CI build, but we can't assume it exists in every roachtest
// call.
if path := t.RuntimeAssertionsCockroach(); path != "" {
// TODO: remove this just a sanity check
t.l.Printf("Running with runtime assertions enabled on global seed: %d", globalSeed)
t.randomizedCockroach = path
return
} else {
t.l.Printf("WARNING: running without runtime assertions since the corresponding binary was not specified")
}
}
// TODO: remove, this is just for testing
//t.randomizedCockroach = t.RuntimeAssertionsCockroach()
t.randomizedCockroach = t.StandardCockroach()
})

return t.randomizedCockroach
}

func (t *testImpl) CockroachShort() string {
func (t *testImpl) RuntimeAssertionsCockroach() string {
return t.cockroachShort
}

func (t *testImpl) StandardCockroach() string {
return t.cockroach
}

func (t *testImpl) DeprecatedWorkload() string {
return t.deprecatedWorkload
}
Expand Down
10 changes: 6 additions & 4 deletions pkg/cmd/roachtest/tests/allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,10 +172,12 @@ func registerAllocator(r registry.Registry) {
},
})
r.Add(registry.TestSpec{
Name: `replicate/wide`,
Owner: registry.OwnerKV,
Benchmark: true,
Timeout: 10 * time.Minute,
Name: `replicate/wide`,
Owner: registry.OwnerKV,
Benchmark: true,
// Allow a longer running time to account for runs that use a
// cockroach build with runtime assertions enabled.
Timeout: 30 * time.Minute,
Cluster: r.MakeClusterSpec(9, spec.CPU(1)),
Leases: registry.MetamorphicLeases,
CompatibleClouds: registry.AllExceptAWS,
Expand Down
9 changes: 5 additions & 4 deletions pkg/cmd/roachtest/tests/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,13 @@ func importBankDataSplit(
) string {
dest := destinationName(c)

cockroach := t.Cockroach()
c.Put(ctx, cockroach, "./cockroach")
c.Put(ctx, t.DeprecatedWorkload(), "./workload")
c.Put(ctx, t.Cockroach(), "./cockroach")

// NB: starting the cluster creates the logs dir as a side effect,
// needed below.
c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings())
c.Start(ctx, t.L(), maybeUseMemoryBudget(t, 50), install.MakeClusterSettings())
runImportBankDataSplit(ctx, rows, ranges, t, c)
return dest
}
Expand Down Expand Up @@ -195,7 +196,7 @@ func runImportBankDataSplit(ctx context.Context, rows, ranges int, t test.Test,
}

func importBankData(ctx context.Context, rows int, t test.Test, c cluster.Cluster) string {
return importBankDataSplit(ctx, rows, 0 /* ranges */, t, c)
return importBankDataSplit(ctx, rows, 0, t, c)
}

func registerBackupNodeShutdown(r registry.Registry) {
Expand Down Expand Up @@ -958,7 +959,7 @@ func runBackupMVCCRangeTombstones(
if !config.skipClusterSetup {
c.Put(ctx, t.Cockroach(), "./cockroach")
c.Put(ctx, t.DeprecatedWorkload(), "./workload") // required for tpch
c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings())
c.Start(ctx, t.L(), maybeUseMemoryBudget(t, 35), install.MakeClusterSettings())
}
t.Status("starting csv servers")
c.Run(ctx, c.All(), `./cockroach workload csv-server --port=8081 &> logs/workload-csv-server.log < /dev/null &`)
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/roachtest/tests/backup_restore_roundtrip.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ func backupRestoreRoundTrip(ctx context.Context, t test.Test, c cluster.Cluster)
// Upload binaries and start cluster.
uploadVersion(ctx, t, c, c.All(), clusterupgrade.MainVersion)

c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings(install.SecureOption(true)), roachNodes)
c.Start(ctx, t.L(), maybeUseMemoryBudget(t, 50), install.MakeClusterSettings(install.SecureOption(true)), roachNodes)
m := c.NewMonitor(ctx, roachNodes)

m.Go(func(ctx context.Context) error {
Expand Down
5 changes: 4 additions & 1 deletion pkg/cmd/roachtest/tests/cdc.go
Original file line number Diff line number Diff line change
Expand Up @@ -619,7 +619,10 @@ func newCDCTester(ctx context.Context, t test.Test, c cluster.Cluster) cdcTester

settings.Env = append(settings.Env, envVars...)

c.Put(ctx, t.Cockroach(), "./cockroach")
// Allow cockroach with runtime assertions enabled unless this is a
// performance test.
cockroach := t.Cockroach()
c.Put(ctx, cockroach, "./cockroach")
c.Start(ctx, t.L(), startOpts, settings, tester.crdbNodes)
c.Put(ctx, t.DeprecatedWorkload(), "./workload", tester.workloadNode)
tester.startGrafana()
Expand Down
9 changes: 8 additions & 1 deletion pkg/cmd/roachtest/tests/copy.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,13 @@ func registerCopy(r registry.Registry) {
c.Put(ctx, t.DeprecatedWorkload(), "./workload", c.All())
c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings(), c.All())

// Make sure the copy commands have sufficient time to finish when
// runtime assertions are enabled.
copyTimeout := 10 * time.Minute
if usingRuntimeAssertions(t) {
copyTimeout = 20 * time.Minute
}

m := c.NewMonitor(ctx, c.All())
m.Go(func(ctx context.Context) error {
db := c.Conn(ctx, t.L(), 1)
Expand Down Expand Up @@ -100,7 +107,7 @@ func registerCopy(r registry.Registry) {
QueryRowContext(ctx context.Context, query string, args ...interface{}) *gosql.Row
}
runCopy := func(ctx context.Context, qu querier) error {
ctx, cancel := context.WithTimeout(ctx, 10*time.Minute) // avoid infinite internal retries
ctx, cancel := context.WithTimeout(ctx, copyTimeout) // avoid infinite internal retries
defer cancel()

for lastID := -1; lastID+1 < rows; {
Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/tests/kvbench.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ func registerKVBenchSpec(r registry.Registry, b kvBenchSpec) {
Suites: registry.ManualOnly,
Tags: registry.Tags("manual"),
Owner: registry.OwnerKV,
Benchmark: true, // TODO: Comments imply this is a benchmark test but this was not set
Cluster: nodes,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runKVBench(ctx, t, c, b)
Expand Down
Loading

0 comments on commit 325f77c

Please sign in to comment.