diff --git a/build/teamcity/cockroach/nightlies/roachtest_compile_component.sh b/build/teamcity/cockroach/nightlies/roachtest_compile_component.sh index fcae0cab5576..d872ef9920af 100755 --- a/build/teamcity/cockroach/nightlies/roachtest_compile_component.sh +++ b/build/teamcity/cockroach/nightlies/roachtest_compile_component.sh @@ -83,7 +83,13 @@ case "$component" in ;; roachtest) # Roachtest binary. - bazel_args=(//pkg/cmd/roachtest) + # N.B. We always compile the roachtest binary with crdb_test so the same serialization + # on the wire is established with cockroach binaries built under crdb_test. + # E.g. KVNemesisSeq is used only under crdb_test builds. If the cockroach binary is + # built with crdb_test, it will expect this field to be sent by the roachtest runner. + # Note that the opposite is not true, and a cockroach binary built without crdb_test + # is still compatible with a roachtest binary built with it. + bazel_args=(//pkg/cmd/roachtest --crdb_test) artifacts=("pkg/cmd/roachtest/roachtest_/roachtest:bin/roachtest.$os-$arch") ;; *) diff --git a/build/teamcity/cockroach/nightlies/roachtest_nightly_aws.sh b/build/teamcity/cockroach/nightlies/roachtest_nightly_aws.sh index 2928bab4cdc3..02a48f874a7e 100755 --- a/build/teamcity/cockroach/nightlies/roachtest_nightly_aws.sh +++ b/build/teamcity/cockroach/nightlies/roachtest_nightly_aws.sh @@ -7,5 +7,5 @@ dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))" source "$dir/teamcity-support.sh" # For $root source "$dir/teamcity-bazel-support.sh" # For run_bazel -BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e AWS_ACCESS_KEY_ID -e AWS_ACCESS_KEY_ID_ASSUME_ROLE -e AWS_KMS_KEY_ARN_A -e AWS_KMS_KEY_ARN_B -e AWS_KMS_REGION_A -e AWS_KMS_REGION_B -e AWS_ROLE_ARN -e AWS_SECRET_ACCESS_KEY -e AWS_SECRET_ACCESS_KEY_ASSUME_ROLE -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY" \ +BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e AWS_ACCESS_KEY_ID -e AWS_ACCESS_KEY_ID_ASSUME_ROLE -e AWS_KMS_KEY_ARN_A -e AWS_KMS_KEY_ARN_B -e AWS_KMS_REGION_A -e AWS_KMS_REGION_B -e AWS_ROLE_ARN -e AWS_SECRET_ACCESS_KEY -e AWS_SECRET_ACCESS_KEY_ASSUME_ROLE -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY -e COCKROACH_RANDOM_SEED -e ROACHTEST_ASSERTIONS_ENABLED_SEED" \ run_bazel build/teamcity/cockroach/nightlies/roachtest_nightly_impl.sh diff --git a/build/teamcity/cockroach/nightlies/roachtest_nightly_azure.sh b/build/teamcity/cockroach/nightlies/roachtest_nightly_azure.sh index 8d5c5e844221..118c0680f7a6 100755 --- a/build/teamcity/cockroach/nightlies/roachtest_nightly_azure.sh +++ b/build/teamcity/cockroach/nightlies/roachtest_nightly_azure.sh @@ -7,5 +7,5 @@ dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))" source "$dir/teamcity-support.sh" # For $root source "$dir/teamcity-bazel-support.sh" # For run_bazel -BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e AZURE_CLIENT_ID -e AZURE_CLIENT_SECRET -e AZURE_SUBSCRIPTION_ID -e AZURE_TENANT_ID -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY" \ +BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e AZURE_CLIENT_ID -e AZURE_CLIENT_SECRET -e AZURE_SUBSCRIPTION_ID -e AZURE_TENANT_ID -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY -e COCKROACH_RANDOM_SEED -e ROACHTEST_ASSERTIONS_ENABLED_SEED" \ run_bazel build/teamcity/cockroach/nightlies/roachtest_nightly_impl.sh diff --git a/build/teamcity/cockroach/nightlies/roachtest_nightly_gce.sh b/build/teamcity/cockroach/nightlies/roachtest_nightly_gce.sh index 7d798a357850..e0899cef5f97 100755 --- a/build/teamcity/cockroach/nightlies/roachtest_nightly_gce.sh +++ b/build/teamcity/cockroach/nightlies/roachtest_nightly_gce.sh @@ -7,5 +7,5 @@ dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))" source "$dir/teamcity-support.sh" # For $root source "$dir/teamcity-bazel-support.sh" # For run_bazel -BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e GOOGLE_KMS_KEY_A -e GOOGLE_KMS_KEY_B -e GOOGLE_CREDENTIALS_ASSUME_ROLE -e GOOGLE_SERVICE_ACCOUNT -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY" \ +BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e GOOGLE_KMS_KEY_A -e GOOGLE_KMS_KEY_B -e GOOGLE_CREDENTIALS_ASSUME_ROLE -e GOOGLE_SERVICE_ACCOUNT -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY -e COCKROACH_RANDOM_SEED -e ROACHTEST_ASSERTIONS_ENABLED_SEED" \ run_bazel build/teamcity/cockroach/nightlies/roachtest_nightly_impl.sh diff --git a/pkg/cmd/roachtest/cluster.go b/pkg/cmd/roachtest/cluster.go index 78f14e77e2e7..84c204905ce2 100644 --- a/pkg/cmd/roachtest/cluster.go +++ b/pkg/cmd/roachtest/cluster.go @@ -17,6 +17,7 @@ import ( "fmt" "io" "io/fs" + "math/rand" "net" "net/url" "os" @@ -689,8 +690,13 @@ type clusterImpl struct { // BAZEL_COVER_DIR will be set to this value when starting a node. goCoverDir string - os string // OS of the cluster - arch vm.CPUArch // CPU architecture of the cluster + os string // OS of the cluster + arch vm.CPUArch // CPU architecture of the cluster + randomSeed struct { + mu syncutil.Mutex + seed *int64 + } + // destroyState contains state related to the cluster's destruction. destroyState destroyState } @@ -2003,10 +2009,16 @@ func (c *clusterImpl) StartE( startOpts.RoachprodOpts.EncryptedStores = c.encAtRest - if !envExists(settings.Env, "COCKROACH_CRASH_ON_SPAN_USE_AFTER_FINISH") { - // Panic on span use-after-Finish, so we catch such bugs. - settings.Env = append(settings.Env, "COCKROACH_CRASH_ON_SPAN_USE_AFTER_FINISH=true") + setUnlessExists := func(name string, value interface{}) { + if !envExists(settings.Env, name) { + settings.Env = append(settings.Env, fmt.Sprintf("%s=%s", name, fmt.Sprint(value))) + } } + // Panic on span use-after-Finish, so we catch such bugs. + setUnlessExists("COCKROACH_CRASH_ON_SPAN_USE_AFTER_FINISH", true) + // Set the same seed on every node, to be used by builds with + // runtime assertions enabled. + setUnlessExists("COCKROACH_RANDOM_SEED", c.cockroachRandomSeed()) // Needed for backward-compat on crdb_internal.ranges{_no_leases}. // Remove in v23.2. @@ -2071,6 +2083,38 @@ func (c *clusterImpl) RefetchCertsFromNode(ctx context.Context, node int) error }) } +// SetRandomSeed sets the random seed to be used by the cluster. If +// not called, clusters generate a random seed from the global +// generator in the `rand` package. This function must be called +// before any nodes in the cluster start. +func (c *clusterImpl) SetRandomSeed(seed int64) { + c.randomSeed.seed = &seed +} + +// cockroachRandomSeed returns the `COCKROACH_RANDOM_SEED` to be used +// by this cluster. The seed may have been previously set by a +// previous call to `StartE`, or by the user via `SetRandomSeed`. If +// not set, this function will generate a seed and return it. +func (c *clusterImpl) cockroachRandomSeed() int64 { + c.randomSeed.mu.Lock() + defer c.randomSeed.mu.Unlock() + + // If the user provided a seed via environment variable, always use + // that, even if the test attempts to set a different seed. + if seedStr := os.Getenv(test.EnvAssertionsEnabledSeed); seedStr != "" { + seedOverride, err := strconv.ParseInt(seedStr, 0, 64) + if err != nil { + panic(fmt.Sprintf("error parsing %s: %s", test.EnvAssertionsEnabledSeed, err)) + } + c.randomSeed.seed = &seedOverride + } else if c.randomSeed.seed == nil { + seed := rand.Int63() + c.randomSeed.seed = &seed + } + + return *c.randomSeed.seed +} + // Start is like StartE() except that it will fatal the test on error. func (c *clusterImpl) Start( ctx context.Context, diff --git a/pkg/cmd/roachtest/cluster/cluster_interface.go b/pkg/cmd/roachtest/cluster/cluster_interface.go index 9ee79b50021f..bf63794c1e20 100644 --- a/pkg/cmd/roachtest/cluster/cluster_interface.go +++ b/pkg/cmd/roachtest/cluster/cluster_interface.go @@ -49,6 +49,10 @@ type Cluster interface { ctx context.Context, content, dest string, mode os.FileMode, opts ...option.Option, ) error + // SetRandomSeed allows tests to set their own random seed to be + // used by builds with runtime assertions enabled. + SetRandomSeed(seed int64) + // Starting and stopping CockroachDB. StartE(ctx context.Context, l *logger.Logger, startOpts option.StartOpts, settings install.ClusterSettings, opts ...option.Option) error diff --git a/pkg/cmd/roachtest/cluster_test.go b/pkg/cmd/roachtest/cluster_test.go index 2a4682537ecc..feb893a3e12d 100644 --- a/pkg/cmd/roachtest/cluster_test.go +++ b/pkg/cmd/roachtest/cluster_test.go @@ -72,7 +72,11 @@ func (t testWrapper) Cockroach() string { return "./dummy-path/to/cockroach" } -func (t testWrapper) CockroachShort() string { +func (t testWrapper) StandardCockroach() string { + return "./dummy-path/to/cockroach" +} + +func (t testWrapper) RuntimeAssertionsCockroach() string { return "./dummy-path/to/cockroach-short" } diff --git a/pkg/cmd/roachtest/roachtestutil/clusterupgrade/clusterupgrade.go b/pkg/cmd/roachtest/roachtestutil/clusterupgrade/clusterupgrade.go index 2c4393566d02..d84d3780c89b 100644 --- a/pkg/cmd/roachtest/roachtestutil/clusterupgrade/clusterupgrade.go +++ b/pkg/cmd/roachtest/roachtestutil/clusterupgrade/clusterupgrade.go @@ -135,7 +135,10 @@ func UploadVersion( newVersion *Version, ) (string, error) { dstBinary := BinaryPathForVersion(t, newVersion) - srcBinary := t.Cockroach() + // Run with standard binary as older versions retrieved through roachprod stage + // are not currently available with crdb_test enabled. + // TODO(DarrylWong): Compile older versions with crdb_test flag. + srcBinary := t.StandardCockroach() overrideBinary, isOverriden := t.VersionsBinaryOverride()[newVersion.String()] if isOverriden { diff --git a/pkg/cmd/roachtest/run.go b/pkg/cmd/roachtest/run.go index 7edacd7de831..5fcdde4efa53 100644 --- a/pkg/cmd/roachtest/run.go +++ b/pkg/cmd/roachtest/run.go @@ -13,6 +13,7 @@ package main import ( "context" "fmt" + "math/rand" "net/http" "os" "os/signal" @@ -29,6 +30,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/log/logconfig" "github.com/cockroachdb/cockroach/pkg/util/log/logpb" + "github.com/cockroachdb/cockroach/pkg/util/randutil" "github.com/cockroachdb/cockroach/pkg/util/stop" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" @@ -41,6 +43,7 @@ var ( parallelism int cpuQuota int + globalSeed int64 // Path to a local dir where the test logs and artifacts collected from // cluster will be placed. @@ -153,6 +156,9 @@ func addRunBenchCommonFlags(cmd *cobra.Command) { cmd.Flags().BoolVar( &forceCloudCompat, "force-cloud-compat", false, "Includes tests that are not marked as compatible with the cloud used") addSuiteAndOwnerFlags(cmd) + cmd.Flags().Int64Var( + &globalSeed, "global-seed", randutil.NewPseudoSeed(), + "The global random seed used for all tests.") } func addRunFlags(runCmd *cobra.Command) { @@ -179,6 +185,8 @@ func addBenchFlags(benchCmd *cobra.Command) { // runTests is the main function for the run and bench commands. // Assumes initRunFlagsBinariesAndLibraries was called. func runTests(register func(registry.Registry), filter *registry.TestFilter) error { + //lint:ignore SA1019 deprecated + rand.Seed(globalSeed) r := makeTestRegistry(cloud) // actual registering of tests @@ -257,6 +265,7 @@ func runTests(register func(registry.Registry), filter *registry.TestFilter) err literalArtifactsDir: literalArtifactsDir, runnerLogPath: runnerLogPath, } + l.Printf("global random seed: %d", globalSeed) go func() { if err := http.ListenAndServe( fmt.Sprintf(":%d", promPort), diff --git a/pkg/cmd/roachtest/test/test_interface.go b/pkg/cmd/roachtest/test/test_interface.go index 358ff240cb3f..19d09f6ef7c2 100644 --- a/pkg/cmd/roachtest/test/test_interface.go +++ b/pkg/cmd/roachtest/test/test_interface.go @@ -20,13 +20,25 @@ import ( // cluster. const DefaultCockroachPath = "./cockroach-default" +// EnvAssertionsEnabledSeed is the name of the environment variable +// that, when set, causes roachtest to use a binary with runtime +// assertions enabled (if available), using the random seed contained +// in that environment variable. +var EnvAssertionsEnabledSeed = "ROACHTEST_ASSERTIONS_ENABLED_SEED" + // Test is the interface through which roachtests interact with the // test harness. type Test interface { - Cockroach() string // path to main cockroach binary - // CockroachShort returns the path to cockroach-short binary compiled with - // --crdb_test build tag, or an empty string if no such binary was given. - CockroachShort() string + // StandardCockroach returns path to main cockroach binary, compiled + // without runtime assertions. + StandardCockroach() string + // RuntimeAssertionsCockroach returns the path to cockroach-short + // binary compiled with --crdb_test build tag, or an empty string if + // no such binary was given. + RuntimeAssertionsCockroach() string + // Cockroach returns either StandardCockroach or RuntimeAssertionsCockroach, + // picked randomly. + Cockroach() string Name() string BuildVersion() *version.Version IsBuildVersion(string) bool // "vXX.YY" diff --git a/pkg/cmd/roachtest/test_impl.go b/pkg/cmd/roachtest/test_impl.go index 4e405fef2b55..38f7a007d719 100644 --- a/pkg/cmd/roachtest/test_impl.go +++ b/pkg/cmd/roachtest/test_impl.go @@ -14,11 +14,14 @@ import ( "context" "fmt" "io" + "math/rand" "os" "strings" + "sync" "time" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/logger" "github.com/cockroachdb/cockroach/pkg/testutils/skip" "github.com/cockroachdb/cockroach/pkg/util/syncutil" @@ -59,8 +62,12 @@ type failure struct { type testImpl struct { spec *registry.TestSpec - cockroach string // path to main cockroach binary - cockroachShort string // path to cockroach-short binary compiled with --crdb_test build tag + cockroach string // path to main cockroach binary + cockroachEA string // path to cockroach-short binary compiled with --crdb_test build tag + + randomCockroachOnce sync.Once + randomizedCockroach string // either `cockroach` or `cockroach-short`, picked randomly + deprecatedWorkload string // path to workload binary debug bool // whether the test is in debug mode. // buildVersion is the version of the Cockroach binary that the test will run @@ -131,13 +138,51 @@ func (t *testImpl) BuildVersion() *version.Version { return t.buildVersion } -// Cockroach returns the path to the cockroach binary. +// Cockroach will return either `RuntimeAssertionsCockroach()` or +// `StandardCockroach()`, picked randomly. Once a random choice has +// been made, the same binary will be returned on every call to +// `Cockroach`, to avoid errors that may arise from binaries having a +// different value for metamorphic constants. func (t *testImpl) Cockroach() string { - return t.cockroach + // If the test is a benchmark test, we don't want to enable assertions + // as it will slow down performance. + if t.spec.Benchmark { + t.l.Printf("Benchmark test, running with standard cockroach") + return t.StandardCockroach() + } + t.randomCockroachOnce.Do(func() { + assertionsEnabledProbability := 0.5 + // If the user specified a custom seed to be used with runtime + // assertions, assume they want to run the test with assertions + // enabled, making it easier to reproduce issues. + if os.Getenv(test.EnvAssertionsEnabledSeed) != "" { + assertionsEnabledProbability = 1 + } + + if rand.Float64() < assertionsEnabledProbability { + // The build with runtime assertions should exist in every nightly + // CI build, but we can't assume it exists in every roachtest call. + if path := t.RuntimeAssertionsCockroach(); path != "" { + t.l.Printf("Runtime assertions enabled") + t.randomizedCockroach = path + return + } else { + t.l.Printf("WARNING: running without runtime assertions since the corresponding binary was not specified") + } + } + t.l.Printf("Runtime assertions disabled") + t.randomizedCockroach = t.StandardCockroach() + }) + + return t.randomizedCockroach } -func (t *testImpl) CockroachShort() string { - return t.cockroachShort +func (t *testImpl) RuntimeAssertionsCockroach() string { + return t.cockroachEA +} + +func (t *testImpl) StandardCockroach() string { + return t.cockroach } func (t *testImpl) DeprecatedWorkload() string { diff --git a/pkg/cmd/roachtest/test_runner.go b/pkg/cmd/roachtest/test_runner.go index 3db4a6edf5af..0327e10bd42d 100644 --- a/pkg/cmd/roachtest/test_runner.go +++ b/pkg/cmd/roachtest/test_runner.go @@ -679,7 +679,7 @@ func (r *testRunner) runWorker( t := &testImpl{ spec: &testToRun.spec, cockroach: cockroach[arch], - cockroachShort: cockroachEA[arch], + cockroachEA: cockroachEA[arch], deprecatedWorkload: workload[arch], buildVersion: binaryVersion, artifactsDir: testArtifactsDir, diff --git a/pkg/cmd/roachtest/tests/allocator.go b/pkg/cmd/roachtest/tests/allocator.go index a638c54ebeeb..a69c60e953c1 100644 --- a/pkg/cmd/roachtest/tests/allocator.go +++ b/pkg/cmd/roachtest/tests/allocator.go @@ -172,10 +172,12 @@ func registerAllocator(r registry.Registry) { }, }) r.Add(registry.TestSpec{ - Name: `replicate/wide`, - Owner: registry.OwnerKV, - Benchmark: true, - Timeout: 10 * time.Minute, + Name: `replicate/wide`, + Owner: registry.OwnerKV, + Benchmark: true, + // Allow a longer running time to account for runs that use a + // cockroach build with runtime assertions enabled. + Timeout: 30 * time.Minute, Cluster: r.MakeClusterSpec(9, spec.CPU(1)), Leases: registry.MetamorphicLeases, CompatibleClouds: registry.AllExceptAWS, diff --git a/pkg/cmd/roachtest/tests/asyncpg.go b/pkg/cmd/roachtest/tests/asyncpg.go index d69e30cdc012..24a52667686f 100644 --- a/pkg/cmd/roachtest/tests/asyncpg.go +++ b/pkg/cmd/roachtest/tests/asyncpg.go @@ -47,7 +47,16 @@ func registerAsyncpg(r registry.Registry) { node := c.Node(1) t.Status("setting up cockroach") c.Put(ctx, t.Cockroach(), "./cockroach", c.All()) - c.Start(ctx, t.L(), option.DefaultStartOptsInMemory(), install.MakeClusterSettings(), c.All()) + + // This test assumes that multiple_active_portals_enabled is false, but through + // metamorphic constants, it is possible for them to be enabled. We disable + // metamorphic testing to avoid this. Note the asyncpg test suite drops the + // database so we can't set the session variable like we do in pgjdbc. + // TODO(DarrylWong): Use a metamorphic constants exclusion list instead. + // See: https://github.com/cockroachdb/cockroach/issues/113164 + settings := install.MakeClusterSettings() + settings.Env = append(settings.Env, "COCKROACH_INTERNAL_DISABLE_METAMORPHIC_TESTING=true") + c.Start(ctx, t.L(), option.DefaultStartOptsInMemory(), settings, c.All()) version, err := fetchCockroachVersion(ctx, t.L(), c, node[0]) if err != nil { diff --git a/pkg/cmd/roachtest/tests/backup.go b/pkg/cmd/roachtest/tests/backup.go index 82fa92c385ef..76da662ff014 100644 --- a/pkg/cmd/roachtest/tests/backup.go +++ b/pkg/cmd/roachtest/tests/backup.go @@ -96,12 +96,13 @@ func importBankDataSplit( ) string { dest := destinationName(c) + cockroach := t.Cockroach() + c.Put(ctx, cockroach, "./cockroach") c.Put(ctx, t.DeprecatedWorkload(), "./workload") - c.Put(ctx, t.Cockroach(), "./cockroach") // NB: starting the cluster creates the logs dir as a side effect, // needed below. - c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings()) + c.Start(ctx, t.L(), maybeUseMemoryBudget(t, 50), install.MakeClusterSettings()) runImportBankDataSplit(ctx, rows, ranges, t, c) return dest } @@ -194,7 +195,7 @@ func runImportBankDataSplit(ctx context.Context, rows, ranges int, t test.Test, } func importBankData(ctx context.Context, rows int, t test.Test, c cluster.Cluster) string { - return importBankDataSplit(ctx, rows, 0 /* ranges */, t, c) + return importBankDataSplit(ctx, rows, 0, t, c) } func registerBackupNodeShutdown(r registry.Registry) { @@ -746,7 +747,7 @@ func runBackupMVCCRangeTombstones( if !config.skipClusterSetup { c.Put(ctx, t.Cockroach(), "./cockroach") c.Put(ctx, t.DeprecatedWorkload(), "./workload") // required for tpch - c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings()) + c.Start(ctx, t.L(), maybeUseMemoryBudget(t, 50), install.MakeClusterSettings()) } t.Status("starting csv servers") c.Run(ctx, c.All(), `./cockroach workload csv-server --port=8081 &> logs/workload-csv-server.log < /dev/null &`) diff --git a/pkg/cmd/roachtest/tests/backup_restore_roundtrip.go b/pkg/cmd/roachtest/tests/backup_restore_roundtrip.go index 33222fa54553..835b49997f8f 100644 --- a/pkg/cmd/roachtest/tests/backup_restore_roundtrip.go +++ b/pkg/cmd/roachtest/tests/backup_restore_roundtrip.go @@ -103,7 +103,7 @@ func backupRestoreRoundTrip( "COCKROACH_MIN_RANGE_MAX_BYTES=1", }) - c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings(install.SecureOption(true), envOption), roachNodes) + c.Start(ctx, t.L(), maybeUseMemoryBudget(t, 50), install.MakeClusterSettings(install.SecureOption(true), envOption), roachNodes) m := c.NewMonitor(ctx, roachNodes) m.Go(func(ctx context.Context) error { diff --git a/pkg/cmd/roachtest/tests/cdc.go b/pkg/cmd/roachtest/tests/cdc.go index aa558fee7c9a..a4548de6a797 100644 --- a/pkg/cmd/roachtest/tests/cdc.go +++ b/pkg/cmd/roachtest/tests/cdc.go @@ -619,7 +619,10 @@ func newCDCTester(ctx context.Context, t test.Test, c cluster.Cluster) cdcTester settings.Env = append(settings.Env, envVars...) - c.Put(ctx, t.Cockroach(), "./cockroach") + // Allow cockroach with runtime assertions enabled unless this is a + // performance test. + cockroach := t.Cockroach() + c.Put(ctx, cockroach, "./cockroach") c.Start(ctx, t.L(), startOpts, settings, tester.crdbNodes) c.Put(ctx, t.DeprecatedWorkload(), "./workload", tester.workloadNode) tester.startGrafana() diff --git a/pkg/cmd/roachtest/tests/copy.go b/pkg/cmd/roachtest/tests/copy.go index 828b9da4425d..f1081730868d 100644 --- a/pkg/cmd/roachtest/tests/copy.go +++ b/pkg/cmd/roachtest/tests/copy.go @@ -49,7 +49,20 @@ func registerCopy(r registry.Registry) { c.Put(ctx, t.Cockroach(), "./cockroach", c.All()) c.Put(ctx, t.DeprecatedWorkload(), "./workload", c.All()) - c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings(), c.All()) + // We run this without metamorphic constants as kv-batch-size = 1 makes + // this test take far too long to complete. + // TODO(DarrylWong): Use a metamorphic constants exclusion list instead. + // See: https://github.com/cockroachdb/cockroach/issues/113164 + settings := install.MakeClusterSettings() + settings.Env = append(settings.Env, "COCKROACH_INTERNAL_DISABLE_METAMORPHIC_TESTING=true") + c.Start(ctx, t.L(), option.DefaultStartOpts(), settings, c.All()) + + // Make sure the copy commands have sufficient time to finish when + // runtime assertions are enabled. + copyTimeout := 10 * time.Minute + if usingRuntimeAssertions(t) { + copyTimeout = 20 * time.Minute + } m := c.NewMonitor(ctx, c.All()) m.Go(func(ctx context.Context) error { @@ -100,7 +113,7 @@ func registerCopy(r registry.Registry) { QueryRowContext(ctx context.Context, query string, args ...interface{}) *gosql.Row } runCopy := func(ctx context.Context, qu querier) error { - ctx, cancel := context.WithTimeout(ctx, 10*time.Minute) // avoid infinite internal retries + ctx, cancel := context.WithTimeout(ctx, copyTimeout) // avoid infinite internal retries defer cancel() for lastID := -1; lastID+1 < rows; { diff --git a/pkg/cmd/roachtest/tests/import_cancellation.go b/pkg/cmd/roachtest/tests/import_cancellation.go index b925f3cc51a6..77393b80ff6d 100644 --- a/pkg/cmd/roachtest/tests/import_cancellation.go +++ b/pkg/cmd/roachtest/tests/import_cancellation.go @@ -20,7 +20,6 @@ import ( "time" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" - "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" @@ -51,7 +50,9 @@ func registerImportCancellation(r registry.Registry) { func runImportCancellation(ctx context.Context, t test.Test, c cluster.Cluster) { c.Put(ctx, t.Cockroach(), "./cockroach") c.Put(ctx, t.DeprecatedWorkload(), "./workload") // required for tpch - c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings()) + startOpts := maybeUseMemoryBudget(t, 50) + startOpts.RoachprodOpts.ScheduleBackups = true + c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings()) t.Status("starting csv servers") c.Run(ctx, c.All(), `./cockroach workload csv-server --port=8081 &> logs/workload-csv-server.log < /dev/null &`) diff --git a/pkg/cmd/roachtest/tests/kvbench.go b/pkg/cmd/roachtest/tests/kvbench.go index 462ce6d2acc0..9afe8dd81c16 100644 --- a/pkg/cmd/roachtest/tests/kvbench.go +++ b/pkg/cmd/roachtest/tests/kvbench.go @@ -96,6 +96,7 @@ func registerKVBenchSpec(r registry.Registry, b kvBenchSpec) { Suites: registry.ManualOnly, Tags: registry.Tags("manual"), Owner: registry.OwnerKV, + Benchmark: true, Cluster: nodes, Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { runKVBench(ctx, t, c, b) diff --git a/pkg/cmd/roachtest/tests/multitenant_distsql.go b/pkg/cmd/roachtest/tests/multitenant_distsql.go index af24cc4fd19e..edef1bd5fa52 100644 --- a/pkg/cmd/roachtest/tests/multitenant_distsql.go +++ b/pkg/cmd/roachtest/tests/multitenant_distsql.go @@ -59,6 +59,8 @@ func runMultiTenantDistSQL( bundle bool, timeoutMillis int, ) { + // This test fails when runtime assertions are enabled. + // See: https://github.com/cockroachdb/cockroach/issues/113170 c.Put(ctx, t.Cockroach(), "./cockroach") // This test sets a smaller default range size than the default due to // performance and resource limitations. We set the minimum range max bytes to diff --git a/pkg/cmd/roachtest/tests/multitenant_utils.go b/pkg/cmd/roachtest/tests/multitenant_utils.go index ab48b15c516d..3ad051cc6d04 100644 --- a/pkg/cmd/roachtest/tests/multitenant_utils.go +++ b/pkg/cmd/roachtest/tests/multitenant_utils.go @@ -14,6 +14,7 @@ import ( "context" gosql "database/sql" "fmt" + "math/rand" "net/url" "os" "path/filepath" @@ -187,10 +188,12 @@ func (tn *tenantNode) start(ctx context.Context, t test.Test, c cluster.Cluster, "--store=" + tn.storeDir()} internalIPs, err := c.InternalIP(ctx, t.L(), c.Node(tn.node)) + randomSeed := rand.Int63() + c.SetRandomSeed(randomSeed) require.NoError(t, err) tn.errCh = startTenantServer( ctx, c, c.Node(tn.node), internalIPs[0], binary, tn.kvAddrs, tn.tenantID, - tn.httpPort, tn.sqlPort, tn.envVars, + tn.httpPort, tn.sqlPort, tn.envVars, randomSeed, extraArgs..., ) @@ -254,6 +257,7 @@ func startTenantServer( httpPort int, sqlPort int, envVars []string, + randomSeed int64, extraFlags ...string, ) chan error { args := []string{ @@ -268,6 +272,10 @@ func startTenantServer( errCh := make(chan error, 1) go func() { + // Set the same random seed for every tenant; this is picked up by + // runs that use a build with runtime assertions enabled, and + // ignored otherwise. + envVars = append(envVars, fmt.Sprintf("COCKROACH_RANDOM_SEED=%d", randomSeed)) errCh <- c.RunE(tenantCtx, node, append(append(append([]string{}, envVars...), binary, "mt", "start-sql"), args...)..., ) diff --git a/pkg/cmd/roachtest/tests/mvcc_gc.go b/pkg/cmd/roachtest/tests/mvcc_gc.go index 2c83a961e481..1aee02e3798c 100644 --- a/pkg/cmd/roachtest/tests/mvcc_gc.go +++ b/pkg/cmd/roachtest/tests/mvcc_gc.go @@ -78,6 +78,15 @@ func runMVCCGC(ctx context.Context, t test.Test, c cluster.Cluster) { // How long to wait for data to be GCd during assert loop. const gcRetryTimeout = 7 * time.Minute + var randomSeed int64 + if usingRuntimeAssertions(t) { + // Do not use `0` as that is reserved to mean that we are running + // without runtime assertions. + for randomSeed == 0 { + randomSeed = rand.Int63() + } + c.SetRandomSeed(randomSeed) + } c.Put(ctx, t.Cockroach(), "./cockroach") s := install.MakeClusterSettings() s.Env = append(s.Env, "COCKROACH_SCAN_INTERVAL=30s") @@ -139,7 +148,7 @@ func runMVCCGC(ctx context.Context, t test.Test, c cluster.Cluster) { t.L().Printf("performing clean-assert cycle #%d", i) if err := retry.WithMaxAttempts(ctx, retry.Options{}, 3, func() error { - return deleteSomeTableDataWithOverlappingTombstones(ctx, t, c, conn, rng, m, 5) + return deleteSomeTableDataWithOverlappingTombstones(ctx, t, c, conn, rng, m, 5, randomSeed) }); err != nil { t.Fatal(err) } @@ -163,7 +172,7 @@ func runMVCCGC(ctx context.Context, t test.Test, c cluster.Cluster) { wlCancel() if err := retry.WithMaxAttempts(ctx, retry.Options{}, 3, func() error { - err := deleteAllTableDataWithOverlappingTombstones(ctx, t, c, conn, rng, m, 5) + err := deleteAllTableDataWithOverlappingTombstones(ctx, t, c, conn, rng, m, 5, randomSeed) if err != nil { return err } @@ -481,6 +490,7 @@ func deleteAllTableDataWithOverlappingTombstones( rng *rand.Rand, tm tableMetadata, fragments int, + randomSeed int64, ) error { t.Helper() encodeKey := func(index int64) roachpb.Key { @@ -514,7 +524,7 @@ func deleteAllTableDataWithOverlappingTombstones( t.L().Printf("adding range tombstone [%s, %s)", startKey, endKey) addDeleteRangeUsingTombstone(&ba, startKey, endKey) } - br, err := sendBatchRequest(ctx, t, c, 1, ba) + br, err := sendBatchRequest(ctx, t, c, 1, ba, randomSeed) if err != nil { return err } @@ -533,6 +543,7 @@ func deleteSomeTableDataWithOverlappingTombstones( rng *rand.Rand, tm tableMetadata, rangeKeys int, + randomSeed int64, ) error { t.Helper() encodeKey := func(index int64) roachpb.Key { @@ -559,7 +570,7 @@ func deleteSomeTableDataWithOverlappingTombstones( t.L().Printf("adding range tombstone [%s, %s)", startKey, endKey) addDeleteRangeUsingTombstone(&ba, startKey, endKey) } - br, err := sendBatchRequest(ctx, t, c, 1, ba) + br, err := sendBatchRequest(ctx, t, c, 1, ba, randomSeed) if err != nil { return err } @@ -595,7 +606,12 @@ func addDeleteRangeUsingTombstone(ba *kvpb.BatchRequest, startKey, endKey roachp } func sendBatchRequest( - ctx context.Context, t test.Test, c cluster.Cluster, node int, ba kvpb.BatchRequest, + ctx context.Context, + t test.Test, + c cluster.Cluster, + node int, + ba kvpb.BatchRequest, + randomSeed int64, ) (kvpb.BatchResponse, error) { reqArg, err := batchToJSONOrFatal(ba) if err != nil { @@ -605,8 +621,12 @@ func sendBatchRequest( if err := c.PutString(ctx, reqArg, requestFileName, 0755, c.Node(node)); err != nil { return kvpb.BatchResponse{}, err } - res, err := c.RunWithDetailsSingleNode(ctx, t.L(), c.Node(node), "./cockroach", "debug", - "send-kv-batch", "--insecure", requestFileName) + var debugEnv string + if randomSeed != 0 { + debugEnv = fmt.Sprintf("COCKROACH_RANDOM_SEED=%d ", randomSeed) + } + res, err := c.RunWithDetailsSingleNode( + ctx, t.L(), c.Node(node), debugEnv+"./cockroach debug send-kv-batch --insecure", requestFileName) if err != nil { return kvpb.BatchResponse{}, err } diff --git a/pkg/cmd/roachtest/tests/pgjdbc.go b/pkg/cmd/roachtest/tests/pgjdbc.go index 914aa88264ad..ac3af40fb6d7 100644 --- a/pkg/cmd/roachtest/tests/pgjdbc.go +++ b/pkg/cmd/roachtest/tests/pgjdbc.go @@ -72,6 +72,17 @@ func registerPgjdbc(r registry.Registry) { } } + if usingRuntimeAssertions(t) { + // This test assumes that multiple_active_portals_enabled is false, but through + // metamorphic constants, it is possible for them to be enabled. + if _, err = db.ExecContext(ctx, "SET multiple_active_portals_enabled=false"); err != nil { + t.Fatal(err) + } + if _, err = db.ExecContext(ctx, "ALTER DATABASE defaultdb SET multiple_active_portals_enabled=false"); err != nil { + t.Fatal(err) + } + } + t.Status("cloning pgjdbc and installing prerequisites") // Report the latest tag, but do not use it. The newest versions produces output that breaks our xml parser, // and we want to pin to the working version for now. diff --git a/pkg/cmd/roachtest/tests/schemachange.go b/pkg/cmd/roachtest/tests/schemachange.go index ff4d94203cba..a501b1a40ce5 100644 --- a/pkg/cmd/roachtest/tests/schemachange.go +++ b/pkg/cmd/roachtest/tests/schemachange.go @@ -341,7 +341,9 @@ func makeIndexAddTpccTest( } func registerSchemaChangeBulkIngest(r registry.Registry) { - r.Add(makeSchemaChangeBulkIngestTest(r, 5, 100000000, time.Minute*20)) + // Allow a long running time to account for runs that use a + // cockroach build with runtime assertions enabled. + r.Add(makeSchemaChangeBulkIngestTest(r, 5, 100000000, time.Minute*60)) } func makeSchemaChangeBulkIngestTest( diff --git a/pkg/cmd/roachtest/tests/split.go b/pkg/cmd/roachtest/tests/split.go index 9dd90e3e1528..c3d49b287094 100644 --- a/pkg/cmd/roachtest/tests/split.go +++ b/pkg/cmd/roachtest/tests/split.go @@ -429,12 +429,18 @@ func runLoadSplits(ctx context.Context, t test.Test, c cluster.Cluster, params s c.Put(ctx, t.Cockroach(), "./cockroach", crdbNodes) c.Put(ctx, t.DeprecatedWorkload(), "./workload", workloadNode) + // We run this without metamorphic constants as the tests make + // incorrect assumptions about the absolute values of QPS. + // See: https://github.com/cockroachdb/cockroach/issues/112664 + // TODO(DarrylWong): enable metamorphic contants once issue is resolved + settings := install.MakeClusterSettings() + settings.Env = append(settings.Env, "COCKROACH_INTERNAL_DISABLE_METAMORPHIC_TESTING=true") startOpts := option.DefaultStartOptsNoBackups() startOpts.RoachprodOpts.ExtraArgs = append(startOpts.RoachprodOpts.ExtraArgs, "--vmodule=split_queue=2,store_rebalancer=2,allocator=2,replicate_queue=2,"+ "decider=3,replica_split_load=1", ) - c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings(), crdbNodes) + c.Start(ctx, t.L(), startOpts, settings, crdbNodes) m := c.NewMonitor(ctx, c.All()) m.Go(func(ctx context.Context) error { @@ -581,7 +587,9 @@ func runLargeRangeSplits(ctx context.Context, t test.Test, c cluster.Cluster, si rows := size / rowEstimate const minBytes = 16 << 20 // 16 MB - c.Put(ctx, t.Cockroach(), "./cockroach", c.All()) + // Never run with runtime assertions as this makes this test take + // too long to complete. + c.Put(ctx, t.StandardCockroach(), "./cockroach", c.All()) c.Put(ctx, t.DeprecatedWorkload(), "./workload", c.All()) numNodes := c.Spec().NodeCount c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings(), c.Node(1)) diff --git a/pkg/cmd/roachtest/tests/sqlsmith.go b/pkg/cmd/roachtest/tests/sqlsmith.go index 071fc807dd8f..a6a6580cc8cc 100644 --- a/pkg/cmd/roachtest/tests/sqlsmith.go +++ b/pkg/cmd/roachtest/tests/sqlsmith.go @@ -21,10 +21,12 @@ import ( "time" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/internal/sqlsmith" + "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/util/randutil" "github.com/cockroachdb/errors" ) @@ -98,9 +100,9 @@ WITH into_db = 'defaultdb', unsafe_restore_incompatible_version; rng, seed := randutil.NewTestRand() t.L().Printf("seed: %d", seed) - // With 50% chance use the cockroach-short binary that was compiled with - // --crdb_test build tag. - maybeUseBuildWithEnabledAssertions(ctx, t, c, rng, 0.5 /* eaProb */) + c.SetRandomSeed(rng.Int63()) + c.Put(ctx, t.Cockroach(), "./cockroach") + c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings()) setupFunc, ok := setups[setupName] if !ok { diff --git a/pkg/cmd/roachtest/tests/tpce.go b/pkg/cmd/roachtest/tests/tpce.go index 4e17fefd441e..f970da6fddc4 100644 --- a/pkg/cmd/roachtest/tests/tpce.go +++ b/pkg/cmd/roachtest/tests/tpce.go @@ -145,7 +145,9 @@ func runTPCE(ctx context.Context, t test.Test, c cluster.Cluster, opts tpceOptio if opts.start == nil { opts.start = func(ctx context.Context, t test.Test, c cluster.Cluster) { t.Status("installing cockroach") - c.Put(ctx, t.Cockroach(), "./cockroach", crdbNodes) + // Never run with runtime assertions as this makes this test take + // too long to complete. + c.Put(ctx, t.StandardCockroach(), "./cockroach", crdbNodes) startOpts := option.DefaultStartOpts() startOpts.RoachprodOpts.StoreCount = opts.ssds @@ -258,7 +260,7 @@ func registerTPCE(r registry.Registry) { }, }) - // Weekly, large sclae configuration. + // Weekly, large scale configuration. largeWeekly := tpceOptions{ customers: 100_000, nodes: 5, diff --git a/pkg/cmd/roachtest/tests/tpch_concurrency.go b/pkg/cmd/roachtest/tests/tpch_concurrency.go index 82acfca333d2..17ab82fb5a55 100644 --- a/pkg/cmd/roachtest/tests/tpch_concurrency.go +++ b/pkg/cmd/roachtest/tests/tpch_concurrency.go @@ -33,7 +33,9 @@ func registerTPCHConcurrency(r registry.Registry) { c cluster.Cluster, disableStreamer bool, ) { - c.Put(ctx, t.Cockroach(), "./cockroach", c.Range(1, numNodes-1)) + // We run this test without runtime assertions as it uses too much memory + // budget. Even increasing the max to 80% still flaked. + c.Put(ctx, t.StandardCockroach(), "./cockroach", c.Range(1, numNodes-1)) c.Put(ctx, t.DeprecatedWorkload(), "./workload", c.Node(numNodes)) c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings(), c.Range(1, numNodes-1)) @@ -54,7 +56,7 @@ func registerTPCHConcurrency(r registry.Registry) { restartCluster := func(ctx context.Context, c cluster.Cluster, t test.Test) { c.Stop(ctx, t.L(), option.DefaultStopOpts(), c.Range(1, numNodes-1)) - c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings(), c.Range(1, numNodes-1)) + c.Start(ctx, t.L(), maybeUseMemoryBudget(t, 35), install.MakeClusterSettings(), c.Range(1, numNodes-1)) } // checkConcurrency returns an error if at least one node of the cluster diff --git a/pkg/cmd/roachtest/tests/util.go b/pkg/cmd/roachtest/tests/util.go index df13b17ef40d..131561f69746 100644 --- a/pkg/cmd/roachtest/tests/util.go +++ b/pkg/cmd/roachtest/tests/util.go @@ -15,14 +15,12 @@ import ( gosql "database/sql" "fmt" "io" - "math/rand" "net/http" "time" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" - "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/util/httputil" "github.com/cockroachdb/cockroach/pkg/util/retry" "github.com/cockroachdb/cockroach/pkg/util/timeutil" @@ -207,31 +205,27 @@ func setAdmissionControl(ctx context.Context, t test.Test, c cluster.Cluster, en } } -// maybeUseBuildWithEnabledAssertions stages the cockroach-short binary with -// enabled assertions with eaProb probability if that binary is available, -// otherwise stages the regular cockroach binary, and starts the cluster. -func maybeUseBuildWithEnabledAssertions( - ctx context.Context, t test.Test, c cluster.Cluster, rng *rand.Rand, eaProb float64, -) { - if rng.Float64() < eaProb { - // Check whether the cockroach-short binary is available. - if t.CockroachShort() != "" { - randomSeed := rng.Int63() - t.Status( - "using cockroach-short binary compiled with --crdb_test "+ - "build tag and COCKROACH_RANDOM_SEED=", randomSeed, - ) - c.Put(ctx, t.CockroachShort(), "./cockroach") - // We need to ensure that all nodes in the cluster start with the - // same random seed (if not, some assumptions can be violated - for - // example that coldata.BatchSize() values are the same on all - // nodes). - settings := install.MakeClusterSettings() - settings.Env = append(settings.Env, fmt.Sprintf("COCKROACH_RANDOM_SEED=%d", randomSeed)) - c.Start(ctx, t.L(), option.DefaultStartOpts(), settings) - return - } +// usingRuntimeAssertions returns true if calls to `t.Cockroach()` for +// this test will return the cockroach build with runtime +// assertions. Note that calling this function only makes sense if the +// test uploads cockroach using `t.Cockroach` (instead of calling +// t.StandardCockroach or t.RuntimeAssertionsCockroach directly). +func usingRuntimeAssertions(t test.Test) bool { + return t.Cockroach() == t.RuntimeAssertionsCockroach() +} + +// maybeUseMemoryBudget returns a StartOpts with the specified --max-sql-memory +// if runtime assertions are enabled, and the default values otherwise. +// A scheduled backup will not begin at the start of the roachtest. +func maybeUseMemoryBudget(t test.Test, budget int) option.StartOpts { + startOpts := option.DefaultStartOptsNoBackups() + if usingRuntimeAssertions(t) { + // When running tests with runtime assertions enabled, increase + // SQL's memory budget to avoid 'budget exceeded' failures. + startOpts.RoachprodOpts.ExtraArgs = append( + startOpts.RoachprodOpts.ExtraArgs, + fmt.Sprintf("--max-sql-memory=%d%%", budget), + ) } - c.Put(ctx, t.Cockroach(), "./cockroach") - c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings()) + return startOpts } diff --git a/pkg/roachprod/install/cockroach.go b/pkg/roachprod/install/cockroach.go index 5d5cfa8bd629..2be1e626a704 100644 --- a/pkg/roachprod/install/cockroach.go +++ b/pkg/roachprod/install/cockroach.go @@ -19,6 +19,7 @@ import ( "os" "os/exec" "path/filepath" + "regexp" "runtime" "strconv" "strings" @@ -911,12 +912,30 @@ func (c *SyncedCluster) generateStartFlagsKV(node Node, startOpts StartOpts) []s return args } +var maxSQLMemoryRE = regexp.MustCompile(`^--max-sql-memory=(\d+)%$`) + // generateStartFlagsSQL generates `cockroach start` and `cockroach mt // start-sql` arguments that are relevant for the SQL layers, used by both KV // and storage layers. func (c *SyncedCluster) generateStartFlagsSQL(node Node, startOpts StartOpts) []string { var args []string - args = append(args, fmt.Sprintf("--max-sql-memory=%d%%", c.maybeScaleMem(25))) + formatArg := func(m int) string { + return fmt.Sprintf("--max-sql-memory=%d%%", c.maybeScaleMem(m)) + } + + if idx := argExists(startOpts.ExtraArgs, "--max-sql-memory"); idx == -1 { + args = append(args, formatArg(25)) + } else { + arg := startOpts.ExtraArgs[idx] + matches := maxSQLMemoryRE.FindStringSubmatch(arg) + mem, err := strconv.ParseInt(matches[1], 10, 64) + if err != nil { + panic(fmt.Sprintf("invalid --max-sql-memory parameter: %s", arg)) + } + + startOpts.ExtraArgs[idx] = formatArg(int(mem)) + } + if startOpts.Target == StartServiceForVirtualCluster { args = append(args, "--store", c.InstanceStoreDir(node, startOpts.VirtualClusterName, startOpts.SQLInstance)) }