From 581e671f813102b521b6ac0eed480bbf4e23ee18 Mon Sep 17 00:00:00 2001 From: Renato Costa Date: Thu, 9 Mar 2023 17:30:29 -0500 Subject: [PATCH 1/2] roachtest: randomly run with runtime assertions by default This changes the semantics of `t.Cockroach()` to use a binary with runtime assertions and metamorphic constants enabled by default. Performance tests (indicated by the benchmark TestSpec) will continue using the standard binary, without runtime assertions, as usual. This commit also opts-out other tests that cannot easily be run with runtime assertions or metamorphic constants enabled, most often due to timeouts and metamorphic constant incompatibility. Resolves #86678. Informs #94986. Epic: none Release note: None --- .../nightlies/roachtest_compile_component.sh | 8 ++- .../nightlies/roachtest_nightly_aws.sh | 2 +- .../nightlies/roachtest_nightly_azure.sh | 2 +- .../nightlies/roachtest_nightly_gce.sh | 2 +- pkg/cmd/roachtest/cluster.go | 54 ++++++++++++++++-- .../roachtest/cluster/cluster_interface.go | 4 ++ pkg/cmd/roachtest/cluster_test.go | 6 +- .../clusterupgrade/clusterupgrade.go | 5 +- pkg/cmd/roachtest/run.go | 9 +++ pkg/cmd/roachtest/test/test_interface.go | 20 +++++-- pkg/cmd/roachtest/test_impl.go | 57 +++++++++++++++++-- pkg/cmd/roachtest/test_runner.go | 2 +- pkg/cmd/roachtest/tests/allocator.go | 10 ++-- pkg/cmd/roachtest/tests/asyncpg.go | 11 +++- pkg/cmd/roachtest/tests/backup.go | 9 +-- .../tests/backup_restore_roundtrip.go | 2 +- pkg/cmd/roachtest/tests/cdc.go | 5 +- pkg/cmd/roachtest/tests/copy.go | 17 +++++- .../roachtest/tests/import_cancellation.go | 5 +- pkg/cmd/roachtest/tests/kvbench.go | 1 + .../roachtest/tests/multitenant_distsql.go | 2 + pkg/cmd/roachtest/tests/multitenant_utils.go | 10 +++- pkg/cmd/roachtest/tests/mvcc_gc.go | 34 ++++++++--- pkg/cmd/roachtest/tests/pgjdbc.go | 11 ++++ pkg/cmd/roachtest/tests/schemachange.go | 4 +- pkg/cmd/roachtest/tests/split.go | 12 +++- pkg/cmd/roachtest/tests/sqlsmith.go | 8 ++- pkg/cmd/roachtest/tests/tpce.go | 6 +- pkg/cmd/roachtest/tests/tpch_concurrency.go | 6 +- pkg/cmd/roachtest/tests/util.go | 50 +++++++--------- pkg/roachprod/install/cockroach.go | 21 ++++++- 31 files changed, 311 insertions(+), 84 deletions(-) diff --git a/build/teamcity/cockroach/nightlies/roachtest_compile_component.sh b/build/teamcity/cockroach/nightlies/roachtest_compile_component.sh index fcae0cab5576..d872ef9920af 100755 --- a/build/teamcity/cockroach/nightlies/roachtest_compile_component.sh +++ b/build/teamcity/cockroach/nightlies/roachtest_compile_component.sh @@ -83,7 +83,13 @@ case "$component" in ;; roachtest) # Roachtest binary. - bazel_args=(//pkg/cmd/roachtest) + # N.B. We always compile the roachtest binary with crdb_test so the same serialization + # on the wire is established with cockroach binaries built under crdb_test. + # E.g. KVNemesisSeq is used only under crdb_test builds. If the cockroach binary is + # built with crdb_test, it will expect this field to be sent by the roachtest runner. + # Note that the opposite is not true, and a cockroach binary built without crdb_test + # is still compatible with a roachtest binary built with it. + bazel_args=(//pkg/cmd/roachtest --crdb_test) artifacts=("pkg/cmd/roachtest/roachtest_/roachtest:bin/roachtest.$os-$arch") ;; *) diff --git a/build/teamcity/cockroach/nightlies/roachtest_nightly_aws.sh b/build/teamcity/cockroach/nightlies/roachtest_nightly_aws.sh index 2928bab4cdc3..02a48f874a7e 100755 --- a/build/teamcity/cockroach/nightlies/roachtest_nightly_aws.sh +++ b/build/teamcity/cockroach/nightlies/roachtest_nightly_aws.sh @@ -7,5 +7,5 @@ dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))" source "$dir/teamcity-support.sh" # For $root source "$dir/teamcity-bazel-support.sh" # For run_bazel -BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e AWS_ACCESS_KEY_ID -e AWS_ACCESS_KEY_ID_ASSUME_ROLE -e AWS_KMS_KEY_ARN_A -e AWS_KMS_KEY_ARN_B -e AWS_KMS_REGION_A -e AWS_KMS_REGION_B -e AWS_ROLE_ARN -e AWS_SECRET_ACCESS_KEY -e AWS_SECRET_ACCESS_KEY_ASSUME_ROLE -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY" \ +BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e AWS_ACCESS_KEY_ID -e AWS_ACCESS_KEY_ID_ASSUME_ROLE -e AWS_KMS_KEY_ARN_A -e AWS_KMS_KEY_ARN_B -e AWS_KMS_REGION_A -e AWS_KMS_REGION_B -e AWS_ROLE_ARN -e AWS_SECRET_ACCESS_KEY -e AWS_SECRET_ACCESS_KEY_ASSUME_ROLE -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY -e COCKROACH_RANDOM_SEED -e ROACHTEST_ASSERTIONS_ENABLED_SEED" \ run_bazel build/teamcity/cockroach/nightlies/roachtest_nightly_impl.sh diff --git a/build/teamcity/cockroach/nightlies/roachtest_nightly_azure.sh b/build/teamcity/cockroach/nightlies/roachtest_nightly_azure.sh index 8d5c5e844221..118c0680f7a6 100755 --- a/build/teamcity/cockroach/nightlies/roachtest_nightly_azure.sh +++ b/build/teamcity/cockroach/nightlies/roachtest_nightly_azure.sh @@ -7,5 +7,5 @@ dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))" source "$dir/teamcity-support.sh" # For $root source "$dir/teamcity-bazel-support.sh" # For run_bazel -BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e AZURE_CLIENT_ID -e AZURE_CLIENT_SECRET -e AZURE_SUBSCRIPTION_ID -e AZURE_TENANT_ID -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY" \ +BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e AZURE_CLIENT_ID -e AZURE_CLIENT_SECRET -e AZURE_SUBSCRIPTION_ID -e AZURE_TENANT_ID -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY -e COCKROACH_RANDOM_SEED -e ROACHTEST_ASSERTIONS_ENABLED_SEED" \ run_bazel build/teamcity/cockroach/nightlies/roachtest_nightly_impl.sh diff --git a/build/teamcity/cockroach/nightlies/roachtest_nightly_gce.sh b/build/teamcity/cockroach/nightlies/roachtest_nightly_gce.sh index 7d798a357850..e0899cef5f97 100755 --- a/build/teamcity/cockroach/nightlies/roachtest_nightly_gce.sh +++ b/build/teamcity/cockroach/nightlies/roachtest_nightly_gce.sh @@ -7,5 +7,5 @@ dir="$(dirname $(dirname $(dirname $(dirname "${0}"))))" source "$dir/teamcity-support.sh" # For $root source "$dir/teamcity-bazel-support.sh" # For run_bazel -BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e GOOGLE_KMS_KEY_A -e GOOGLE_KMS_KEY_B -e GOOGLE_CREDENTIALS_ASSUME_ROLE -e GOOGLE_SERVICE_ACCOUNT -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY" \ +BAZEL_SUPPORT_EXTRA_DOCKER_ARGS="-e LITERAL_ARTIFACTS_DIR=$root/artifacts -e BUILD_VCS_NUMBER -e CLOUD -e COCKROACH_DEV_LICENSE -e TESTS -e COUNT -e GITHUB_API_TOKEN -e GITHUB_ORG -e GITHUB_REPO -e GOOGLE_EPHEMERAL_CREDENTIALS -e GOOGLE_KMS_KEY_A -e GOOGLE_KMS_KEY_B -e GOOGLE_CREDENTIALS_ASSUME_ROLE -e GOOGLE_SERVICE_ACCOUNT -e SLACK_TOKEN -e TC_BUILDTYPE_ID -e TC_BUILD_BRANCH -e TC_BUILD_ID -e TC_SERVER_URL -e SELECT_PROBABILITY -e COCKROACH_RANDOM_SEED -e ROACHTEST_ASSERTIONS_ENABLED_SEED" \ run_bazel build/teamcity/cockroach/nightlies/roachtest_nightly_impl.sh diff --git a/pkg/cmd/roachtest/cluster.go b/pkg/cmd/roachtest/cluster.go index 78f14e77e2e7..84c204905ce2 100644 --- a/pkg/cmd/roachtest/cluster.go +++ b/pkg/cmd/roachtest/cluster.go @@ -17,6 +17,7 @@ import ( "fmt" "io" "io/fs" + "math/rand" "net" "net/url" "os" @@ -689,8 +690,13 @@ type clusterImpl struct { // BAZEL_COVER_DIR will be set to this value when starting a node. goCoverDir string - os string // OS of the cluster - arch vm.CPUArch // CPU architecture of the cluster + os string // OS of the cluster + arch vm.CPUArch // CPU architecture of the cluster + randomSeed struct { + mu syncutil.Mutex + seed *int64 + } + // destroyState contains state related to the cluster's destruction. destroyState destroyState } @@ -2003,10 +2009,16 @@ func (c *clusterImpl) StartE( startOpts.RoachprodOpts.EncryptedStores = c.encAtRest - if !envExists(settings.Env, "COCKROACH_CRASH_ON_SPAN_USE_AFTER_FINISH") { - // Panic on span use-after-Finish, so we catch such bugs. - settings.Env = append(settings.Env, "COCKROACH_CRASH_ON_SPAN_USE_AFTER_FINISH=true") + setUnlessExists := func(name string, value interface{}) { + if !envExists(settings.Env, name) { + settings.Env = append(settings.Env, fmt.Sprintf("%s=%s", name, fmt.Sprint(value))) + } } + // Panic on span use-after-Finish, so we catch such bugs. + setUnlessExists("COCKROACH_CRASH_ON_SPAN_USE_AFTER_FINISH", true) + // Set the same seed on every node, to be used by builds with + // runtime assertions enabled. + setUnlessExists("COCKROACH_RANDOM_SEED", c.cockroachRandomSeed()) // Needed for backward-compat on crdb_internal.ranges{_no_leases}. // Remove in v23.2. @@ -2071,6 +2083,38 @@ func (c *clusterImpl) RefetchCertsFromNode(ctx context.Context, node int) error }) } +// SetRandomSeed sets the random seed to be used by the cluster. If +// not called, clusters generate a random seed from the global +// generator in the `rand` package. This function must be called +// before any nodes in the cluster start. +func (c *clusterImpl) SetRandomSeed(seed int64) { + c.randomSeed.seed = &seed +} + +// cockroachRandomSeed returns the `COCKROACH_RANDOM_SEED` to be used +// by this cluster. The seed may have been previously set by a +// previous call to `StartE`, or by the user via `SetRandomSeed`. If +// not set, this function will generate a seed and return it. +func (c *clusterImpl) cockroachRandomSeed() int64 { + c.randomSeed.mu.Lock() + defer c.randomSeed.mu.Unlock() + + // If the user provided a seed via environment variable, always use + // that, even if the test attempts to set a different seed. + if seedStr := os.Getenv(test.EnvAssertionsEnabledSeed); seedStr != "" { + seedOverride, err := strconv.ParseInt(seedStr, 0, 64) + if err != nil { + panic(fmt.Sprintf("error parsing %s: %s", test.EnvAssertionsEnabledSeed, err)) + } + c.randomSeed.seed = &seedOverride + } else if c.randomSeed.seed == nil { + seed := rand.Int63() + c.randomSeed.seed = &seed + } + + return *c.randomSeed.seed +} + // Start is like StartE() except that it will fatal the test on error. func (c *clusterImpl) Start( ctx context.Context, diff --git a/pkg/cmd/roachtest/cluster/cluster_interface.go b/pkg/cmd/roachtest/cluster/cluster_interface.go index 9ee79b50021f..bf63794c1e20 100644 --- a/pkg/cmd/roachtest/cluster/cluster_interface.go +++ b/pkg/cmd/roachtest/cluster/cluster_interface.go @@ -49,6 +49,10 @@ type Cluster interface { ctx context.Context, content, dest string, mode os.FileMode, opts ...option.Option, ) error + // SetRandomSeed allows tests to set their own random seed to be + // used by builds with runtime assertions enabled. + SetRandomSeed(seed int64) + // Starting and stopping CockroachDB. StartE(ctx context.Context, l *logger.Logger, startOpts option.StartOpts, settings install.ClusterSettings, opts ...option.Option) error diff --git a/pkg/cmd/roachtest/cluster_test.go b/pkg/cmd/roachtest/cluster_test.go index 2a4682537ecc..feb893a3e12d 100644 --- a/pkg/cmd/roachtest/cluster_test.go +++ b/pkg/cmd/roachtest/cluster_test.go @@ -72,7 +72,11 @@ func (t testWrapper) Cockroach() string { return "./dummy-path/to/cockroach" } -func (t testWrapper) CockroachShort() string { +func (t testWrapper) StandardCockroach() string { + return "./dummy-path/to/cockroach" +} + +func (t testWrapper) RuntimeAssertionsCockroach() string { return "./dummy-path/to/cockroach-short" } diff --git a/pkg/cmd/roachtest/roachtestutil/clusterupgrade/clusterupgrade.go b/pkg/cmd/roachtest/roachtestutil/clusterupgrade/clusterupgrade.go index 2c4393566d02..d84d3780c89b 100644 --- a/pkg/cmd/roachtest/roachtestutil/clusterupgrade/clusterupgrade.go +++ b/pkg/cmd/roachtest/roachtestutil/clusterupgrade/clusterupgrade.go @@ -135,7 +135,10 @@ func UploadVersion( newVersion *Version, ) (string, error) { dstBinary := BinaryPathForVersion(t, newVersion) - srcBinary := t.Cockroach() + // Run with standard binary as older versions retrieved through roachprod stage + // are not currently available with crdb_test enabled. + // TODO(DarrylWong): Compile older versions with crdb_test flag. + srcBinary := t.StandardCockroach() overrideBinary, isOverriden := t.VersionsBinaryOverride()[newVersion.String()] if isOverriden { diff --git a/pkg/cmd/roachtest/run.go b/pkg/cmd/roachtest/run.go index 7edacd7de831..5fcdde4efa53 100644 --- a/pkg/cmd/roachtest/run.go +++ b/pkg/cmd/roachtest/run.go @@ -13,6 +13,7 @@ package main import ( "context" "fmt" + "math/rand" "net/http" "os" "os/signal" @@ -29,6 +30,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/log/logconfig" "github.com/cockroachdb/cockroach/pkg/util/log/logpb" + "github.com/cockroachdb/cockroach/pkg/util/randutil" "github.com/cockroachdb/cockroach/pkg/util/stop" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" @@ -41,6 +43,7 @@ var ( parallelism int cpuQuota int + globalSeed int64 // Path to a local dir where the test logs and artifacts collected from // cluster will be placed. @@ -153,6 +156,9 @@ func addRunBenchCommonFlags(cmd *cobra.Command) { cmd.Flags().BoolVar( &forceCloudCompat, "force-cloud-compat", false, "Includes tests that are not marked as compatible with the cloud used") addSuiteAndOwnerFlags(cmd) + cmd.Flags().Int64Var( + &globalSeed, "global-seed", randutil.NewPseudoSeed(), + "The global random seed used for all tests.") } func addRunFlags(runCmd *cobra.Command) { @@ -179,6 +185,8 @@ func addBenchFlags(benchCmd *cobra.Command) { // runTests is the main function for the run and bench commands. // Assumes initRunFlagsBinariesAndLibraries was called. func runTests(register func(registry.Registry), filter *registry.TestFilter) error { + //lint:ignore SA1019 deprecated + rand.Seed(globalSeed) r := makeTestRegistry(cloud) // actual registering of tests @@ -257,6 +265,7 @@ func runTests(register func(registry.Registry), filter *registry.TestFilter) err literalArtifactsDir: literalArtifactsDir, runnerLogPath: runnerLogPath, } + l.Printf("global random seed: %d", globalSeed) go func() { if err := http.ListenAndServe( fmt.Sprintf(":%d", promPort), diff --git a/pkg/cmd/roachtest/test/test_interface.go b/pkg/cmd/roachtest/test/test_interface.go index 358ff240cb3f..19d09f6ef7c2 100644 --- a/pkg/cmd/roachtest/test/test_interface.go +++ b/pkg/cmd/roachtest/test/test_interface.go @@ -20,13 +20,25 @@ import ( // cluster. const DefaultCockroachPath = "./cockroach-default" +// EnvAssertionsEnabledSeed is the name of the environment variable +// that, when set, causes roachtest to use a binary with runtime +// assertions enabled (if available), using the random seed contained +// in that environment variable. +var EnvAssertionsEnabledSeed = "ROACHTEST_ASSERTIONS_ENABLED_SEED" + // Test is the interface through which roachtests interact with the // test harness. type Test interface { - Cockroach() string // path to main cockroach binary - // CockroachShort returns the path to cockroach-short binary compiled with - // --crdb_test build tag, or an empty string if no such binary was given. - CockroachShort() string + // StandardCockroach returns path to main cockroach binary, compiled + // without runtime assertions. + StandardCockroach() string + // RuntimeAssertionsCockroach returns the path to cockroach-short + // binary compiled with --crdb_test build tag, or an empty string if + // no such binary was given. + RuntimeAssertionsCockroach() string + // Cockroach returns either StandardCockroach or RuntimeAssertionsCockroach, + // picked randomly. + Cockroach() string Name() string BuildVersion() *version.Version IsBuildVersion(string) bool // "vXX.YY" diff --git a/pkg/cmd/roachtest/test_impl.go b/pkg/cmd/roachtest/test_impl.go index 4e405fef2b55..38f7a007d719 100644 --- a/pkg/cmd/roachtest/test_impl.go +++ b/pkg/cmd/roachtest/test_impl.go @@ -14,11 +14,14 @@ import ( "context" "fmt" "io" + "math/rand" "os" "strings" + "sync" "time" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/roachprod/logger" "github.com/cockroachdb/cockroach/pkg/testutils/skip" "github.com/cockroachdb/cockroach/pkg/util/syncutil" @@ -59,8 +62,12 @@ type failure struct { type testImpl struct { spec *registry.TestSpec - cockroach string // path to main cockroach binary - cockroachShort string // path to cockroach-short binary compiled with --crdb_test build tag + cockroach string // path to main cockroach binary + cockroachEA string // path to cockroach-short binary compiled with --crdb_test build tag + + randomCockroachOnce sync.Once + randomizedCockroach string // either `cockroach` or `cockroach-short`, picked randomly + deprecatedWorkload string // path to workload binary debug bool // whether the test is in debug mode. // buildVersion is the version of the Cockroach binary that the test will run @@ -131,13 +138,51 @@ func (t *testImpl) BuildVersion() *version.Version { return t.buildVersion } -// Cockroach returns the path to the cockroach binary. +// Cockroach will return either `RuntimeAssertionsCockroach()` or +// `StandardCockroach()`, picked randomly. Once a random choice has +// been made, the same binary will be returned on every call to +// `Cockroach`, to avoid errors that may arise from binaries having a +// different value for metamorphic constants. func (t *testImpl) Cockroach() string { - return t.cockroach + // If the test is a benchmark test, we don't want to enable assertions + // as it will slow down performance. + if t.spec.Benchmark { + t.l.Printf("Benchmark test, running with standard cockroach") + return t.StandardCockroach() + } + t.randomCockroachOnce.Do(func() { + assertionsEnabledProbability := 0.5 + // If the user specified a custom seed to be used with runtime + // assertions, assume they want to run the test with assertions + // enabled, making it easier to reproduce issues. + if os.Getenv(test.EnvAssertionsEnabledSeed) != "" { + assertionsEnabledProbability = 1 + } + + if rand.Float64() < assertionsEnabledProbability { + // The build with runtime assertions should exist in every nightly + // CI build, but we can't assume it exists in every roachtest call. + if path := t.RuntimeAssertionsCockroach(); path != "" { + t.l.Printf("Runtime assertions enabled") + t.randomizedCockroach = path + return + } else { + t.l.Printf("WARNING: running without runtime assertions since the corresponding binary was not specified") + } + } + t.l.Printf("Runtime assertions disabled") + t.randomizedCockroach = t.StandardCockroach() + }) + + return t.randomizedCockroach } -func (t *testImpl) CockroachShort() string { - return t.cockroachShort +func (t *testImpl) RuntimeAssertionsCockroach() string { + return t.cockroachEA +} + +func (t *testImpl) StandardCockroach() string { + return t.cockroach } func (t *testImpl) DeprecatedWorkload() string { diff --git a/pkg/cmd/roachtest/test_runner.go b/pkg/cmd/roachtest/test_runner.go index 3db4a6edf5af..0327e10bd42d 100644 --- a/pkg/cmd/roachtest/test_runner.go +++ b/pkg/cmd/roachtest/test_runner.go @@ -679,7 +679,7 @@ func (r *testRunner) runWorker( t := &testImpl{ spec: &testToRun.spec, cockroach: cockroach[arch], - cockroachShort: cockroachEA[arch], + cockroachEA: cockroachEA[arch], deprecatedWorkload: workload[arch], buildVersion: binaryVersion, artifactsDir: testArtifactsDir, diff --git a/pkg/cmd/roachtest/tests/allocator.go b/pkg/cmd/roachtest/tests/allocator.go index a638c54ebeeb..a69c60e953c1 100644 --- a/pkg/cmd/roachtest/tests/allocator.go +++ b/pkg/cmd/roachtest/tests/allocator.go @@ -172,10 +172,12 @@ func registerAllocator(r registry.Registry) { }, }) r.Add(registry.TestSpec{ - Name: `replicate/wide`, - Owner: registry.OwnerKV, - Benchmark: true, - Timeout: 10 * time.Minute, + Name: `replicate/wide`, + Owner: registry.OwnerKV, + Benchmark: true, + // Allow a longer running time to account for runs that use a + // cockroach build with runtime assertions enabled. + Timeout: 30 * time.Minute, Cluster: r.MakeClusterSpec(9, spec.CPU(1)), Leases: registry.MetamorphicLeases, CompatibleClouds: registry.AllExceptAWS, diff --git a/pkg/cmd/roachtest/tests/asyncpg.go b/pkg/cmd/roachtest/tests/asyncpg.go index d69e30cdc012..24a52667686f 100644 --- a/pkg/cmd/roachtest/tests/asyncpg.go +++ b/pkg/cmd/roachtest/tests/asyncpg.go @@ -47,7 +47,16 @@ func registerAsyncpg(r registry.Registry) { node := c.Node(1) t.Status("setting up cockroach") c.Put(ctx, t.Cockroach(), "./cockroach", c.All()) - c.Start(ctx, t.L(), option.DefaultStartOptsInMemory(), install.MakeClusterSettings(), c.All()) + + // This test assumes that multiple_active_portals_enabled is false, but through + // metamorphic constants, it is possible for them to be enabled. We disable + // metamorphic testing to avoid this. Note the asyncpg test suite drops the + // database so we can't set the session variable like we do in pgjdbc. + // TODO(DarrylWong): Use a metamorphic constants exclusion list instead. + // See: https://github.com/cockroachdb/cockroach/issues/113164 + settings := install.MakeClusterSettings() + settings.Env = append(settings.Env, "COCKROACH_INTERNAL_DISABLE_METAMORPHIC_TESTING=true") + c.Start(ctx, t.L(), option.DefaultStartOptsInMemory(), settings, c.All()) version, err := fetchCockroachVersion(ctx, t.L(), c, node[0]) if err != nil { diff --git a/pkg/cmd/roachtest/tests/backup.go b/pkg/cmd/roachtest/tests/backup.go index 82fa92c385ef..76da662ff014 100644 --- a/pkg/cmd/roachtest/tests/backup.go +++ b/pkg/cmd/roachtest/tests/backup.go @@ -96,12 +96,13 @@ func importBankDataSplit( ) string { dest := destinationName(c) + cockroach := t.Cockroach() + c.Put(ctx, cockroach, "./cockroach") c.Put(ctx, t.DeprecatedWorkload(), "./workload") - c.Put(ctx, t.Cockroach(), "./cockroach") // NB: starting the cluster creates the logs dir as a side effect, // needed below. - c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings()) + c.Start(ctx, t.L(), maybeUseMemoryBudget(t, 50), install.MakeClusterSettings()) runImportBankDataSplit(ctx, rows, ranges, t, c) return dest } @@ -194,7 +195,7 @@ func runImportBankDataSplit(ctx context.Context, rows, ranges int, t test.Test, } func importBankData(ctx context.Context, rows int, t test.Test, c cluster.Cluster) string { - return importBankDataSplit(ctx, rows, 0 /* ranges */, t, c) + return importBankDataSplit(ctx, rows, 0, t, c) } func registerBackupNodeShutdown(r registry.Registry) { @@ -746,7 +747,7 @@ func runBackupMVCCRangeTombstones( if !config.skipClusterSetup { c.Put(ctx, t.Cockroach(), "./cockroach") c.Put(ctx, t.DeprecatedWorkload(), "./workload") // required for tpch - c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings()) + c.Start(ctx, t.L(), maybeUseMemoryBudget(t, 50), install.MakeClusterSettings()) } t.Status("starting csv servers") c.Run(ctx, c.All(), `./cockroach workload csv-server --port=8081 &> logs/workload-csv-server.log < /dev/null &`) diff --git a/pkg/cmd/roachtest/tests/backup_restore_roundtrip.go b/pkg/cmd/roachtest/tests/backup_restore_roundtrip.go index 33222fa54553..835b49997f8f 100644 --- a/pkg/cmd/roachtest/tests/backup_restore_roundtrip.go +++ b/pkg/cmd/roachtest/tests/backup_restore_roundtrip.go @@ -103,7 +103,7 @@ func backupRestoreRoundTrip( "COCKROACH_MIN_RANGE_MAX_BYTES=1", }) - c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings(install.SecureOption(true), envOption), roachNodes) + c.Start(ctx, t.L(), maybeUseMemoryBudget(t, 50), install.MakeClusterSettings(install.SecureOption(true), envOption), roachNodes) m := c.NewMonitor(ctx, roachNodes) m.Go(func(ctx context.Context) error { diff --git a/pkg/cmd/roachtest/tests/cdc.go b/pkg/cmd/roachtest/tests/cdc.go index aa558fee7c9a..a4548de6a797 100644 --- a/pkg/cmd/roachtest/tests/cdc.go +++ b/pkg/cmd/roachtest/tests/cdc.go @@ -619,7 +619,10 @@ func newCDCTester(ctx context.Context, t test.Test, c cluster.Cluster) cdcTester settings.Env = append(settings.Env, envVars...) - c.Put(ctx, t.Cockroach(), "./cockroach") + // Allow cockroach with runtime assertions enabled unless this is a + // performance test. + cockroach := t.Cockroach() + c.Put(ctx, cockroach, "./cockroach") c.Start(ctx, t.L(), startOpts, settings, tester.crdbNodes) c.Put(ctx, t.DeprecatedWorkload(), "./workload", tester.workloadNode) tester.startGrafana() diff --git a/pkg/cmd/roachtest/tests/copy.go b/pkg/cmd/roachtest/tests/copy.go index 828b9da4425d..f1081730868d 100644 --- a/pkg/cmd/roachtest/tests/copy.go +++ b/pkg/cmd/roachtest/tests/copy.go @@ -49,7 +49,20 @@ func registerCopy(r registry.Registry) { c.Put(ctx, t.Cockroach(), "./cockroach", c.All()) c.Put(ctx, t.DeprecatedWorkload(), "./workload", c.All()) - c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings(), c.All()) + // We run this without metamorphic constants as kv-batch-size = 1 makes + // this test take far too long to complete. + // TODO(DarrylWong): Use a metamorphic constants exclusion list instead. + // See: https://github.com/cockroachdb/cockroach/issues/113164 + settings := install.MakeClusterSettings() + settings.Env = append(settings.Env, "COCKROACH_INTERNAL_DISABLE_METAMORPHIC_TESTING=true") + c.Start(ctx, t.L(), option.DefaultStartOpts(), settings, c.All()) + + // Make sure the copy commands have sufficient time to finish when + // runtime assertions are enabled. + copyTimeout := 10 * time.Minute + if usingRuntimeAssertions(t) { + copyTimeout = 20 * time.Minute + } m := c.NewMonitor(ctx, c.All()) m.Go(func(ctx context.Context) error { @@ -100,7 +113,7 @@ func registerCopy(r registry.Registry) { QueryRowContext(ctx context.Context, query string, args ...interface{}) *gosql.Row } runCopy := func(ctx context.Context, qu querier) error { - ctx, cancel := context.WithTimeout(ctx, 10*time.Minute) // avoid infinite internal retries + ctx, cancel := context.WithTimeout(ctx, copyTimeout) // avoid infinite internal retries defer cancel() for lastID := -1; lastID+1 < rows; { diff --git a/pkg/cmd/roachtest/tests/import_cancellation.go b/pkg/cmd/roachtest/tests/import_cancellation.go index b925f3cc51a6..77393b80ff6d 100644 --- a/pkg/cmd/roachtest/tests/import_cancellation.go +++ b/pkg/cmd/roachtest/tests/import_cancellation.go @@ -20,7 +20,6 @@ import ( "time" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" - "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" @@ -51,7 +50,9 @@ func registerImportCancellation(r registry.Registry) { func runImportCancellation(ctx context.Context, t test.Test, c cluster.Cluster) { c.Put(ctx, t.Cockroach(), "./cockroach") c.Put(ctx, t.DeprecatedWorkload(), "./workload") // required for tpch - c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings()) + startOpts := maybeUseMemoryBudget(t, 50) + startOpts.RoachprodOpts.ScheduleBackups = true + c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings()) t.Status("starting csv servers") c.Run(ctx, c.All(), `./cockroach workload csv-server --port=8081 &> logs/workload-csv-server.log < /dev/null &`) diff --git a/pkg/cmd/roachtest/tests/kvbench.go b/pkg/cmd/roachtest/tests/kvbench.go index 462ce6d2acc0..9afe8dd81c16 100644 --- a/pkg/cmd/roachtest/tests/kvbench.go +++ b/pkg/cmd/roachtest/tests/kvbench.go @@ -96,6 +96,7 @@ func registerKVBenchSpec(r registry.Registry, b kvBenchSpec) { Suites: registry.ManualOnly, Tags: registry.Tags("manual"), Owner: registry.OwnerKV, + Benchmark: true, Cluster: nodes, Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { runKVBench(ctx, t, c, b) diff --git a/pkg/cmd/roachtest/tests/multitenant_distsql.go b/pkg/cmd/roachtest/tests/multitenant_distsql.go index af24cc4fd19e..edef1bd5fa52 100644 --- a/pkg/cmd/roachtest/tests/multitenant_distsql.go +++ b/pkg/cmd/roachtest/tests/multitenant_distsql.go @@ -59,6 +59,8 @@ func runMultiTenantDistSQL( bundle bool, timeoutMillis int, ) { + // This test fails when runtime assertions are enabled. + // See: https://github.com/cockroachdb/cockroach/issues/113170 c.Put(ctx, t.Cockroach(), "./cockroach") // This test sets a smaller default range size than the default due to // performance and resource limitations. We set the minimum range max bytes to diff --git a/pkg/cmd/roachtest/tests/multitenant_utils.go b/pkg/cmd/roachtest/tests/multitenant_utils.go index ab48b15c516d..3ad051cc6d04 100644 --- a/pkg/cmd/roachtest/tests/multitenant_utils.go +++ b/pkg/cmd/roachtest/tests/multitenant_utils.go @@ -14,6 +14,7 @@ import ( "context" gosql "database/sql" "fmt" + "math/rand" "net/url" "os" "path/filepath" @@ -187,10 +188,12 @@ func (tn *tenantNode) start(ctx context.Context, t test.Test, c cluster.Cluster, "--store=" + tn.storeDir()} internalIPs, err := c.InternalIP(ctx, t.L(), c.Node(tn.node)) + randomSeed := rand.Int63() + c.SetRandomSeed(randomSeed) require.NoError(t, err) tn.errCh = startTenantServer( ctx, c, c.Node(tn.node), internalIPs[0], binary, tn.kvAddrs, tn.tenantID, - tn.httpPort, tn.sqlPort, tn.envVars, + tn.httpPort, tn.sqlPort, tn.envVars, randomSeed, extraArgs..., ) @@ -254,6 +257,7 @@ func startTenantServer( httpPort int, sqlPort int, envVars []string, + randomSeed int64, extraFlags ...string, ) chan error { args := []string{ @@ -268,6 +272,10 @@ func startTenantServer( errCh := make(chan error, 1) go func() { + // Set the same random seed for every tenant; this is picked up by + // runs that use a build with runtime assertions enabled, and + // ignored otherwise. + envVars = append(envVars, fmt.Sprintf("COCKROACH_RANDOM_SEED=%d", randomSeed)) errCh <- c.RunE(tenantCtx, node, append(append(append([]string{}, envVars...), binary, "mt", "start-sql"), args...)..., ) diff --git a/pkg/cmd/roachtest/tests/mvcc_gc.go b/pkg/cmd/roachtest/tests/mvcc_gc.go index 2c83a961e481..1aee02e3798c 100644 --- a/pkg/cmd/roachtest/tests/mvcc_gc.go +++ b/pkg/cmd/roachtest/tests/mvcc_gc.go @@ -78,6 +78,15 @@ func runMVCCGC(ctx context.Context, t test.Test, c cluster.Cluster) { // How long to wait for data to be GCd during assert loop. const gcRetryTimeout = 7 * time.Minute + var randomSeed int64 + if usingRuntimeAssertions(t) { + // Do not use `0` as that is reserved to mean that we are running + // without runtime assertions. + for randomSeed == 0 { + randomSeed = rand.Int63() + } + c.SetRandomSeed(randomSeed) + } c.Put(ctx, t.Cockroach(), "./cockroach") s := install.MakeClusterSettings() s.Env = append(s.Env, "COCKROACH_SCAN_INTERVAL=30s") @@ -139,7 +148,7 @@ func runMVCCGC(ctx context.Context, t test.Test, c cluster.Cluster) { t.L().Printf("performing clean-assert cycle #%d", i) if err := retry.WithMaxAttempts(ctx, retry.Options{}, 3, func() error { - return deleteSomeTableDataWithOverlappingTombstones(ctx, t, c, conn, rng, m, 5) + return deleteSomeTableDataWithOverlappingTombstones(ctx, t, c, conn, rng, m, 5, randomSeed) }); err != nil { t.Fatal(err) } @@ -163,7 +172,7 @@ func runMVCCGC(ctx context.Context, t test.Test, c cluster.Cluster) { wlCancel() if err := retry.WithMaxAttempts(ctx, retry.Options{}, 3, func() error { - err := deleteAllTableDataWithOverlappingTombstones(ctx, t, c, conn, rng, m, 5) + err := deleteAllTableDataWithOverlappingTombstones(ctx, t, c, conn, rng, m, 5, randomSeed) if err != nil { return err } @@ -481,6 +490,7 @@ func deleteAllTableDataWithOverlappingTombstones( rng *rand.Rand, tm tableMetadata, fragments int, + randomSeed int64, ) error { t.Helper() encodeKey := func(index int64) roachpb.Key { @@ -514,7 +524,7 @@ func deleteAllTableDataWithOverlappingTombstones( t.L().Printf("adding range tombstone [%s, %s)", startKey, endKey) addDeleteRangeUsingTombstone(&ba, startKey, endKey) } - br, err := sendBatchRequest(ctx, t, c, 1, ba) + br, err := sendBatchRequest(ctx, t, c, 1, ba, randomSeed) if err != nil { return err } @@ -533,6 +543,7 @@ func deleteSomeTableDataWithOverlappingTombstones( rng *rand.Rand, tm tableMetadata, rangeKeys int, + randomSeed int64, ) error { t.Helper() encodeKey := func(index int64) roachpb.Key { @@ -559,7 +570,7 @@ func deleteSomeTableDataWithOverlappingTombstones( t.L().Printf("adding range tombstone [%s, %s)", startKey, endKey) addDeleteRangeUsingTombstone(&ba, startKey, endKey) } - br, err := sendBatchRequest(ctx, t, c, 1, ba) + br, err := sendBatchRequest(ctx, t, c, 1, ba, randomSeed) if err != nil { return err } @@ -595,7 +606,12 @@ func addDeleteRangeUsingTombstone(ba *kvpb.BatchRequest, startKey, endKey roachp } func sendBatchRequest( - ctx context.Context, t test.Test, c cluster.Cluster, node int, ba kvpb.BatchRequest, + ctx context.Context, + t test.Test, + c cluster.Cluster, + node int, + ba kvpb.BatchRequest, + randomSeed int64, ) (kvpb.BatchResponse, error) { reqArg, err := batchToJSONOrFatal(ba) if err != nil { @@ -605,8 +621,12 @@ func sendBatchRequest( if err := c.PutString(ctx, reqArg, requestFileName, 0755, c.Node(node)); err != nil { return kvpb.BatchResponse{}, err } - res, err := c.RunWithDetailsSingleNode(ctx, t.L(), c.Node(node), "./cockroach", "debug", - "send-kv-batch", "--insecure", requestFileName) + var debugEnv string + if randomSeed != 0 { + debugEnv = fmt.Sprintf("COCKROACH_RANDOM_SEED=%d ", randomSeed) + } + res, err := c.RunWithDetailsSingleNode( + ctx, t.L(), c.Node(node), debugEnv+"./cockroach debug send-kv-batch --insecure", requestFileName) if err != nil { return kvpb.BatchResponse{}, err } diff --git a/pkg/cmd/roachtest/tests/pgjdbc.go b/pkg/cmd/roachtest/tests/pgjdbc.go index 914aa88264ad..ac3af40fb6d7 100644 --- a/pkg/cmd/roachtest/tests/pgjdbc.go +++ b/pkg/cmd/roachtest/tests/pgjdbc.go @@ -72,6 +72,17 @@ func registerPgjdbc(r registry.Registry) { } } + if usingRuntimeAssertions(t) { + // This test assumes that multiple_active_portals_enabled is false, but through + // metamorphic constants, it is possible for them to be enabled. + if _, err = db.ExecContext(ctx, "SET multiple_active_portals_enabled=false"); err != nil { + t.Fatal(err) + } + if _, err = db.ExecContext(ctx, "ALTER DATABASE defaultdb SET multiple_active_portals_enabled=false"); err != nil { + t.Fatal(err) + } + } + t.Status("cloning pgjdbc and installing prerequisites") // Report the latest tag, but do not use it. The newest versions produces output that breaks our xml parser, // and we want to pin to the working version for now. diff --git a/pkg/cmd/roachtest/tests/schemachange.go b/pkg/cmd/roachtest/tests/schemachange.go index ff4d94203cba..a501b1a40ce5 100644 --- a/pkg/cmd/roachtest/tests/schemachange.go +++ b/pkg/cmd/roachtest/tests/schemachange.go @@ -341,7 +341,9 @@ func makeIndexAddTpccTest( } func registerSchemaChangeBulkIngest(r registry.Registry) { - r.Add(makeSchemaChangeBulkIngestTest(r, 5, 100000000, time.Minute*20)) + // Allow a long running time to account for runs that use a + // cockroach build with runtime assertions enabled. + r.Add(makeSchemaChangeBulkIngestTest(r, 5, 100000000, time.Minute*60)) } func makeSchemaChangeBulkIngestTest( diff --git a/pkg/cmd/roachtest/tests/split.go b/pkg/cmd/roachtest/tests/split.go index 9dd90e3e1528..c3d49b287094 100644 --- a/pkg/cmd/roachtest/tests/split.go +++ b/pkg/cmd/roachtest/tests/split.go @@ -429,12 +429,18 @@ func runLoadSplits(ctx context.Context, t test.Test, c cluster.Cluster, params s c.Put(ctx, t.Cockroach(), "./cockroach", crdbNodes) c.Put(ctx, t.DeprecatedWorkload(), "./workload", workloadNode) + // We run this without metamorphic constants as the tests make + // incorrect assumptions about the absolute values of QPS. + // See: https://github.com/cockroachdb/cockroach/issues/112664 + // TODO(DarrylWong): enable metamorphic contants once issue is resolved + settings := install.MakeClusterSettings() + settings.Env = append(settings.Env, "COCKROACH_INTERNAL_DISABLE_METAMORPHIC_TESTING=true") startOpts := option.DefaultStartOptsNoBackups() startOpts.RoachprodOpts.ExtraArgs = append(startOpts.RoachprodOpts.ExtraArgs, "--vmodule=split_queue=2,store_rebalancer=2,allocator=2,replicate_queue=2,"+ "decider=3,replica_split_load=1", ) - c.Start(ctx, t.L(), startOpts, install.MakeClusterSettings(), crdbNodes) + c.Start(ctx, t.L(), startOpts, settings, crdbNodes) m := c.NewMonitor(ctx, c.All()) m.Go(func(ctx context.Context) error { @@ -581,7 +587,9 @@ func runLargeRangeSplits(ctx context.Context, t test.Test, c cluster.Cluster, si rows := size / rowEstimate const minBytes = 16 << 20 // 16 MB - c.Put(ctx, t.Cockroach(), "./cockroach", c.All()) + // Never run with runtime assertions as this makes this test take + // too long to complete. + c.Put(ctx, t.StandardCockroach(), "./cockroach", c.All()) c.Put(ctx, t.DeprecatedWorkload(), "./workload", c.All()) numNodes := c.Spec().NodeCount c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings(), c.Node(1)) diff --git a/pkg/cmd/roachtest/tests/sqlsmith.go b/pkg/cmd/roachtest/tests/sqlsmith.go index 071fc807dd8f..a6a6580cc8cc 100644 --- a/pkg/cmd/roachtest/tests/sqlsmith.go +++ b/pkg/cmd/roachtest/tests/sqlsmith.go @@ -21,10 +21,12 @@ import ( "time" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" "github.com/cockroachdb/cockroach/pkg/internal/sqlsmith" + "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/util/randutil" "github.com/cockroachdb/errors" ) @@ -98,9 +100,9 @@ WITH into_db = 'defaultdb', unsafe_restore_incompatible_version; rng, seed := randutil.NewTestRand() t.L().Printf("seed: %d", seed) - // With 50% chance use the cockroach-short binary that was compiled with - // --crdb_test build tag. - maybeUseBuildWithEnabledAssertions(ctx, t, c, rng, 0.5 /* eaProb */) + c.SetRandomSeed(rng.Int63()) + c.Put(ctx, t.Cockroach(), "./cockroach") + c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings()) setupFunc, ok := setups[setupName] if !ok { diff --git a/pkg/cmd/roachtest/tests/tpce.go b/pkg/cmd/roachtest/tests/tpce.go index 4e17fefd441e..f970da6fddc4 100644 --- a/pkg/cmd/roachtest/tests/tpce.go +++ b/pkg/cmd/roachtest/tests/tpce.go @@ -145,7 +145,9 @@ func runTPCE(ctx context.Context, t test.Test, c cluster.Cluster, opts tpceOptio if opts.start == nil { opts.start = func(ctx context.Context, t test.Test, c cluster.Cluster) { t.Status("installing cockroach") - c.Put(ctx, t.Cockroach(), "./cockroach", crdbNodes) + // Never run with runtime assertions as this makes this test take + // too long to complete. + c.Put(ctx, t.StandardCockroach(), "./cockroach", crdbNodes) startOpts := option.DefaultStartOpts() startOpts.RoachprodOpts.StoreCount = opts.ssds @@ -258,7 +260,7 @@ func registerTPCE(r registry.Registry) { }, }) - // Weekly, large sclae configuration. + // Weekly, large scale configuration. largeWeekly := tpceOptions{ customers: 100_000, nodes: 5, diff --git a/pkg/cmd/roachtest/tests/tpch_concurrency.go b/pkg/cmd/roachtest/tests/tpch_concurrency.go index 82acfca333d2..17ab82fb5a55 100644 --- a/pkg/cmd/roachtest/tests/tpch_concurrency.go +++ b/pkg/cmd/roachtest/tests/tpch_concurrency.go @@ -33,7 +33,9 @@ func registerTPCHConcurrency(r registry.Registry) { c cluster.Cluster, disableStreamer bool, ) { - c.Put(ctx, t.Cockroach(), "./cockroach", c.Range(1, numNodes-1)) + // We run this test without runtime assertions as it uses too much memory + // budget. Even increasing the max to 80% still flaked. + c.Put(ctx, t.StandardCockroach(), "./cockroach", c.Range(1, numNodes-1)) c.Put(ctx, t.DeprecatedWorkload(), "./workload", c.Node(numNodes)) c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings(), c.Range(1, numNodes-1)) @@ -54,7 +56,7 @@ func registerTPCHConcurrency(r registry.Registry) { restartCluster := func(ctx context.Context, c cluster.Cluster, t test.Test) { c.Stop(ctx, t.L(), option.DefaultStopOpts(), c.Range(1, numNodes-1)) - c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings(), c.Range(1, numNodes-1)) + c.Start(ctx, t.L(), maybeUseMemoryBudget(t, 35), install.MakeClusterSettings(), c.Range(1, numNodes-1)) } // checkConcurrency returns an error if at least one node of the cluster diff --git a/pkg/cmd/roachtest/tests/util.go b/pkg/cmd/roachtest/tests/util.go index df13b17ef40d..131561f69746 100644 --- a/pkg/cmd/roachtest/tests/util.go +++ b/pkg/cmd/roachtest/tests/util.go @@ -15,14 +15,12 @@ import ( gosql "database/sql" "fmt" "io" - "math/rand" "net/http" "time" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/option" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" - "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/util/httputil" "github.com/cockroachdb/cockroach/pkg/util/retry" "github.com/cockroachdb/cockroach/pkg/util/timeutil" @@ -207,31 +205,27 @@ func setAdmissionControl(ctx context.Context, t test.Test, c cluster.Cluster, en } } -// maybeUseBuildWithEnabledAssertions stages the cockroach-short binary with -// enabled assertions with eaProb probability if that binary is available, -// otherwise stages the regular cockroach binary, and starts the cluster. -func maybeUseBuildWithEnabledAssertions( - ctx context.Context, t test.Test, c cluster.Cluster, rng *rand.Rand, eaProb float64, -) { - if rng.Float64() < eaProb { - // Check whether the cockroach-short binary is available. - if t.CockroachShort() != "" { - randomSeed := rng.Int63() - t.Status( - "using cockroach-short binary compiled with --crdb_test "+ - "build tag and COCKROACH_RANDOM_SEED=", randomSeed, - ) - c.Put(ctx, t.CockroachShort(), "./cockroach") - // We need to ensure that all nodes in the cluster start with the - // same random seed (if not, some assumptions can be violated - for - // example that coldata.BatchSize() values are the same on all - // nodes). - settings := install.MakeClusterSettings() - settings.Env = append(settings.Env, fmt.Sprintf("COCKROACH_RANDOM_SEED=%d", randomSeed)) - c.Start(ctx, t.L(), option.DefaultStartOpts(), settings) - return - } +// usingRuntimeAssertions returns true if calls to `t.Cockroach()` for +// this test will return the cockroach build with runtime +// assertions. Note that calling this function only makes sense if the +// test uploads cockroach using `t.Cockroach` (instead of calling +// t.StandardCockroach or t.RuntimeAssertionsCockroach directly). +func usingRuntimeAssertions(t test.Test) bool { + return t.Cockroach() == t.RuntimeAssertionsCockroach() +} + +// maybeUseMemoryBudget returns a StartOpts with the specified --max-sql-memory +// if runtime assertions are enabled, and the default values otherwise. +// A scheduled backup will not begin at the start of the roachtest. +func maybeUseMemoryBudget(t test.Test, budget int) option.StartOpts { + startOpts := option.DefaultStartOptsNoBackups() + if usingRuntimeAssertions(t) { + // When running tests with runtime assertions enabled, increase + // SQL's memory budget to avoid 'budget exceeded' failures. + startOpts.RoachprodOpts.ExtraArgs = append( + startOpts.RoachprodOpts.ExtraArgs, + fmt.Sprintf("--max-sql-memory=%d%%", budget), + ) } - c.Put(ctx, t.Cockroach(), "./cockroach") - c.Start(ctx, t.L(), option.DefaultStartOpts(), install.MakeClusterSettings()) + return startOpts } diff --git a/pkg/roachprod/install/cockroach.go b/pkg/roachprod/install/cockroach.go index 5d5cfa8bd629..2be1e626a704 100644 --- a/pkg/roachprod/install/cockroach.go +++ b/pkg/roachprod/install/cockroach.go @@ -19,6 +19,7 @@ import ( "os" "os/exec" "path/filepath" + "regexp" "runtime" "strconv" "strings" @@ -911,12 +912,30 @@ func (c *SyncedCluster) generateStartFlagsKV(node Node, startOpts StartOpts) []s return args } +var maxSQLMemoryRE = regexp.MustCompile(`^--max-sql-memory=(\d+)%$`) + // generateStartFlagsSQL generates `cockroach start` and `cockroach mt // start-sql` arguments that are relevant for the SQL layers, used by both KV // and storage layers. func (c *SyncedCluster) generateStartFlagsSQL(node Node, startOpts StartOpts) []string { var args []string - args = append(args, fmt.Sprintf("--max-sql-memory=%d%%", c.maybeScaleMem(25))) + formatArg := func(m int) string { + return fmt.Sprintf("--max-sql-memory=%d%%", c.maybeScaleMem(m)) + } + + if idx := argExists(startOpts.ExtraArgs, "--max-sql-memory"); idx == -1 { + args = append(args, formatArg(25)) + } else { + arg := startOpts.ExtraArgs[idx] + matches := maxSQLMemoryRE.FindStringSubmatch(arg) + mem, err := strconv.ParseInt(matches[1], 10, 64) + if err != nil { + panic(fmt.Sprintf("invalid --max-sql-memory parameter: %s", arg)) + } + + startOpts.ExtraArgs[idx] = formatArg(int(mem)) + } + if startOpts.Target == StartServiceForVirtualCluster { args = append(args, "--store", c.InstanceStoreDir(node, startOpts.VirtualClusterName, startOpts.SQLInstance)) } From 01d34f50d1c8db4f99cdc50d7eb9e996fa7326fa Mon Sep 17 00:00:00 2001 From: DarrylWong Date: Thu, 2 Nov 2023 13:35:31 -0400 Subject: [PATCH 2/2] roachtest: add metamorphicbuild label to github posting Adds a label indicating that a roachtest failure was on a metamorphic build on github issue posts. --- pkg/cmd/roachtest/github.go | 11 +++-- pkg/cmd/roachtest/github_test.go | 71 ++++++++++++++++-------------- pkg/cmd/roachtest/tests/copy.go | 2 +- pkg/cmd/roachtest/tests/mvcc_gc.go | 2 +- pkg/cmd/roachtest/tests/pgjdbc.go | 2 +- pkg/cmd/roachtest/tests/util.go | 6 +-- 6 files changed, 51 insertions(+), 43 deletions(-) diff --git a/pkg/cmd/roachtest/github.go b/pkg/cmd/roachtest/github.go index 172bf472fb2a..f577c5fb56ca 100644 --- a/pkg/cmd/roachtest/github.go +++ b/pkg/cmd/roachtest/github.go @@ -20,6 +20,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec" "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test" + "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/tests" "github.com/cockroachdb/cockroach/pkg/internal/team" rperrors "github.com/cockroachdb/cockroach/pkg/roachprod/errors" "github.com/cockroachdb/cockroach/pkg/roachprod/logger" @@ -139,6 +140,7 @@ func (g *githubIssues) createPostRequest( spec *registry.TestSpec, firstFailure failure, message string, + metamorphicBuild bool, ) (issues.PostRequest, error) { var mention []string var projColID int @@ -205,9 +207,10 @@ func (g *githubIssues) createPostRequest( artifacts := fmt.Sprintf("/%s", testName) clusterParams := map[string]string{ - roachtestPrefix("cloud"): spec.Cluster.Cloud, - roachtestPrefix("cpu"): fmt.Sprintf("%d", spec.Cluster.CPUs), - roachtestPrefix("ssd"): fmt.Sprintf("%d", spec.Cluster.SSDs), + roachtestPrefix("cloud"): spec.Cluster.Cloud, + roachtestPrefix("cpu"): fmt.Sprintf("%d", spec.Cluster.CPUs), + roachtestPrefix("ssd"): fmt.Sprintf("%d", spec.Cluster.SSDs), + roachtestPrefix("metamorphicBuild"): fmt.Sprintf("%t", metamorphicBuild), } // Emit CPU architecture only if it was specified; otherwise, it's captured below, assuming cluster was created. if spec.Cluster.Arch != "" { @@ -256,7 +259,7 @@ func (g *githubIssues) MaybePost(t *testImpl, l *logger.Logger, message string) return nil } - postRequest, err := g.createPostRequest(t.Name(), t.start, t.end, t.spec, t.firstFailure(), message) + postRequest, err := g.createPostRequest(t.Name(), t.start, t.end, t.spec, t.firstFailure(), message, tests.UsingRuntimeAssertions(t)) if err != nil { return err } diff --git a/pkg/cmd/roachtest/github_test.go b/pkg/cmd/roachtest/github_test.go index 85d9aa103f87..4f3f0e6c1fa1 100644 --- a/pkg/cmd/roachtest/github_test.go +++ b/pkg/cmd/roachtest/github_test.go @@ -132,6 +132,7 @@ func TestCreatePostRequest(t *testing.T) { clusterCreationFailed bool loadTeamsFailed bool localSSD bool + metamorphicBuild bool extraLabels []string arch vm.CPUArch failure failure @@ -140,55 +141,59 @@ func TestCreatePostRequest(t *testing.T) { expectedSkipTestFailure bool expectedParams map[string]string }{ - {true, false, false, false, nil, "", createFailure(errors.New("other")), true, false, false, + {true, false, false, false, false, nil, "", createFailure(errors.New("other")), true, false, false, prefixAll(map[string]string{ - "cloud": "gce", - "encrypted": "false", - "fs": "ext4", - "ssd": "0", - "cpu": "4", - "arch": "amd64", - "localSSD": "false", + "cloud": "gce", + "encrypted": "false", + "fs": "ext4", + "ssd": "0", + "cpu": "4", + "arch": "amd64", + "localSSD": "false", + "metamorphicBuild": "false", }), }, - {true, false, false, true, nil, vm.ArchARM64, createFailure(errClusterProvisioningFailed), true, false, true, + {true, false, false, true, true, nil, vm.ArchARM64, createFailure(errClusterProvisioningFailed), true, false, true, prefixAll(map[string]string{ - "cloud": "gce", - "encrypted": "false", - "fs": "ext4", - "ssd": "0", - "cpu": "4", - "arch": "arm64", - "localSSD": "true", + "cloud": "gce", + "encrypted": "false", + "fs": "ext4", + "ssd": "0", + "cpu": "4", + "arch": "arm64", + "localSSD": "true", + "metamorphicBuild": "true", }), }, // Assert that release-blocker label doesn't exist when // !nonReleaseBlocker and issue is an SSH flake. Also ensure that // in the event of a failed cluster creation, nil `vmOptions` and // `clusterImpl` are not dereferenced - {false, true, false, false, nil, "", createFailure(rperrors.ErrSSH255), true, false, true, + {false, true, false, false, false, nil, "", createFailure(rperrors.ErrSSH255), true, false, true, prefixAll(map[string]string{ - "cloud": "gce", - "ssd": "0", - "cpu": "4", + "cloud": "gce", + "ssd": "0", + "cpu": "4", + "metamorphicBuild": "false", }), }, //Simulate failure loading TEAMS.yaml - {true, false, true, false, nil, "", createFailure(errors.New("other")), false, false, false, nil}, + {true, false, true, false, false, nil, "", createFailure(errors.New("other")), false, false, false, nil}, //Error during post test assertions - {true, false, false, false, nil, "", createFailure(errDuringPostAssertions), false, false, false, nil}, + {true, false, false, false, false, nil, "", createFailure(errDuringPostAssertions), false, false, false, nil}, //Error during dns operation - {true, false, false, false, nil, "", createFailure(gce.ErrDNSOperation), true, false, true, nil}, + {true, false, false, false, false, nil, "", createFailure(gce.ErrDNSOperation), true, false, true, nil}, // Assert that extra labels in the test spec are added to the issue. - {true, false, false, false, []string{"foo-label"}, "", createFailure(errors.New("other")), true, false, false, + {true, false, false, false, false, []string{"foo-label"}, "", createFailure(errors.New("other")), true, false, false, prefixAll(map[string]string{ - "cloud": "gce", - "encrypted": "false", - "fs": "ext4", - "ssd": "0", - "cpu": "4", - "arch": "amd64", - "localSSD": "false", + "cloud": "gce", + "encrypted": "false", + "fs": "ext4", + "ssd": "0", + "cpu": "4", + "arch": "amd64", + "localSSD": "false", + "metamorphicBuild": "false", }), }, } @@ -239,10 +244,10 @@ func TestCreatePostRequest(t *testing.T) { if c.loadTeamsFailed { // Assert that if TEAMS.yaml cannot be loaded then function errors. - _, err := github.createPostRequest("github_test", ti.start, ti.end, testSpec, c.failure, "message") + _, err := github.createPostRequest("github_test", ti.start, ti.end, testSpec, c.failure, "message", c.metamorphicBuild) assert.Error(t, err, "Expected an error in createPostRequest when loading teams fails, but got nil") } else { - req, err := github.createPostRequest("github_test", ti.start, ti.end, testSpec, c.failure, "message") + req, err := github.createPostRequest("github_test", ti.start, ti.end, testSpec, c.failure, "message", c.metamorphicBuild) assert.NoError(t, err, "Expected no error in createPostRequest") r := &issues.Renderer{} diff --git a/pkg/cmd/roachtest/tests/copy.go b/pkg/cmd/roachtest/tests/copy.go index f1081730868d..a8152c2c2e5c 100644 --- a/pkg/cmd/roachtest/tests/copy.go +++ b/pkg/cmd/roachtest/tests/copy.go @@ -60,7 +60,7 @@ func registerCopy(r registry.Registry) { // Make sure the copy commands have sufficient time to finish when // runtime assertions are enabled. copyTimeout := 10 * time.Minute - if usingRuntimeAssertions(t) { + if UsingRuntimeAssertions(t) { copyTimeout = 20 * time.Minute } diff --git a/pkg/cmd/roachtest/tests/mvcc_gc.go b/pkg/cmd/roachtest/tests/mvcc_gc.go index 1aee02e3798c..bc7281e88695 100644 --- a/pkg/cmd/roachtest/tests/mvcc_gc.go +++ b/pkg/cmd/roachtest/tests/mvcc_gc.go @@ -79,7 +79,7 @@ func runMVCCGC(ctx context.Context, t test.Test, c cluster.Cluster) { const gcRetryTimeout = 7 * time.Minute var randomSeed int64 - if usingRuntimeAssertions(t) { + if UsingRuntimeAssertions(t) { // Do not use `0` as that is reserved to mean that we are running // without runtime assertions. for randomSeed == 0 { diff --git a/pkg/cmd/roachtest/tests/pgjdbc.go b/pkg/cmd/roachtest/tests/pgjdbc.go index ac3af40fb6d7..936407c55dd3 100644 --- a/pkg/cmd/roachtest/tests/pgjdbc.go +++ b/pkg/cmd/roachtest/tests/pgjdbc.go @@ -72,7 +72,7 @@ func registerPgjdbc(r registry.Registry) { } } - if usingRuntimeAssertions(t) { + if UsingRuntimeAssertions(t) { // This test assumes that multiple_active_portals_enabled is false, but through // metamorphic constants, it is possible for them to be enabled. if _, err = db.ExecContext(ctx, "SET multiple_active_portals_enabled=false"); err != nil { diff --git a/pkg/cmd/roachtest/tests/util.go b/pkg/cmd/roachtest/tests/util.go index 131561f69746..4edfa28ad676 100644 --- a/pkg/cmd/roachtest/tests/util.go +++ b/pkg/cmd/roachtest/tests/util.go @@ -205,12 +205,12 @@ func setAdmissionControl(ctx context.Context, t test.Test, c cluster.Cluster, en } } -// usingRuntimeAssertions returns true if calls to `t.Cockroach()` for +// UsingRuntimeAssertions returns true if calls to `t.Cockroach()` for // this test will return the cockroach build with runtime // assertions. Note that calling this function only makes sense if the // test uploads cockroach using `t.Cockroach` (instead of calling // t.StandardCockroach or t.RuntimeAssertionsCockroach directly). -func usingRuntimeAssertions(t test.Test) bool { +func UsingRuntimeAssertions(t test.Test) bool { return t.Cockroach() == t.RuntimeAssertionsCockroach() } @@ -219,7 +219,7 @@ func usingRuntimeAssertions(t test.Test) bool { // A scheduled backup will not begin at the start of the roachtest. func maybeUseMemoryBudget(t test.Test, budget int) option.StartOpts { startOpts := option.DefaultStartOptsNoBackups() - if usingRuntimeAssertions(t) { + if UsingRuntimeAssertions(t) { // When running tests with runtime assertions enabled, increase // SQL's memory budget to avoid 'budget exceeded' failures. startOpts.RoachprodOpts.ExtraArgs = append(