From c013b22f88dc6b1925149ac9ccc0e8449070a4cd Mon Sep 17 00:00:00 2001
From: Nick Travers
Date: Mon, 27 Mar 2023 22:19:29 +0000
Subject: [PATCH] roachtest: use persistent disks for disk-stall tests

Currently, the `disk-stall` tests use local SSDs. When run on GCE VMs,
these tests flake at a higher rate due to known issues with fsync
latency on local SSDs. Switch the tests to use persistent disks
instead.

Touches: #99372.

Release note: None.
---
 pkg/cmd/roachtest/tests/disk_stall.go | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/pkg/cmd/roachtest/tests/disk_stall.go b/pkg/cmd/roachtest/tests/disk_stall.go
index a3349fdc0cb7..0c0918c986e0 100644
--- a/pkg/cmd/roachtest/tests/disk_stall.go
+++ b/pkg/cmd/roachtest/tests/disk_stall.go
@@ -45,12 +45,17 @@ func registerDiskStalledDetection(r registry.Registry) {
 			return &cgroupDiskStaller{t: t, c: c, readOrWrite: []string{"write"}, logsToo: true}
 		},
 	}
+	makeSpec := func() spec.ClusterSpec {
+		s := r.MakeClusterSpec(4, spec.ReuseNone())
+		s.PreferLocalSSD = false
+		return s
+	}
 	for name, makeStaller := range stallers {
 		name, makeStaller := name, makeStaller
 		r.Add(registry.TestSpec{
 			Name:    fmt.Sprintf("disk-stalled/%s", name),
 			Owner:   registry.OwnerStorage,
-			Cluster: r.MakeClusterSpec(4, spec.ReuseNone()),
+			Cluster: makeSpec(),
 			Timeout: 30 * time.Minute,
 			Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
 				runDiskStalledDetection(ctx, t, c, makeStaller(t, c), true /* doStall */)
@@ -76,7 +81,7 @@
 			stallLogDir, stallDataDir,
 		),
 		Owner:   registry.OwnerStorage,
-		Cluster: r.MakeClusterSpec(4, spec.ReuseNone()),
+		Cluster: makeSpec(),
 		Timeout: 30 * time.Minute,
 		Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
 			runDiskStalledDetection(ctx, t, c, &fuseDiskStaller{
@@ -95,7 +100,7 @@ func runDiskStalledDetection(
 	ctx context.Context, t test.Test, c cluster.Cluster, s diskStaller, doStall bool,
 ) {
-	startOpts := option.DefaultStartOpts()
+	startOpts := option.DefaultStartOptsNoBackups()
 	startOpts.RoachprodOpts.ExtraArgs = []string{
 		"--store", s.DataDir(),
 		"--log", fmt.Sprintf(`{sinks: {stderr: {filter: INFO}}, file-defaults: {dir: "%s"}}`, s.LogDir()),
@@ -376,9 +381,9 @@ func (s *cgroupDiskStaller) device() (major, minor int) {
 	// `cat /proc/partitions` and find `deviceName`
 	switch s.c.Spec().Cloud {
 	case spec.GCE:
-		// ls -l /dev/nvme0n1
-		// brw-rw---- 1 root disk 259, 0 Jan 26 20:05 /dev/nvme0n1
-		return 259, 0
+		// ls -l /dev/sdb
+		// brw-rw---- 1 root disk 8, 16 Mar 27 22:08 /dev/sdb
+		return 8, 16
 	default:
 		s.t.Fatalf("unsupported cloud %q", s.c.Spec().Cloud)
 		return 0, 0
@@ -455,7 +460,7 @@ func (s *fuseDiskStaller) Unstall(ctx context.Context, nodes option.NodeListOpti
 func getDevice(t test.Test, s spec.ClusterSpec) string {
 	switch s.Cloud {
 	case spec.GCE:
-		return "/dev/nvme0n1"
+		return "/dev/sdb"
 	case spec.AWS:
 		return "/dev/nvme1n1"
 	default:
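
Note on the device() change above: the cgroupDiskStaller needs a (major, minor)
pair because the cgroup v2 io controller addresses block devices as
"MAJOR:MINOR", e.g. in the io.max file. Below is a minimal sketch of inducing a
write stall that way; the cgroup path and byte limit are illustrative
assumptions, not the test's actual values, and the real staller logic lives
elsewhere in disk_stall.go:

    package main

    import (
    	"fmt"
    	"os"
    	"path/filepath"
    )

    // throttleWrites sketches how a cgroup v2 write stall can be induced:
    // the io controller addresses block devices as "MAJOR:MINOR", which is
    // why the staller must know (8, 16) for GCE's /dev/sdb.
    func throttleWrites(cgroupDir string, major, minor int) error {
    	// ~4 bytes/sec is effectively a stall for any real workload.
    	limit := fmt.Sprintf("%d:%d wbps=4", major, minor)
    	return os.WriteFile(filepath.Join(cgroupDir, "io.max"), []byte(limit), 0o644)
    }

    func main() {
    	// Hypothetical cgroup for the cockroach process; requires root
    	// and a cgroup v2 hierarchy.
    	err := throttleWrites("/sys/fs/cgroup/system.slice/cockroach.service", 8, 16)
    	if err != nil {
    		fmt.Println("failed to write io.max:", err)
    	}
    }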
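
The patch hardcodes 8, 16 on the assumption that GCE attaches the first
persistent disk as /dev/sdb, just as it previously hardcoded 259, 0 for the
local NVMe SSD at /dev/nvme0n1. If that assumption ever needs checking, the
pair could instead be derived at runtime; the following is a sketch of one way
to do so, where deviceMajorMinor is a hypothetical helper that is not part of
this patch or the roachtest codebase:

    package main

    import (
    	"fmt"

    	"golang.org/x/sys/unix"
    )

    // deviceMajorMinor stats a block device node and unpacks the major and
    // minor numbers from its rdev field, instead of hardcoding them.
    // Linux-only.
    func deviceMajorMinor(path string) (major, minor uint32, err error) {
    	var st unix.Stat_t
    	if err := unix.Stat(path, &st); err != nil {
    		return 0, 0, err
    	}
    	rdev := uint64(st.Rdev) // Rdev's width varies by platform
    	return unix.Major(rdev), unix.Minor(rdev), nil
    }

    func main() {
    	major, minor, err := deviceMajorMinor("/dev/sdb")
    	if err != nil {
    		fmt.Println("stat failed:", err)
    		return
    	}
    	// On a GCE VM with a persistent disk this should print "8, 16".
    	fmt.Printf("/dev/sdb -> %d, %d\n", major, minor)
    }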