Skip to content

Commit

Permalink
roachtest: use persistent disks for disk-stall tests
Browse files Browse the repository at this point in the history
Currently, the `disk-stalled` tests use local SSDs. When run on GCE VMs,
they flake more often because of known fsync latency issues with local
SSDs.

Switch the tests to use persistent disks instead.

Touches: #99372.

Release note: None.
  • Loading branch information
nicktrav committed Mar 27, 2023
1 parent bb11b30 commit c013b22
Showing 1 changed file with 12 additions and 7 deletions.
19 changes: 12 additions & 7 deletions pkg/cmd/roachtest/tests/disk_stall.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,17 @@ func registerDiskStalledDetection(r registry.Registry) {
return &cgroupDiskStaller{t: t, c: c, readOrWrite: []string{"write"}, logsToo: true}
},
}
makeSpec := func() spec.ClusterSpec {
s := r.MakeClusterSpec(4, spec.ReuseNone())
s.PreferLocalSSD = false
return s
}
for name, makeStaller := range stallers {
name, makeStaller := name, makeStaller
r.Add(registry.TestSpec{
Name: fmt.Sprintf("disk-stalled/%s", name),
Owner: registry.OwnerStorage,
Cluster: r.MakeClusterSpec(4, spec.ReuseNone()),
Cluster: makeSpec(),
Timeout: 30 * time.Minute,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runDiskStalledDetection(ctx, t, c, makeStaller(t, c), true /* doStall */)
Expand All @@ -76,7 +81,7 @@ func registerDiskStalledDetection(r registry.Registry) {
stallLogDir, stallDataDir,
),
Owner: registry.OwnerStorage,
Cluster: r.MakeClusterSpec(4, spec.ReuseNone()),
Cluster: makeSpec(),
Timeout: 30 * time.Minute,
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
runDiskStalledDetection(ctx, t, c, &fuseDiskStaller{
Expand All @@ -95,7 +100,7 @@ func registerDiskStalledDetection(r registry.Registry) {
func runDiskStalledDetection(
ctx context.Context, t test.Test, c cluster.Cluster, s diskStaller, doStall bool,
) {
startOpts := option.DefaultStartOpts()
startOpts := option.DefaultStartOptsNoBackups()
startOpts.RoachprodOpts.ExtraArgs = []string{
"--store", s.DataDir(),
"--log", fmt.Sprintf(`{sinks: {stderr: {filter: INFO}}, file-defaults: {dir: "%s"}}`, s.LogDir()),
Expand Down Expand Up @@ -376,9 +381,9 @@ func (s *cgroupDiskStaller) device() (major, minor int) {
// `cat /proc/partitions` and find `deviceName`
switch s.c.Spec().Cloud {
case spec.GCE:
// ls -l /dev/nvme0n1
// brw-rw---- 1 root disk 259, 0 Jan 26 20:05 /dev/nvme0n1
return 259, 0
// ls -l /dev/sdb
// brw-rw---- 1 root disk 8, 16 Mar 27 22:08 /dev/sdb
return 8, 16
default:
s.t.Fatalf("unsupported cloud %q", s.c.Spec().Cloud)
return 0, 0
Expand Down Expand Up @@ -455,7 +460,7 @@ func (s *fuseDiskStaller) Unstall(ctx context.Context, nodes option.NodeListOpti
func getDevice(t test.Test, s spec.ClusterSpec) string {
switch s.Cloud {
case spec.GCE:
return "/dev/nvme0n1"
return "/dev/sdb"
case spec.AWS:
return "/dev/nvme1n1"
default:
Expand Down

0 comments on commit c013b22

Please sign in to comment.