From e29011db847b03afd25c5d54b32778bb76f8a0cf Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Tue, 2 Jun 2020 11:51:58 -0400 Subject: [PATCH] roachtest: log on error in isAlive Closes #49358. --- pkg/cmd/roachtest/clock_jump_crash.go | 6 +++--- pkg/cmd/roachtest/clock_monotonic.go | 8 ++++---- pkg/cmd/roachtest/clock_util.go | 19 +++++++++++-------- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/pkg/cmd/roachtest/clock_jump_crash.go b/pkg/cmd/roachtest/clock_jump_crash.go index 0615c9799803..80b02198c636 100644 --- a/pkg/cmd/roachtest/clock_jump_crash.go +++ b/pkg/cmd/roachtest/clock_jump_crash.go @@ -49,7 +49,7 @@ func runClockJump(ctx context.Context, t *test, c *cluster, tc clockJumpTestCase // Wait for Cockroach to process the above cluster setting time.Sleep(10 * time.Second) - if !isAlive(db) { + if !isAlive(db, c.l) { t.Fatal("Node unexpectedly crashed") } @@ -65,7 +65,7 @@ func runClockJump(ctx context.Context, t *test, c *cluster, tc clockJumpTestCase // seconds before checking whether the node is alive and // restarting it if not. 
time.Sleep(3 * time.Second) - if !isAlive(db) { + if !isAlive(db, c.l) { c.Start(ctx, t, c.Node(1)) } }() @@ -76,7 +76,7 @@ func runClockJump(ctx context.Context, t *test, c *cluster, tc clockJumpTestCase time.Sleep(3 * time.Second) t.Status("validating health") - aliveAfterOffset = isAlive(db) + aliveAfterOffset = isAlive(db, c.l) if aliveAfterOffset != tc.aliveAfterOffset { t.Fatalf("Expected node health %v, got %v", tc.aliveAfterOffset, aliveAfterOffset) } diff --git a/pkg/cmd/roachtest/clock_monotonic.go b/pkg/cmd/roachtest/clock_monotonic.go index 7844b19c94fc..47220a0bfcc6 100644 --- a/pkg/cmd/roachtest/clock_monotonic.go +++ b/pkg/cmd/roachtest/clock_monotonic.go @@ -48,7 +48,7 @@ func runClockMonotonicity(ctx context.Context, t *test, c *cluster, tc clockMono // Wait for Cockroach to process the above cluster setting time.Sleep(10 * time.Second) - if !isAlive(db) { + if !isAlive(db, c.l) { t.Fatal("Node unexpectedly crashed") } @@ -59,7 +59,7 @@ func runClockMonotonicity(ctx context.Context, t *test, c *cluster, tc clockMono // Recover from the injected clock offset after validation completes. 
defer func() { - if !isAlive(db) { + if !isAlive(db, c.l) { t.Fatal("Node unexpectedly crashed") } // Stop cockroach node before recovering from clock offset as this clock @@ -70,7 +70,7 @@ func runClockMonotonicity(ctx context.Context, t *test, c *cluster, tc clockMono offsetInjector.recover(ctx, c.spec.NodeCount) c.Start(ctx, t, c.Node(c.spec.NodeCount)) - if !isAlive(db) { + if !isAlive(db, c.l) { t.Fatal("Node unexpectedly crashed") } }() @@ -83,7 +83,7 @@ func runClockMonotonicity(ctx context.Context, t *test, c *cluster, tc clockMono t.Status("starting cockroach post offset") c.Start(ctx, t, c.Node(c.spec.NodeCount)) - if !isAlive(db) { + if !isAlive(db, c.l) { t.Fatal("Node unexpectedly crashed") } diff --git a/pkg/cmd/roachtest/clock_util.go b/pkg/cmd/roachtest/clock_util.go index fd5887210e99..345675807faa 100644 --- a/pkg/cmd/roachtest/clock_util.go +++ b/pkg/cmd/roachtest/clock_util.go @@ -17,13 +17,16 @@ import ( "time" ) -// isAlive returns whether the node queried by db is alive -func isAlive(db *gosql.DB) bool { +// isAlive returns whether the node queried by db is alive. +func isAlive(db *gosql.DB, l *logger) bool { _, err := db.Exec("SHOW DATABASES") + if err != nil { + l.Printf("isAlive returned err=%v\n", err) + } return err == nil } -// dbUnixEpoch returns the current time in db +// dbUnixEpoch returns the current time in db. func dbUnixEpoch(db *gosql.DB) (float64, error) { var epoch float64 if err := db.QueryRow("SELECT now()::DECIMAL").Scan(&epoch); err != nil { @@ -32,13 +35,13 @@ func dbUnixEpoch(db *gosql.DB) (float64, error) { return epoch, nil } -// offsetInjector is used to inject clock offsets in roachtests +// offsetInjector is used to inject clock offsets in roachtests. type offsetInjector struct { c *cluster deployed bool } -// deploy installs ntp and downloads / compiles bumptime used to create a clock offset +// deploy installs ntp and downloads / compiles bumptime used to create a clock offset. 
func (oi *offsetInjector) deploy(ctx context.Context) error { if err := oi.c.RunE(ctx, oi.c.All(), "test -x ./bumptime"); err == nil { oi.deployed = true @@ -71,7 +74,7 @@ func (oi *offsetInjector) deploy(ctx context.Context) error { return nil } -// offset injects a offset of s into the node with the given nodeID +// offset injects an offset of s into the node with the given nodeID. func (oi *offsetInjector) offset(ctx context.Context, nodeID int, s time.Duration) { if !oi.deployed { oi.c.t.Fatal("Offset injector must be deployed before injecting a clock offset") @@ -85,7 +88,7 @@ func (oi *offsetInjector) offset(ctx context.Context, nodeID int, s time.Duratio } // recover force syncs time on the node with the given nodeID to recover -// from any offsets +// from any offsets. func (oi *offsetInjector) recover(ctx context.Context, nodeID int) { if !oi.deployed { oi.c.t.Fatal("Offset injector must be deployed before recovering from clock offsets") @@ -106,7 +109,7 @@ func (oi *offsetInjector) recover(ctx context.Context, nodeID int) { } // newOffsetInjector creates a offsetInjector which can be used to inject -// and recover from clock offsets +// and recover from clock offsets. func newOffsetInjector(c *cluster) *offsetInjector { return &offsetInjector{c: c} }