From b7c596dca9b90c1a7d77620b7e784f45f6b272f7 Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 11 Dec 2018 13:36:38 +0100 Subject: [PATCH 1/2] roachtest: don't fail tests based on slow health checker We know there can be a backlog of Raft snapshots at the beginning of the test. This isn't ideal, but we know about it and have #32046 tracking it. Closes #32859. Release note: None --- pkg/cmd/roachtest/restore.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg/cmd/roachtest/restore.go b/pkg/cmd/roachtest/restore.go index 702ebd35f5e8..679d38c23a8d 100644 --- a/pkg/cmd/roachtest/restore.go +++ b/pkg/cmd/roachtest/restore.go @@ -134,7 +134,10 @@ func (hc *HealthChecker) Runner(ctx context.Context) (err error) { } if elapsed := timeutil.Since(tBegin); elapsed > 10*time.Second { - return errors.Errorf("health check against node %d took %s", nodeIdx, elapsed) + err := errors.Errorf("health check against node %d took %s", nodeIdx, elapsed) + logger.Printf(err.Error() + "\n") + // TODO(tschottdorf): see method comment. + // return err } } } From 82277afb35aa131f43338bdc6d3d94112348c9e4 Mon Sep 17 00:00:00 2001 From: Tobias Schottdorf Date: Tue, 11 Dec 2018 13:38:02 +0100 Subject: [PATCH 2/2] roachtest: don't filter gossip alerts We don't fail the test run on them anyway, so we might as well log them all. Release note: None --- pkg/cmd/roachtest/restore.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/cmd/roachtest/restore.go b/pkg/cmd/roachtest/restore.go index 679d38c23a8d..42cd8ebe468c 100644 --- a/pkg/cmd/roachtest/restore.go +++ b/pkg/cmd/roachtest/restore.go @@ -110,7 +110,7 @@ func (hc *HealthChecker) Runner(ctx context.Context) (err error) { } // TODO(tschottdorf): remove replicate queue failures when the cluster first starts. // Ditto queue.raftsnapshot.process.failure. - rows, err := db.QueryContext(ctx, `SELECT * FROM crdb_internal.gossip_alerts WHERE description != 'queue.replicate.process.failure' AND description != 'ranges.underreplicated' AND description != 'queue.raftsnapshot.process.failure' ORDER BY node_id ASC, store_id ASC`) + rows, err := db.QueryContext(ctx, `SELECT * FROM crdb_internal.gossip_alerts ORDER BY node_id ASC, store_id ASC`) _ = db.Close() if err != nil { return err