Skip to content

Commit af6a6e8

Browse files
knztbg
andcommitted
storage: deflake TestNodeLivenessStatusMap
Prior to this patch, this test would fail `stressrace` after a few dozen iterations. The root cause of this was the invalid call to `t.Parallel()`, which this patch removes. Additionally, this patch adapts TimeUntilStoreDead for each test case to avoid flakes, and removes a previous hack obviated by this simplification. Release note: None Co-authored-by: Tobias Schottdorf <tobias.schottdorf@gmail.com>
1 parent d50ebea commit af6a6e8

File tree

1 file changed

+15
-17
lines changed

1 file changed

+15
-17
lines changed

pkg/storage/node_liveness_test.go

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -855,21 +855,23 @@ func TestNodeLivenessStatusMap(t *testing.T) {
855855
// See what comes up in the status.
856856
callerNodeLiveness := firstServer.GetNodeLiveness()
857857

858-
type expectedStatus struct {
858+
type testCase struct {
859859
nodeID roachpb.NodeID
860860
expectedStatus storagepb.NodeLivenessStatus
861861
}
862-
testData := []expectedStatus{
862+
863+
// Below we're going to check that all statuses converge and stabilize
864+
// to a known situation.
865+
testData := []testCase{
863866
{liveNodeID, storagepb.NodeLivenessStatus_LIVE},
864867
{deadNodeID, storagepb.NodeLivenessStatus_DEAD},
865868
{decommissioningNodeID, storagepb.NodeLivenessStatus_DECOMMISSIONING},
866869
{removedNodeID, storagepb.NodeLivenessStatus_DECOMMISSIONED},
867870
}
868871

869872
for _, test := range testData {
870-
t.Run(test.expectedStatus.String(), func(t *testing.T) {
873+
t.Run(fmt.Sprintf("n%d->%s", test.nodeID, test.expectedStatus), func(t *testing.T) {
871874
nodeID, expectedStatus := test.nodeID, test.expectedStatus
872-
t.Parallel()
873875

874876
testutils.SucceedsSoon(t, func() error {
875877
// Ensure that dead nodes are quickly recognized as dead by
@@ -882,21 +884,17 @@ func TestNodeLivenessStatusMap(t *testing.T) {
882884
storage.TimeUntilStoreDead.Override(&firstServer.ClusterSettings().SV,
883885
storage.TestTimeUntilStoreDead)
884886

885-
log.Infof(ctx, "checking expected status for node %d", nodeID)
887+
log.Infof(ctx, "checking expected status (%s) for node %d", expectedStatus, nodeID)
886888
nodeStatuses := callerNodeLiveness.GetLivenessStatusMap()
887-
if st, ok := nodeStatuses[nodeID]; !ok {
888-
return fmt.Errorf("%s node not in statuses", expectedStatus)
889-
} else {
890-
if st != expectedStatus {
891-
if expectedStatus == storagepb.NodeLivenessStatus_DECOMMISSIONING && st == storagepb.NodeLivenessStatus_DECOMMISSIONED {
892-
// Server somehow shut down super-fast. Tolerating the mismatch.
893-
return nil
894-
}
895-
return fmt.Errorf("unexpected status: got %s, expected %s",
896-
st, expectedStatus)
897-
}
889+
st, ok := nodeStatuses[nodeID]
890+
if !ok {
891+
return errors.Errorf("node %d: not in statuses\n", nodeID)
892+
}
893+
if st != expectedStatus {
894+
return errors.Errorf("node %d: unexpected status: got %s, expected %s\n",
895+
nodeID, st, expectedStatus,
896+
)
898897
}
899-
log.Infof(ctx, "node %d status ok", nodeID)
900898
return nil
901899
})
902900
})

0 commit comments

Comments
 (0)