From f4008d6f0334373deec1ceb4e700e7fa07734763 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Wed, 17 Oct 2018 19:57:22 -0400 Subject: [PATCH] storage: re-enqueue Raft groups on paginated application Fixes #31330. This change re-enqueues Raft groups for processing immediately if they still have more to do after a Raft ready iteration. This comes up in practice when a Range has sufficient load to force Raft application pagination. See #31330 for a discussion on the symptoms this can cause. Release note (bug fix): Fix bug where Raft followers could fall behind leaders with entry application, causing stalls during splits. --- pkg/storage/replica.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pkg/storage/replica.go b/pkg/storage/replica.go index 57a6427d822f..e99a0ac69da6 100644 --- a/pkg/storage/replica.go +++ b/pkg/storage/replica.go @@ -3837,6 +3837,14 @@ func (r *Replica) handleRaftReadyRaftMuLocked( const expl = "during advance" if err := r.withRaftGroup(func(raftGroup *raft.RawNode) (bool, error) { raftGroup.Advance(rd) + + // If the Raft group still has more to process then we immediately + // re-enqueue it for another round of processing. This is possible if + // the group's committed entries were paginated due to size limitations + // and we didn't apply all of them in this pass. + if raftGroup.HasReady() { + r.store.enqueueRaftUpdateCheck(r.RangeID) + } return true, nil }); err != nil { return stats, expl, errors.Wrap(err, expl)