Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

storage: more aggressively replica GC learner replicas #41300

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pkg/storage/client_replica_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1723,6 +1723,9 @@ func TestSystemZoneConfigs(t *testing.T) {
replicas := make(map[roachpb.RangeID]roachpb.RangeDescriptor)
for _, s := range tc.Servers {
if err := storage.IterateRangeDescriptors(ctx, s.Engines()[0], func(desc roachpb.RangeDescriptor) (bool, error) {
if len(desc.Replicas().Learners()) > 0 {
return false, fmt.Errorf("descriptor contains learners: %v", desc)
}
if existing, ok := replicas[desc.RangeID]; ok && !existing.Equal(desc) {
return false, fmt.Errorf("mismatch between\n%s\n%s", &existing, &desc)
}
Expand Down
36 changes: 22 additions & 14 deletions pkg/storage/replica_gc_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,13 @@ const (
// ReplicaGCQueueInactivityThreshold is the inactivity duration after which
// a range will be considered for garbage collection. Exported for testing.
ReplicaGCQueueInactivityThreshold = 10 * 24 * time.Hour // 10 days
// ReplicaGCQueueCandidateTimeout is the duration after which a range in
// ReplicaGCQueueSuspectTimeout is the duration after which a Replica which
// is suspected to be removed should be processed by the queue.
// A Replica is suspected to have been removed if either it is in the
// candidate Raft state (which is a typical sign of having been removed
// from the group) will be considered for garbage collection.
ReplicaGCQueueCandidateTimeout = 1 * time.Second
// from the group) or it is a learner which has been removed but never heard
// about that removal.
ReplicaGCQueueSuspectTimeout = 1 * time.Second
)

// Priorities for the replica GC queue.
Expand All @@ -44,7 +47,10 @@ const (

// Replicas that have been removed from the range spend a lot of
// time in the candidate state, so treat them as higher priority.
replicaGCPriorityCandidate = 1.0
// Learner replicas which have been removed never enter the candidate state
// but in the common case a replica should not be a learner for long so
// treat it the same as a candidate.
replicaGCPrioritySuspect = 1.0

// The highest priority is used when we have definite evidence
// (external to replicaGCQueue) that the replica has been removed.
Expand Down Expand Up @@ -120,7 +126,8 @@ func (rgcq *replicaGCQueue) shouldQueue(
log.Errorf(ctx, "could not read last replica GC timestamp: %+v", err)
return false, 0
}
if _, currentMember := repl.Desc().GetReplicaDescriptor(repl.store.StoreID()); !currentMember {
replDesc, currentMember := repl.Desc().GetReplicaDescriptor(repl.store.StoreID())
if !currentMember {
return true, replicaGCPriorityRemoved
}

Expand All @@ -132,10 +139,11 @@ func (rgcq *replicaGCQueue) shouldQueue(
lastActivity.Forward(*lease.ProposedTS)
}

var isCandidate bool
isSuspect := replDesc.GetType() == roachpb.LEARNER
if raftStatus := repl.RaftStatus(); raftStatus != nil {
isCandidate = (raftStatus.SoftState.RaftState == raft.StateCandidate ||
raftStatus.SoftState.RaftState == raft.StatePreCandidate)
isSuspect = isSuspect ||
(raftStatus.SoftState.RaftState == raft.StateCandidate ||
raftStatus.SoftState.RaftState == raft.StatePreCandidate)
} else {
// If a replica doesn't have an active raft group, we should check whether
// we're decommissioning. If so, we should process the replica because it
Expand All @@ -148,20 +156,20 @@ func (rgcq *replicaGCQueue) shouldQueue(
}
}
}
return replicaGCShouldQueueImpl(now, lastCheck, lastActivity, isCandidate)
return replicaGCShouldQueueImpl(now, lastCheck, lastActivity, isSuspect)
}

func replicaGCShouldQueueImpl(
now, lastCheck, lastActivity hlc.Timestamp, isCandidate bool,
now, lastCheck, lastActivity hlc.Timestamp, isSuspect bool,
) (bool, float64) {
timeout := ReplicaGCQueueInactivityThreshold
priority := replicaGCPriorityDefault

if isCandidate {
// If the range is a candidate (which happens if its former replica set
if isSuspect {
// If the range is suspect (which happens if its former replica set
// ignores it), let it expire much earlier.
timeout = ReplicaGCQueueCandidateTimeout
priority = replicaGCPriorityCandidate
timeout = ReplicaGCQueueSuspectTimeout
priority = replicaGCPrioritySuspect
} else if now.Less(lastCheck.Add(ReplicaGCQueueInactivityThreshold.Nanoseconds(), 0)) {
// Return false immediately if the previous check was less than the
// check interval in the past. Note that we don't do this if the
Expand Down
6 changes: 3 additions & 3 deletions pkg/storage/replica_gc_queue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ func TestReplicaGCShouldQueue(t *testing.T) {
return hlc.Timestamp{WallTime: t.Nanoseconds()}
}

base := 2 * (ReplicaGCQueueCandidateTimeout + ReplicaGCQueueInactivityThreshold)
base := 2 * (ReplicaGCQueueSuspectTimeout + ReplicaGCQueueInactivityThreshold)
var (
z = ts(0)
bTS = ts(base)
cTS = ts(base + ReplicaGCQueueCandidateTimeout)
cTSnext = ts(base + ReplicaGCQueueCandidateTimeout + 1)
cTS = ts(base + ReplicaGCQueueSuspectTimeout)
cTSnext = ts(base + ReplicaGCQueueSuspectTimeout + 1)
iTSprev = ts(base + ReplicaGCQueueInactivityThreshold - 1)
iTS = ts(base + ReplicaGCQueueInactivityThreshold)
)
Expand Down
2 changes: 1 addition & 1 deletion pkg/storage/store_snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -718,7 +718,7 @@ func (s *Store) checkSnapshotOverlapLocked(
// inactive and thus unlikely to be about to process a split.
gcPriority := replicaGCPriorityDefault
if inactive(exReplica) {
gcPriority = replicaGCPriorityCandidate
gcPriority = replicaGCPrioritySuspect
}

msg += "; initiated GC:"
Expand Down