Skip to content

Commit

Permalink
kvserver: prevent the StoreRebalancer from downreplicating a range
Browse files Browse the repository at this point in the history
Prior to this commit, we had a bug inside one of the methods used by the
`StoreRebalancer` where we had two variables referring to a slice that
was subsequently being appended to.

The code was making the implicit assumption that both of these slices
would continue to point to the same underlying array. That assumption
does not hold: any of the appends mentioned above can cause the
underlying array of one of the slices to be reallocated.

This commit fixes this bug.

Resolves #64064

Release note (bug fix): A bug in previous 21.1 betas allowed the store
rebalancer to spuriously downreplicate a range during normal operation.
This bug is now fixed.
  • Loading branch information
aayushshah15 committed Apr 27, 2021
1 parent 1fc0e3d commit b00f923
Showing 1 changed file with 36 additions and 13 deletions.
49 changes: 36 additions & 13 deletions pkg/kv/kvserver/store_rebalancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ package kvserver

import (
"context"
"fmt"
"math"
"math/rand"
"sort"
Expand Down Expand Up @@ -309,8 +310,18 @@ func (sr *StoreRebalancer) rebalanceStore(
}

descBeforeRebalance := replWithStats.repl.Desc()
log.VEventf(ctx, 1, "rebalancing r%d (%.2f qps) from %v to %v to better balance load",
replWithStats.repl.RangeID, replWithStats.qps, descBeforeRebalance.Replicas(), voterTargets)
log.VEventf(
ctx,
1,
"rebalancing r%d (%.2f qps) to better balance load: voters from %v to %v; non-voters from %v to %v",
replWithStats.repl.RangeID,
replWithStats.qps,
descBeforeRebalance.Replicas().Voters(),
voterTargets,
descBeforeRebalance.Replicas().NonVoters(),
nonVoterTargets,
)

timeout := sr.rq.processTimeoutFunc(sr.st, replWithStats.repl)
if err := contextutil.RunWithTimeout(ctx, "relocate range", timeout, func(ctx context.Context) error {
return sr.rq.store.AdminRelocateRange(ctx, *descBeforeRebalance, voterTargets, nonVoterTargets)
Expand Down Expand Up @@ -473,6 +484,17 @@ type rangeRebalanceContext struct {
numDesiredVoters, numDesiredNonVoters int
}

// numDesiredReplicas returns the desired replica count for the given
// replica type (voting or non-voting) on this range. It panics on an
// unrecognized targetReplicaType, which would indicate a programming
// error.
func (rbc *rangeRebalanceContext) numDesiredReplicas(targetType targetReplicaType) int {
	if targetType == voterTarget {
		return rbc.numDesiredVoters
	}
	if targetType == nonVoterTarget {
		return rbc.numDesiredNonVoters
	}
	panic(fmt.Sprintf("unknown targetReplicaType %s", targetType))
}

func (sr *StoreRebalancer) chooseRangeToRebalance(
ctx context.Context,
hottestRanges *[]replicaWithStats,
Expand Down Expand Up @@ -730,20 +752,21 @@ func (sr *StoreRebalancer) pickRemainingRepls(
options scorerOptions,
minQPS, maxQPS float64,
targetType targetReplicaType,
) (finalTargetsForType []roachpb.ReplicaDescriptor) {
var numDesiredReplsForType int
) []roachpb.ReplicaDescriptor {
// Alias the slice that corresponds to the set of replicas that is being
// appended to. This is because we want subsequent calls to
// `allocateTargetFromList` to observe the results of previous calls (note the
// append to the slice referenced by `finalTargetsForType`).
var finalTargetsForType *[]roachpb.ReplicaDescriptor
switch targetType {
case voterTarget:
finalTargetsForType = partialVoterTargets
numDesiredReplsForType = rebalanceCtx.numDesiredVoters
finalTargetsForType = &partialVoterTargets
case nonVoterTarget:
finalTargetsForType = partialNonVoterTargets
numDesiredReplsForType = rebalanceCtx.numDesiredNonVoters
finalTargetsForType = &partialNonVoterTargets
default:
log.Fatalf(ctx, "unknown targetReplicaType %s", targetType)
log.Fatalf(ctx, "unknown targetReplicaType: %s", targetType)
}

for len(finalTargetsForType) < numDesiredReplsForType {
for len(*finalTargetsForType) < rebalanceCtx.numDesiredReplicas(targetType) {
// Use the preexisting Allocate{Non}Voter logic to ensure that
// considerations such as zone constraints, locality diversity, and full
// disk come into play.
Expand Down Expand Up @@ -781,12 +804,12 @@ func (sr *StoreRebalancer) pickRemainingRepls(
break
}

finalTargetsForType = append(finalTargetsForType, roachpb.ReplicaDescriptor{
*finalTargetsForType = append(*finalTargetsForType, roachpb.ReplicaDescriptor{
NodeID: target.Node.NodeID,
StoreID: target.StoreID,
})
}
return finalTargetsForType
return *finalTargetsForType
}

// pickReplsToKeep determines the set of existing replicas for a range which
Expand Down

0 comments on commit b00f923

Please sign in to comment.