From 67bf65348a69975d637cb53ce5d30aebeeb5833c Mon Sep 17 00:00:00 2001 From: Connor1996 Date: Thu, 22 Nov 2018 17:20:40 +0800 Subject: [PATCH 1/6] change merge leader strategy --- server/schedule/operator.go | 17 ++++++++++---- server/schedulers/balance_test.go | 37 +++++++++++++++++++++++++++++-- 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/server/schedule/operator.go b/server/schedule/operator.go index ee6c06b26b2..c905145b22c 100644 --- a/server/schedule/operator.go +++ b/server/schedule/operator.go @@ -503,6 +503,11 @@ func matchPeerSteps(cluster Cluster, source *core.RegionInfo, target *core.Regio storeIDs[peer.GetStoreId()] = struct{}{} } + // there is a case that a follower is added and transfer leader to it, + // and the apply process of it is slow so leader regards it as voter + // but actually it is stll learner. Once that, the follower can't be leader, + // but old leader can't know that so there is no leader to serve for a while. + var targetLeader uint64 // Add missing peers. for id := range storeIDs { if source.GetStorePeer(id) != nil { @@ -521,21 +526,25 @@ func matchPeerSteps(cluster Cluster, source *core.RegionInfo, target *core.Regio } else { steps = append(steps, AddPeer{ToStore: id, PeerID: peer.Id}) } + if targetLeader == 0 { + targetLeader = id + } kind |= OpRegion } // Check whether to transfer leader or not intersection := getIntersectionStores(sourcePeers, targetPeers) leaderID := source.GetLeader().GetStoreId() - isFound := false for _, storeID := range intersection { if storeID == leaderID { - isFound = true + targetLeader = 0 break } + targetLeader = storeID } - if !isFound { - steps = append(steps, TransferLeader{FromStore: source.GetLeader().GetStoreId(), ToStore: target.GetLeader().GetStoreId()}) + if targetLeader != 0 { + // target leader should be trans + steps = append(steps, TransferLeader{FromStore: source.GetLeader().GetStoreId(), ToStore: targetLeader}) kind |= OpLeader } diff --git a/server/schedulers/balance_test.go b/server/schedulers/balance_test.go index 5e57d5373e8..6881c75d29a 100644 --- a/server/schedulers/balance_test.go +++ b/server/schedulers/balance_test.go @@ -945,7 +945,7 @@ func (s *testMergeCheckerSuite) TestMatchPeers(c *C) { s.checkSteps(c, ops[0], []schedule.OperatorStep{ schedule.AddLearner{ToStore: 4, PeerID: 1}, schedule.PromoteLearner{ToStore: 4, PeerID: 1}, - schedule.TransferLeader{FromStore: 6, ToStore: 4}, + schedule.TransferLeader{FromStore: 6, ToStore: 5}, schedule.RemovePeer{FromStore: 6}, schedule.MergeRegion{ FromRegion: s.regions[2].GetMeta(), @@ -984,7 +984,7 @@ func (s *testMergeCheckerSuite) TestMatchPeers(c *C) { }, }) - // all store overlap + // all stores overlap s.regions[2] = s.regions[2].Clone(core.SetPeers([]*metapb.Peer{ {Id: 106, StoreId: 1}, {Id: 107, StoreId: 5}, @@ -1006,6 +1006,39 @@ func (s *testMergeCheckerSuite) TestMatchPeers(c *C) { IsPassive: true, }, }) + + // all stores not overlap + s.regions[2] = s.regions[2].Clone(core.SetPeers([]*metapb.Peer{ + {Id: 109, StoreId: 2}, + {Id: 110, StoreId: 3}, + {Id: 111, StoreId: 6}, + }), core.WithLeader(&metapb.Peer{Id: 109, StoreId: 2})) + s.cluster.PutRegion(s.regions[2]) + ops = s.mc.Check(s.regions[2]) + s.checkSteps(c, ops[0], []schedule.OperatorStep{ + schedule.AddLearner{ToStore: 1, PeerID: 3}, + schedule.PromoteLearner{ToStore: 1, PeerID: 3}, + schedule.AddLearner{ToStore: 4, PeerID: 4}, + schedule.PromoteLearner{ToStore: 4, PeerID: 4}, + schedule.AddLearner{ToStore: 5, PeerID: 5}, + schedule.PromoteLearner{ToStore: 5, PeerID: 5}, + schedule.TransferLeader{FromStore: 2, ToStore: 1}, + schedule.RemovePeer{FromStore: 2}, + schedule.RemovePeer{FromStore: 3}, + schedule.RemovePeer{FromStore: 6}, + schedule.MergeRegion{ + FromRegion: s.regions[2].GetMeta(), + ToRegion: s.regions[1].GetMeta(), + IsPassive: false, + }, + }) + s.checkSteps(c, ops[1], []schedule.OperatorStep{ + schedule.MergeRegion{ + FromRegion: s.regions[2].GetMeta(), + ToRegion: s.regions[1].GetMeta(), + IsPassive: true, + }, + }) } var _ = Suite(&testBalanceHotWriteRegionSchedulerSuite{}) From 51acd2ee9fe3b8b01ab28a84df847e01c56ac3f7 Mon Sep 17 00:00:00 2001 From: Connor1996 Date: Thu, 22 Nov 2018 17:24:15 +0800 Subject: [PATCH 2/6] fix typo --- server/schedule/operator.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/schedule/operator.go b/server/schedule/operator.go index c905145b22c..ece68ea567a 100644 --- a/server/schedule/operator.go +++ b/server/schedule/operator.go @@ -505,7 +505,7 @@ func matchPeerSteps(cluster Cluster, source *core.RegionInfo, target *core.Regio // there is a case that a follower is added and transfer leader to it, // and the apply process of it is slow so leader regards it as voter - // but actually it is stll learner. Once that, the follower can't be leader, + // but actually it is still learner. Once that, the follower can't be leader, // but old leader can't know that so there is no leader to serve for a while. var targetLeader uint64 // Add missing peers. @@ -543,7 +543,7 @@ func matchPeerSteps(cluster Cluster, source *core.RegionInfo, target *core.Regio targetLeader = storeID } if targetLeader != 0 { - // target leader should be trans + // target leader should be to the first added follower if there is no transection stores. steps = append(steps, TransferLeader{FromStore: source.GetLeader().GetStoreId(), ToStore: targetLeader}) kind |= OpLeader } From 61ed90002b05d324c4ad36949db49b01ea8b3453 Mon Sep 17 00:00:00 2001 From: Connor1996 Date: Fri, 30 Nov 2018 14:13:26 +0800 Subject: [PATCH 3/6] remove after add Signed-off-by: Connor1996 --- server/schedule/operator.go | 92 ++++++++++++++++++++----------- server/schedulers/balance_test.go | 6 +- 2 files changed, 62 insertions(+), 36 deletions(-) diff --git a/server/schedule/operator.go b/server/schedule/operator.go index ece68ea567a..9f3cb50920b 100644 --- a/server/schedule/operator.go +++ b/server/schedule/operator.go @@ -15,6 +15,7 @@ package schedule import ( "bytes" + "errors" "fmt" "reflect" "sync/atomic" @@ -492,15 +493,15 @@ func CreateMergeRegionOperator(desc string, cluster Cluster, source *core.Region // matchPeerSteps returns the steps to match the location of peer stores of source region with target's. func matchPeerSteps(cluster Cluster, source *core.RegionInfo, target *core.RegionInfo) ([]OperatorStep, OperatorKind, error) { - storeIDs := make(map[uint64]struct{}) var steps []OperatorStep var kind OperatorKind sourcePeers := source.GetPeers() targetPeers := target.GetPeers() - for _, peer := range targetPeers { - storeIDs[peer.GetStoreId()] = struct{}{} + // make sure the peer count is same + if len(sourcePeers) != len(targetPeers) { + return nil, kind, errors.New("mismatch count of peer") } // there is a case that a follower is added and transfer leader to it, @@ -508,61 +509,86 @@ func matchPeerSteps(cluster Cluster, source *core.RegionInfo, target *core.Regio // but actually it is still learner. Once that, the follower can't be leader, // but old leader can't know that so there is no leader to serve for a while. var targetLeader uint64 - // Add missing peers. - for id := range storeIDs { - if source.GetStorePeer(id) != nil { - continue - } - peer, err := cluster.AllocPeer(id) - if err != nil { - log.Debugf("peer alloc failed: %v", err) - return nil, kind, err - } - if cluster.IsRaftLearnerEnabled() { - steps = append(steps, - AddLearner{ToStore: id, PeerID: peer.Id}, - PromoteLearner{ToStore: id, PeerID: peer.Id}, - ) - } else { - steps = append(steps, AddPeer{ToStore: id, PeerID: peer.Id}) - } - if targetLeader == 0 { - targetLeader = id - } - kind |= OpRegion - } + var toAdds [][]OperatorStep // Check whether to transfer leader or not intersection := getIntersectionStores(sourcePeers, targetPeers) + for _, peer := range targetPeers { + storeID := peer.GetStoreId() + if _, found := intersection[storeID]; !found { + // Find missing peers. + var addSteps []OperatorStep + + peer, err := cluster.AllocPeer(storeID) + if err != nil { + log.Debugf("peer alloc failed: %v", err) + return nil, kind, err + } + if cluster.IsRaftLearnerEnabled() { + addSteps = append(addSteps, + AddLearner{ToStore: storeID, PeerID: peer.Id}, + PromoteLearner{ToStore: storeID, PeerID: peer.Id}, + ) + } else { + addSteps = append(addSteps, AddPeer{ToStore: storeID, PeerID: peer.Id}) + } + toAdds = append(toAdds, addSteps) + + if targetLeader == 0 { + targetLeader = storeID + } + kind |= OpRegion + } + } + leaderID := source.GetLeader().GetStoreId() - for _, storeID := range intersection { + for storeID := range intersection { if storeID == leaderID { targetLeader = 0 break } targetLeader = storeID } - if targetLeader != 0 { - // target leader should be to the first added follower if there is no transection stores. + + if len(intersection) != 0 && targetLeader != 0 { + // if intersection is not empty and leader doesn't belong to intersection, transfer leader to store in intersection steps = append(steps, TransferLeader{FromStore: source.GetLeader().GetStoreId(), ToStore: targetLeader}) kind |= OpLeader + targetLeader = 0 } + // target leader should be the first added follower if there is no transection stores. + index := 0 // Remove redundant peers. for _, peer := range sourcePeers { - if _, ok := storeIDs[peer.GetStoreId()]; ok { + if _, found := intersection[peer.GetStoreId()]; found { continue } + + // the leader should be the last to remove + if targetLeader != 0 && peer.GetStoreId() == leaderID { + continue + } + + steps = append(steps, toAdds[index]...) steps = append(steps, RemovePeer{FromStore: peer.GetStoreId()}) kind |= OpRegion + index++ } + // remove leader + if targetLeader != 0 { + steps = append(steps, toAdds[index]...) + steps = append(steps, TransferLeader{FromStore: leaderID, ToStore: targetLeader}) + steps = append(steps, RemovePeer{FromStore: leaderID}) + kind |= OpLeader | OpRegion + } return steps, kind, nil } // getIntersectionStores returns the stores included in two region's peers. -func getIntersectionStores(a []*metapb.Peer, b []*metapb.Peer) []uint64 { - set := make([]uint64, 0) +func getIntersectionStores(a []*metapb.Peer, b []*metapb.Peer) map[uint64]struct{} { + set := make(map[uint64]struct{}) hash := make(map[uint64]struct{}) for _, peer := range a { @@ -571,7 +597,7 @@ func getIntersectionStores(a []*metapb.Peer, b []*metapb.Peer) []uint64 { for _, peer := range b { if _, found := hash[peer.GetStoreId()]; found { - set = append(set, peer.GetStoreId()) + set[peer.GetStoreId()] = struct{}{} } } diff --git a/server/schedulers/balance_test.go b/server/schedulers/balance_test.go index 6881c75d29a..1424c77de06 100644 --- a/server/schedulers/balance_test.go +++ b/server/schedulers/balance_test.go @@ -943,9 +943,9 @@ func (s *testMergeCheckerSuite) TestMatchPeers(c *C) { // partial store overlap not including leader ops := s.mc.Check(s.regions[2]) s.checkSteps(c, ops[0], []schedule.OperatorStep{ + schedule.TransferLeader{FromStore: 6, ToStore: 5}, schedule.AddLearner{ToStore: 4, PeerID: 1}, schedule.PromoteLearner{ToStore: 4, PeerID: 1}, - schedule.TransferLeader{FromStore: 6, ToStore: 5}, schedule.RemovePeer{FromStore: 6}, schedule.MergeRegion{ FromRegion: s.regions[2].GetMeta(), @@ -1018,14 +1018,14 @@ func (s *testMergeCheckerSuite) TestMatchPeers(c *C) { s.checkSteps(c, ops[0], []schedule.OperatorStep{ schedule.AddLearner{ToStore: 1, PeerID: 3}, schedule.PromoteLearner{ToStore: 1, PeerID: 3}, + schedule.RemovePeer{FromStore: 3}, schedule.AddLearner{ToStore: 4, PeerID: 4}, schedule.PromoteLearner{ToStore: 4, PeerID: 4}, + schedule.RemovePeer{FromStore: 6}, schedule.AddLearner{ToStore: 5, PeerID: 5}, schedule.PromoteLearner{ToStore: 5, PeerID: 5}, schedule.TransferLeader{FromStore: 2, ToStore: 1}, schedule.RemovePeer{FromStore: 2}, - schedule.RemovePeer{FromStore: 3}, - schedule.RemovePeer{FromStore: 6}, schedule.MergeRegion{ FromRegion: s.regions[2].GetMeta(), ToRegion: s.regions[1].GetMeta(), From 310fab827036b4451e1fc60d4ea853c8c3c36e3d Mon Sep 17 00:00:00 2001 From: Connor1996 Date: Fri, 30 Nov 2018 14:17:44 +0800 Subject: [PATCH 4/6] add comment Signed-off-by: Connor1996 --- server/schedule/operator.go | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/server/schedule/operator.go b/server/schedule/operator.go index 9f3cb50920b..dca90fbf773 100644 --- a/server/schedule/operator.go +++ b/server/schedule/operator.go @@ -504,19 +504,20 @@ func matchPeerSteps(cluster Cluster, source *core.RegionInfo, target *core.Regio return nil, kind, errors.New("mismatch count of peer") } - // there is a case that a follower is added and transfer leader to it, + // There is a case that a follower is added and transfer leader to it, // and the apply process of it is slow so leader regards it as voter // but actually it is still learner. Once that, the follower can't be leader, // but old leader can't know that so there is no leader to serve for a while. + // So target leader should be the first added follower if there is no transection stores. var targetLeader uint64 var toAdds [][]OperatorStep - // Check whether to transfer leader or not + // get overlapped part of the peers of two regions intersection := getIntersectionStores(sourcePeers, targetPeers) for _, peer := range targetPeers { storeID := peer.GetStoreId() + // find missing peers. if _, found := intersection[storeID]; !found { - // Find missing peers. var addSteps []OperatorStep peer, err := cluster.AllocPeer(storeID) @@ -534,6 +535,7 @@ func matchPeerSteps(cluster Cluster, source *core.RegionInfo, target *core.Regio } toAdds = append(toAdds, addSteps) + // record the first added peer if targetLeader == 0 { targetLeader = storeID } @@ -543,6 +545,7 @@ func matchPeerSteps(cluster Cluster, source *core.RegionInfo, target *core.Regio leaderID := source.GetLeader().GetStoreId() for storeID := range intersection { + // if leader belongs to overlapped part, no need to transfer if storeID == leaderID { targetLeader = 0 break @@ -550,16 +553,15 @@ func matchPeerSteps(cluster Cluster, source *core.RegionInfo, target *core.Regio targetLeader = storeID } + // if intersection is not empty and leader doesn't belong to intersection, transfer leader to store in overlapped part if len(intersection) != 0 && targetLeader != 0 { - // if intersection is not empty and leader doesn't belong to intersection, transfer leader to store in intersection steps = append(steps, TransferLeader{FromStore: source.GetLeader().GetStoreId(), ToStore: targetLeader}) kind |= OpLeader targetLeader = 0 } - // target leader should be the first added follower if there is no transection stores. index := 0 - // Remove redundant peers. + // remove redundant peers. for _, peer := range sourcePeers { if _, found := intersection[peer.GetStoreId()]; found { continue @@ -576,7 +578,7 @@ func matchPeerSteps(cluster Cluster, source *core.RegionInfo, target *core.Regio index++ } - // remove leader + // transfer leader before remove leader if targetLeader != 0 { steps = append(steps, toAdds[index]...) steps = append(steps, TransferLeader{FromStore: leaderID, ToStore: targetLeader}) From 2ea0175ce295ae7f0e23d38cfb6ec2b139568118 Mon Sep 17 00:00:00 2001 From: Connor1996 Date: Fri, 30 Nov 2018 15:05:40 +0800 Subject: [PATCH 5/6] statle test Signed-off-by: Connor1996 --- server/schedulers/balance_test.go | 34 ++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/server/schedulers/balance_test.go b/server/schedulers/balance_test.go index 1424c77de06..aa54b6b4ed5 100644 --- a/server/schedulers/balance_test.go +++ b/server/schedulers/balance_test.go @@ -880,7 +880,7 @@ func (s *testMergeCheckerSuite) SetUpTest(c *C) { StartKey: []byte("t"), EndKey: []byte("x"), Peers: []*metapb.Peer{ - {Id: 106, StoreId: 1}, + {Id: 106, StoreId: 2}, {Id: 107, StoreId: 5}, {Id: 108, StoreId: 6}, }, @@ -944,8 +944,11 @@ func (s *testMergeCheckerSuite) TestMatchPeers(c *C) { ops := s.mc.Check(s.regions[2]) s.checkSteps(c, ops[0], []schedule.OperatorStep{ schedule.TransferLeader{FromStore: 6, ToStore: 5}, - schedule.AddLearner{ToStore: 4, PeerID: 1}, - schedule.PromoteLearner{ToStore: 4, PeerID: 1}, + schedule.AddLearner{ToStore: 1, PeerID: 1}, + schedule.PromoteLearner{ToStore: 1, PeerID: 1}, + schedule.RemovePeer{FromStore: 2}, + schedule.AddLearner{ToStore: 4, PeerID: 2}, + schedule.PromoteLearner{ToStore: 4, PeerID: 2}, schedule.RemovePeer{FromStore: 6}, schedule.MergeRegion{ FromRegion: s.regions[2].GetMeta(), @@ -962,13 +965,20 @@ func (s *testMergeCheckerSuite) TestMatchPeers(c *C) { }) // partial store overlap including leader - newRegion := s.regions[2].Clone(core.WithLeader(&metapb.Peer{Id: 106, StoreId: 1})) + newRegion := s.regions[2].Clone( + core.SetPeers([]*metapb.Peer{ + {Id: 106, StoreId: 1}, + {Id: 107, StoreId: 5}, + {Id: 108, StoreId: 6}, + }), + core.WithLeader(&metapb.Peer{Id: 106, StoreId: 1}), + ) s.regions[2] = newRegion s.cluster.PutRegion(s.regions[2]) ops = s.mc.Check(s.regions[2]) s.checkSteps(c, ops[0], []schedule.OperatorStep{ - schedule.AddLearner{ToStore: 4, PeerID: 2}, - schedule.PromoteLearner{ToStore: 4, PeerID: 2}, + schedule.AddLearner{ToStore: 4, PeerID: 3}, + schedule.PromoteLearner{ToStore: 4, PeerID: 3}, schedule.RemovePeer{FromStore: 6}, schedule.MergeRegion{ FromRegion: s.regions[2].GetMeta(), @@ -1016,14 +1026,14 @@ func (s *testMergeCheckerSuite) TestMatchPeers(c *C) { s.cluster.PutRegion(s.regions[2]) ops = s.mc.Check(s.regions[2]) s.checkSteps(c, ops[0], []schedule.OperatorStep{ - schedule.AddLearner{ToStore: 1, PeerID: 3}, - schedule.PromoteLearner{ToStore: 1, PeerID: 3}, + schedule.AddLearner{ToStore: 1, PeerID: 4}, + schedule.PromoteLearner{ToStore: 1, PeerID: 4}, schedule.RemovePeer{FromStore: 3}, - schedule.AddLearner{ToStore: 4, PeerID: 4}, - schedule.PromoteLearner{ToStore: 4, PeerID: 4}, + schedule.AddLearner{ToStore: 4, PeerID: 5}, + schedule.PromoteLearner{ToStore: 4, PeerID: 5}, schedule.RemovePeer{FromStore: 6}, - schedule.AddLearner{ToStore: 5, PeerID: 5}, - schedule.PromoteLearner{ToStore: 5, PeerID: 5}, + schedule.AddLearner{ToStore: 5, PeerID: 6}, + schedule.PromoteLearner{ToStore: 5, PeerID: 6}, schedule.TransferLeader{FromStore: 2, ToStore: 1}, schedule.RemovePeer{FromStore: 2}, schedule.MergeRegion{ From a2d3274b99e5b1c4e3b2145d469f258d94599fb6 Mon Sep 17 00:00:00 2001 From: Connor1996 Date: Tue, 4 Dec 2018 17:43:04 +0800 Subject: [PATCH 6/6] address comment Signed-off-by: Connor1996 --- server/schedule/operator.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/server/schedule/operator.go b/server/schedule/operator.go index dca90fbf773..6364a84fcac 100644 --- a/server/schedule/operator.go +++ b/server/schedule/operator.go @@ -584,13 +584,19 @@ func matchPeerSteps(cluster Cluster, source *core.RegionInfo, target *core.Regio steps = append(steps, TransferLeader{FromStore: leaderID, ToStore: targetLeader}) steps = append(steps, RemovePeer{FromStore: leaderID}) kind |= OpLeader | OpRegion + index++ + } + + if index != len(toAdds) { + return nil, kind, errors.New("wrong count of add steps") } + return steps, kind, nil } // getIntersectionStores returns the stores included in two region's peers. func getIntersectionStores(a []*metapb.Peer, b []*metapb.Peer) map[uint64]struct{} { - set := make(map[uint64]struct{}) + intersection := make(map[uint64]struct{}) hash := make(map[uint64]struct{}) for _, peer := range a { @@ -599,9 +605,9 @@ func getIntersectionStores(a []*metapb.Peer, b []*metapb.Peer) map[uint64]struct for _, peer := range b { if _, found := hash[peer.GetStoreId()]; found { - set[peer.GetStoreId()] = struct{}{} + intersection[peer.GetStoreId()] = struct{}{} } } - return set + return intersection }