Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

server, core: support add learner node #896

Merged
merged 56 commits into from
Apr 13, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
56 commits
Select commit Hold shift + click to select a range
3e8ce83
support add learner node
Connor1996 Dec 29, 2017
1f8a287
fix typo
Connor1996 Jan 2, 2018
d1cc8e0
take learner peers into consideration when schedule
Connor1996 Jan 3, 2018
35f9592
Merge branch 'master' into learner-node
Connor1996 Jan 17, 2018
498cd22
fix format
Connor1996 Jan 17, 2018
eefe137
address comment
Connor1996 Jan 17, 2018
e8dcf37
fix
Connor1996 Jan 17, 2018
11439e0
update kvproto
Connor1996 Jan 17, 2018
dd6fd83
fix format
Connor1996 Jan 17, 2018
8b6aafa
fix test
Connor1996 Jan 18, 2018
22ac86c
Merge branch 'master' into learner-node
Connor1996 Jan 30, 2018
93c3029
Merge branch 'master' into learner-node
Connor1996 Jan 31, 2018
250a703
adjust
Connor1996 Jan 31, 2018
d3749ec
check learners
Connor1996 Feb 8, 2018
2511bff
Merge branch 'master' into learner-node
Connor1996 Feb 8, 2018
48c5e88
tiny change
Connor1996 Feb 8, 2018
9cef7a3
tiny change
Connor1996 Feb 9, 2018
d2657a4
fix json unmarshal error
Connor1996 Feb 28, 2018
1e97221
use AddNode instead of PromoteLearnerNode
Connor1996 Mar 2, 2018
bdefe62
Merge branch 'master' into learner-node
Connor1996 Mar 2, 2018
0daba6e
Merge branch 'master' into learner-node
Connor1996 Mar 7, 2018
ebdd942
fix clone
Connor1996 Mar 7, 2018
85fb534
Merge branch 'master' into learner-node
Connor1996 Mar 14, 2018
e6213ad
update kvproto
Connor1996 Mar 14, 2018
753e934
check down learner
Connor1996 Mar 15, 2018
53a9c16
Merge branch 'master' into learner-node
Connor1996 Mar 15, 2018
15b8388
fix test
Connor1996 Mar 15, 2018
1370252
update kvproto
Connor1996 Mar 19, 2018
60b7c82
change
Connor1996 Mar 19, 2018
ee5db6e
update pd-ctl
Connor1996 Mar 19, 2018
a9302a5
Merge branch 'master' into learner-node
Connor1996 Mar 22, 2018
3d30a58
introduce voter and learner
Connor1996 Mar 26, 2018
1d9861a
Merge branch 'master' into learner-node
Connor1996 Mar 26, 2018
c1f230d
fix test
Connor1996 Mar 28, 2018
4184d76
Merge branch 'master' into learner-node
Connor1996 Mar 28, 2018
5a62b01
fix test
Connor1996 Mar 29, 2018
3333218
address comment
Connor1996 Mar 29, 2018
9b96b35
tiny change
Connor1996 Mar 29, 2018
4710645
avoid too many logs
Connor1996 Mar 30, 2018
22f9011
Merge branch 'master' into learner-node
Connor1996 Mar 30, 2018
c75af59
improve partrol
Connor1996 Mar 30, 2018
83bc681
add peer function for test
Connor1996 Mar 30, 2018
8d9b502
Merge branch 'master' into learner-node
Connor1996 Mar 30, 2018
28812e1
address comment
Connor1996 Apr 2, 2018
9ae33e5
Merge branch 'master' into learner-node
Connor1996 Apr 2, 2018
90d232e
remove pending learner count
Connor1996 Apr 2, 2018
85965a9
remove learner statistics
Connor1996 Apr 2, 2018
a3dc83c
address comment
Connor1996 Apr 3, 2018
654fa8e
fix admin merge command
Connor1996 Apr 4, 2018
772d4d4
tiny clean up
Connor1996 Apr 4, 2018
a728419
validity check
Connor1996 Apr 11, 2018
6adc7e7
Merge branch 'master' into learner-node
Connor1996 Apr 11, 2018
7553410
fix
Connor1996 Apr 11, 2018
bb0fc2e
fix test
Connor1996 Apr 12, 2018
9b69953
update kvproto
Connor1996 Apr 13, 2018
ef7ebe8
Merge branch 'master' into learner-node
Connor1996 Apr 13, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Gopkg.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pdctl/command/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ func mergeRegionCommandFunc(cmd *cobra.Command, args []string) {
input := make(map[string]interface{})
input["name"] = cmd.Name()
input["source_region_id"] = ids[0]
input["merge_region_id"] = ids[1]
input["target_region_id"] = ids[1]
postJSON(cmd, operatorsPrefix, input)
}

Expand Down
2 changes: 1 addition & 1 deletion pdctl/command/region_command.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ func decodeProtobufText(text string) (string, error) {
// NewRegionWithCheckCommand return a region with check subcommand of regionCmd
func NewRegionWithCheckCommand() *cobra.Command {
r := &cobra.Command{
Use: "check [miss-replica|extra-replica|down-replica|pending-replica|incorrect-ns]",
Use: "check [miss-peer|extra-peer|down-peer|pending-peer|incorrect-ns]",
Short: "show the region with check specific status",
Run: showRegionWithCheckCommandFunc,
}
Expand Down
11 changes: 5 additions & 6 deletions server/api/region.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,11 @@ func newRegionInfo(r *core.RegionInfo) *regionInfo {
return nil
}
return &regionInfo{
ID: r.Id,
StartKey: strings.Trim(fmt.Sprintf("%q", r.StartKey), "\""),
EndKey: strings.Trim(fmt.Sprintf("%q", r.EndKey), "\""),
RegionEpoch: r.RegionEpoch,
Peers: r.Peers,

ID: r.Id,
StartKey: strings.Trim(fmt.Sprintf("%q", r.StartKey), "\""),
EndKey: strings.Trim(fmt.Sprintf("%q", r.EndKey), "\""),
RegionEpoch: r.RegionEpoch,
Peers: r.Peers,
Leader: r.Leader,
DownPeers: r.DownPeers,
PendingPeers: r.PendingPeers,
Expand Down
8 changes: 4 additions & 4 deletions server/api/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,11 @@ func createRouter(prefix string, svr *server.Server) *mux.Router {
router.HandleFunc("/api/v1/regions", regionsHandler.GetAll).Methods("GET")
router.HandleFunc("/api/v1/regions/writeflow", regionsHandler.GetTopWriteFlow).Methods("GET")
router.HandleFunc("/api/v1/regions/readflow", regionsHandler.GetTopReadFlow).Methods("GET")
router.HandleFunc("/api/v1/regions/check/miss-peer", regionsHandler.GetMissPeerRegions).Methods("GET")
router.HandleFunc("/api/v1/regions/check/extra-peer", regionsHandler.GetExtraPeerRegions).Methods("GET")
router.HandleFunc("/api/v1/regions/check/pending-peer", regionsHandler.GetPendingPeerRegions).Methods("GET")
router.HandleFunc("/api/v1/regions/check/down-peer", regionsHandler.GetDownPeerRegions).Methods("GET")
router.HandleFunc("/api/v1/regions/sibling/{id}", regionsHandler.GetRegionSiblings).Methods("GET")
router.HandleFunc("/api/v1/regions/check/miss-replica", regionsHandler.GetMissPeerRegions).Methods("GET")
router.HandleFunc("/api/v1/regions/check/extra-replica", regionsHandler.GetExtraPeerRegions).Methods("GET")
router.HandleFunc("/api/v1/regions/check/pending-replica", regionsHandler.GetPendingPeerRegions).Methods("GET")
router.HandleFunc("/api/v1/regions/check/down-replica", regionsHandler.GetDownPeerRegions).Methods("GET")
router.HandleFunc("/api/v1/regions/check/incorrect-ns", regionsHandler.GetIncorrectNamespaceRegions).Methods("GET")

router.Handle("/api/v1/version", newVersionHandler(rd)).Methods("GET")
Expand Down
28 changes: 20 additions & 8 deletions server/api/trend_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/pingcap/kvproto/pkg/metapb"
"github.com/pingcap/pd/server"
"github.com/pingcap/pd/server/core"
"github.com/pingcap/pd/server/schedule"
)

var _ = Suite(&testTrendSuite{})
Expand Down Expand Up @@ -48,11 +49,22 @@ func (s *testTrendSuite) TestTend(c *C) {

// Complete the operators.
mustRegionHeartbeat(c, svr, s.newRegionInfo(4, "", "a", 2, 2, []uint64{1, 2}, 2))
mustRegionHeartbeat(c, svr, s.newRegionInfo(5, "a", "b", 3, 2, []uint64{1, 3}, 1))
mustRegionHeartbeat(c, svr, s.newRegionInfo(6, "b", "", 3, 2, []uint64{2, 3}, 2))
region := s.newRegionInfo(5, "a", "b", 3, 2, []uint64{1, 3}, 1)
op, err := svr.GetHandler().GetOperator(5)
c.Assert(op, NotNil)
region.Peers[1].Id = op.Step(0).(schedule.AddPeer).PeerID
region.Voters[1].Id = op.Step(0).(schedule.AddPeer).PeerID
mustRegionHeartbeat(c, svr, region)

op, err = svr.GetHandler().GetOperator(6)
c.Assert(op, NotNil)
region = s.newRegionInfo(6, "b", "", 3, 2, []uint64{2, 3}, 2)
region.Peers[1].Id = op.Step(0).(schedule.AddPeer).PeerID
region.Voters[1].Id = op.Step(0).(schedule.AddPeer).PeerID
mustRegionHeartbeat(c, svr, region)

var trend Trend
err := readJSONWithURL(fmt.Sprintf("%s%s/api/v1/trend", svr.GetAddr(), apiPrefix), &trend)
err = readJSONWithURL(fmt.Sprintf("%s%s/api/v1/trend", svr.GetAddr(), apiPrefix), &trend)
c.Assert(err, IsNil)

// Check store states.
Expand Down Expand Up @@ -82,20 +94,20 @@ func (s *testTrendSuite) newRegionInfo(id uint64, startKey, endKey string, confV
leader *metapb.Peer
)
for _, id := range stores {
p := &metapb.Peer{Id: id, StoreId: id}
p := &metapb.Peer{Id: 10 + id, StoreId: id}
if id == leaderStore {
leader = p
}
peers = append(peers, p)
}
return &core.RegionInfo{
Region: &metapb.Region{
return core.NewRegionInfo(
&metapb.Region{
Id: id,
StartKey: []byte(startKey),
EndKey: []byte(endKey),
RegionEpoch: &metapb.RegionEpoch{ConfVer: confVer, Version: ver},
Peers: peers,
},
Leader: leader,
}
leader,
)
}
4 changes: 4 additions & 0 deletions server/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,10 @@ func (c *clusterInfo) GetHotRegionLowThreshold() int {
return c.opt.GetHotRegionLowThreshold()
}

func (c *clusterInfo) IsRaftLearnerEnabled() bool {
return c.opt.IsRaftLearnerEnabled()
}

func (c *clusterInfo) CheckLabelProperty(typ string, labels []*metapb.StoreLabel) bool {
return c.opt.CheckLabelProperty(typ, labels)
}
3 changes: 3 additions & 0 deletions server/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,8 @@ type ScheduleConfig struct {
MergeScheduleLimit uint64 `toml:"merge-schedule-limit,omitempty" json:"merge-schedule-limit"`
// TolerantSizeRatio is the ratio of buffer size for balance scheduler.
TolerantSizeRatio float64 `toml:"tolerant-size-ratio,omitempty" json:"tolerant-size-ratio"`
// EnableRaftLearner is the option for using AddLearnerNode instead of AddNode
EnableRaftLearner bool `toml:"enable-raft-learner" json:"enable-raft-learner,string"`
// Schedulers support for loding customized schedulers
Schedulers SchedulerConfigs `toml:"schedulers,omitempty" json:"schedulers-v2"` // json v2 is for the sake of compatible upgrade
}
Expand All @@ -369,6 +371,7 @@ func (c *ScheduleConfig) clone() *ScheduleConfig {
ReplicaScheduleLimit: c.ReplicaScheduleLimit,
MergeScheduleLimit: c.MergeScheduleLimit,
TolerantSizeRatio: c.TolerantSizeRatio,
EnableRaftLearner: c.EnableRaftLearner,
Schedulers: schedulers,
}
}
Expand Down
58 changes: 52 additions & 6 deletions server/coordinator.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,21 @@ func (c *coordinator) dispatch(region *core.RegionInfo) {
c.removeOperator(op)
}
}
// If PD has restarted, it need to check learners added before and promote them.
if c.cluster.IsRaftLearnerEnabled() && c.getOperator(region.GetId()) == nil {
for _, p := range region.GetLearners() {
if region.GetPendingLearner(p.GetId()) != nil {
continue
}
step := schedule.PromoteLearner{
ToStore: p.GetStoreId(),
PeerID: p.GetId(),
}
op := schedule.NewOperator("promoteLearner", region.GetId(), schedule.OpRegion, step)
c.addOperator(op)
break
}
}
}

func (c *coordinator) patrolRegions() {
Expand Down Expand Up @@ -139,20 +154,23 @@ func (c *coordinator) patrolRegions() {
key = region.GetEndKey()

if op := c.namespaceChecker.Check(region); op != nil {
c.addOperator(op)
break
if c.addOperator(op) {
break
}
}
if c.limiter.OperatorCount(schedule.OpReplica) < c.cluster.GetReplicaScheduleLimit() {
if op := c.replicaChecker.Check(region); op != nil {
c.addOperator(op)
break
if c.addOperator(op) {
break
}
}
}
if c.limiter.OperatorCount(schedule.OpMerge) < c.cluster.GetMergeScheduleLimit() {
if op1, op2 := c.mergeChecker.Check(region); op1 != nil && op2 != nil {
// make sure two operators can add successfully altogether
c.addOperators(op1, op2)
break
if c.addOperators(op1, op2) {
break
}
}
}
}
Expand Down Expand Up @@ -550,6 +568,34 @@ func (c *coordinator) sendScheduleCommand(region *core.RegionInfo, step schedule
},
}
c.hbStreams.sendMsg(region, cmd)
case schedule.AddLearner:
if region.GetStorePeer(s.ToStore) != nil {
// The newly added peer is pending.
return
}
cmd := &pdpb.RegionHeartbeatResponse{
ChangePeer: &pdpb.ChangePeer{
ChangeType: eraftpb.ConfChangeType_AddLearnerNode,
Peer: &metapb.Peer{
Id: s.PeerID,
StoreId: s.ToStore,
IsLearner: true,
},
},
}
c.hbStreams.sendMsg(region, cmd)
case schedule.PromoteLearner:
cmd := &pdpb.RegionHeartbeatResponse{
ChangePeer: &pdpb.ChangePeer{
// reuse AddNode type
ChangeType: eraftpb.ConfChangeType_AddNode,
Peer: &metapb.Peer{
Id: s.PeerID,
StoreId: s.ToStore,
},
},
}
c.hbStreams.sendMsg(region, cmd)
case schedule.RemovePeer:
cmd := &pdpb.RegionHeartbeatResponse{
ChangePeer: &pdpb.ChangePeer{
Expand Down
2 changes: 1 addition & 1 deletion server/coordinator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -698,7 +698,7 @@ func waitAddPeer(c *C, stream *mockHeartbeatStream, region *core.RegionInfo, sto
}
return false
})
region.Peers = append(region.Peers, res.GetChangePeer().GetPeer())
region.AddPeer(res.GetChangePeer().GetPeer())
region.RegionEpoch = &metapb.RegionEpoch{
ConfVer: region.GetRegionEpoch().GetConfVer() + 1,
Version: region.GetRegionEpoch().GetVersion(),
Expand Down
Loading