Skip to content

Commit

Permalink
Merge pull request #10779 from tbg/jointq-pr
Browse files Browse the repository at this point in the history
raft: use half-populated joint quorum
  • Loading branch information
tbg authored Jun 20, 2019
2 parents 9ff7628 + e039629 commit 755aab6
Show file tree
Hide file tree
Showing 23 changed files with 2,584 additions and 176 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module go.etcd.io/etcd
require (
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973 // indirect
github.com/bgentry/speakeasy v0.1.0
github.com/cockroachdb/datadriven v0.0.0-20190531201743-edce55837238
github.com/coreos/go-semver v0.2.0
github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7
github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973 h1:xJ4a3vCFaGF/jqvzLM
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/bgentry/speakeasy v0.1.0 h1:ByYyxL9InA1OWqxJqqp2A5pYHUrCiAL6K3J+LKSsQkY=
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
github.com/cockroachdb/datadriven v0.0.0-20190531201743-edce55837238 h1:uNljlOxtOHrPnRoPPx+JanqjAGZpNiqAGVBfGskd/pg=
github.com/cockroachdb/datadriven v0.0.0-20190531201743-edce55837238/go.mod h1:zn76sxSg3SzpJ0PPJaLDCu+Bu0Lg3sKTORVIj19EIF8=
github.com/coreos/go-semver v0.2.0 h1:3Jm3tLmsgAYcjC+4Up7hJrFBPr+n7rAqYeSw/SZazuY=
github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
github.com/coreos/go-systemd v0.0.0-20180511133405-39ca1b05acc7 h1:u9SHYsPQNyt5tgDm3YN7+9dYrpK96E5wFilTFWIDZOM=
Expand Down
129 changes: 63 additions & 66 deletions raft/progress.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ package raft
import (
"fmt"
"sort"

"go.etcd.io/etcd/raft/quorum"
)

const (
Expand Down Expand Up @@ -291,102 +293,96 @@ func (in *inflights) reset() {
// known about the nodes and learners in it. In particular, it tracks the match
// index for each peer which in turn allows reasoning about the committed index.
type progressTracker struct {
nodes map[uint64]*Progress
learners map[uint64]*Progress
voters quorum.JointConfig
learners map[uint64]struct{}
prs map[uint64]*Progress

votes map[uint64]bool

maxInflight int
matchBuf uint64Slice
}

func makePRS(maxInflight int) progressTracker {
func makeProgressTracker(maxInflight int) progressTracker {
p := progressTracker{
maxInflight: maxInflight,
nodes: map[uint64]*Progress{},
learners: map[uint64]*Progress{},
votes: map[uint64]bool{},
voters: quorum.JointConfig{
quorum.MajorityConfig{},
quorum.MajorityConfig{},
},
learners: map[uint64]struct{}{},
votes: map[uint64]bool{},
prs: map[uint64]*Progress{},
}
return p
}

// isSingleton returns true if (and only if) there is only one voting member
// (i.e. the leader) in the current configuration.
func (p *progressTracker) isSingleton() bool {
return len(p.nodes) == 1
return len(p.voters[0]) == 1 && len(p.voters[1]) == 0
}

func (p *progressTracker) quorum() int {
return len(p.nodes)/2 + 1
}
type progressAckIndexer map[uint64]*Progress

var _ quorum.AckedIndexer = progressAckIndexer(nil)

func (p *progressTracker) hasQuorum(m map[uint64]struct{}) bool {
return len(m) >= p.quorum()
func (l progressAckIndexer) AckedIndex(id uint64) (quorum.Index, bool) {
pr, ok := l[id]
if !ok {
return 0, false
}
return quorum.Index(pr.Match), true
}

// committed returns the largest log index known to be committed based on what
// the voting members of the group have acknowledged.
func (p *progressTracker) committed() uint64 {
// Preserving matchBuf across calls is an optimization
// used to avoid allocating a new slice on each call.
if cap(p.matchBuf) < len(p.nodes) {
p.matchBuf = make(uint64Slice, len(p.nodes))
}
p.matchBuf = p.matchBuf[:len(p.nodes)]
idx := 0
for _, pr := range p.nodes {
p.matchBuf[idx] = pr.Match
idx++
}
sort.Sort(&p.matchBuf)
return p.matchBuf[len(p.matchBuf)-p.quorum()]
return uint64(p.voters.CommittedIndex(progressAckIndexer(p.prs)))
}

func (p *progressTracker) removeAny(id uint64) {
pN := p.nodes[id]
pL := p.learners[id]
_, okPR := p.prs[id]
_, okV1 := p.voters[0][id]
_, okV2 := p.voters[1][id]
_, okL := p.learners[id]

okV := okV1 || okV2

if pN == nil && pL == nil {
if !okPR {
panic("attempting to remove unknown peer %x")
} else if !okV && !okL {
panic("attempting to remove unknown peer %x")
} else if pN != nil && pL != nil {
} else if okV && okL {
panic(fmt.Sprintf("peer %x is both voter and learner", id))
}

delete(p.nodes, id)
delete(p.voters[0], id)
delete(p.voters[1], id)
delete(p.learners, id)
delete(p.prs, id)
}

// initProgress initializes a new progress for the given node or learner. The
// node may not exist yet in either form or a panic will ensue.
func (p *progressTracker) initProgress(id, match, next uint64, isLearner bool) {
if pr := p.nodes[id]; pr != nil {
if pr := p.prs[id]; pr != nil {
panic(fmt.Sprintf("peer %x already tracked as node %v", id, pr))
}
if pr := p.learners[id]; pr != nil {
panic(fmt.Sprintf("peer %x already tracked as learner %v", id, pr))
}
if !isLearner {
p.nodes[id] = &Progress{Next: next, Match: match, ins: newInflights(p.maxInflight)}
return
p.voters[0][id] = struct{}{}
} else {
p.learners[id] = struct{}{}
}
p.learners[id] = &Progress{Next: next, Match: match, ins: newInflights(p.maxInflight), IsLearner: true}
p.prs[id] = &Progress{Next: next, Match: match, ins: newInflights(p.maxInflight), IsLearner: isLearner}
}

func (p *progressTracker) getProgress(id uint64) *Progress {
if pr, ok := p.nodes[id]; ok {
return pr
}

return p.learners[id]
return p.prs[id]
}

// visit invokes the supplied closure for all tracked progresses.
func (p *progressTracker) visit(f func(id uint64, pr *Progress)) {
for id, pr := range p.nodes {
f(id, pr)
}

for id, pr := range p.learners {
for id, pr := range p.prs {
f(id, pr)
}
}
Expand All @@ -395,19 +391,21 @@ func (p *progressTracker) visit(f func(id uint64, pr *Progress)) {
// the view of the local raft state machine. Otherwise, it returns
// false.
func (p *progressTracker) quorumActive() bool {
var act int
votes := map[uint64]bool{}
p.visit(func(id uint64, pr *Progress) {
if pr.RecentActive && !pr.IsLearner {
act++
if pr.IsLearner {
return
}
votes[id] = pr.RecentActive
})

return act >= p.quorum()
return p.voters.VoteResult(votes) == quorum.VoteWon
}

func (p *progressTracker) voterNodes() []uint64 {
nodes := make([]uint64, 0, len(p.nodes))
for id := range p.nodes {
m := p.voters.IDs()
nodes := make([]uint64, 0, len(m))
for id := range m {
nodes = append(nodes, id)
}
sort.Sort(uint64Slice(nodes))
Expand Down Expand Up @@ -439,22 +437,21 @@ func (p *progressTracker) recordVote(id uint64, v bool) {

// tallyVotes returns the number of granted and rejected votes, and whether the
// election outcome is known.
func (p *progressTracker) tallyVotes() (granted int, rejected int, result electionResult) {
for _, v := range p.votes {
if v {
func (p *progressTracker) tallyVotes() (granted int, rejected int, _ quorum.VoteResult) {
// Make sure to populate granted/rejected correctly even if the votes slice
// contains members no longer part of the configuration. This doesn't really
// matter in the way the numbers are used (they're informational), but might
// as well get it right.
for id, pr := range p.prs {
if pr.IsLearner {
continue
}
if p.votes[id] {
granted++
} else {
rejected++
}
}

q := p.quorum()

result = electionIndeterminate
if granted >= q {
result = electionWon
} else if rejected >= q {
result = electionLost
}
result := p.voters.VoteResult(p.votes)
return granted, rejected, result
}
40 changes: 40 additions & 0 deletions raft/quorum/bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Copyright 2019 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package quorum

import (
"fmt"
"math"
"math/rand"
"testing"
)

func BenchmarkMajorityConfig_CommittedIndex(b *testing.B) {
// go test -run - -bench . -benchmem ./raft/quorum
for _, n := range []int{1, 3, 5, 7, 9, 11} {
b.Run(fmt.Sprintf("voters=%d", n), func(b *testing.B) {
c := MajorityConfig{}
l := mapAckIndexer{}
for i := uint64(0); i < uint64(n); i++ {
c[i+1] = struct{}{}
l[i+1] = Index(rand.Int63n(math.MaxInt64))
}

for i := 0; i < b.N; i++ {
_ = c.CommittedIndex(l)
}
})
}
}
Loading

0 comments on commit 755aab6

Please sign in to comment.