Skip to content

Commit

Permalink
swarm/network: Saturation check for healthy networks (ethereum#19071)
Browse files Browse the repository at this point in the history
* swarm/network: new saturation for  implementation

* swarm/network: re-added saturation func in Kademlia as it is used elsewhere

* swarm/network: saturation with higher MinBinSize

* swarm/network: PeersPerBin with depth check

* swarm/network: edited tests to pass new saturated check

* swarm/network: minor fix saturated check

* swarm/network/simulations/discovery: fixed renamed RPC call

* swarm/network: renamed to isSaturated and returns bool

* swarm/network: early depth check

(cherry picked from commit 2af2472)
  • Loading branch information
holisticode authored and dshulyak committed Mar 14, 2019
1 parent b772c22 commit 04feb8b
Show file tree
Hide file tree
Showing 4 changed files with 179 additions and 29 deletions.
74 changes: 64 additions & 10 deletions swarm/network/kademlia.go
Original file line number Diff line number Diff line change
Expand Up @@ -628,7 +628,8 @@ func (k *Kademlia) string() string {
// used for testing only
// TODO move to separate testing tools file
type PeerPot struct {
	// NNSet holds the addresses of the nearest neighbours, i.e. the peers
	// in or deeper than the depth.
	NNSet [][]byte
	// PeersPerBin counts the peers shallower than the depth;
	// the slice index is the bin (proximity order).
	PeersPerBin []int
}

// NewPeerPotMap creates a map of pot record of *BzzAddr with keys
Expand All @@ -654,6 +655,7 @@ func NewPeerPotMap(neighbourhoodSize int, addrs [][]byte) map[string]*PeerPot {

// all nn-peers
var nns [][]byte
peersPerBin := make([]int, depth)

// iterate through the neighbours, going from the deepest to the shallowest
np.EachNeighbour(a, Pof, func(val pot.Val, po int) bool {
Expand All @@ -667,14 +669,18 @@ func NewPeerPotMap(neighbourhoodSize int, addrs [][]byte) map[string]*PeerPot {
// a neighbor is any peer in or deeper than the depth
if po >= depth {
nns = append(nns, addr)
return true
} else {
// for peers < depth, we just count the number in each bin
// the bin is the index of the slice
peersPerBin[po]++
}
return false
return true
})

log.Trace(fmt.Sprintf("%x PeerPotMap NNS: %s", addrs[i][:4], LogAddrs(nns)))
log.Trace(fmt.Sprintf("%x PeerPotMap NNS: %s, peersPerBin", addrs[i][:4], LogAddrs(nns)))
ppmap[common.Bytes2Hex(a)] = &PeerPot{
NNSet: nns,
NNSet: nns,
PeersPerBin: peersPerBin,
}
}
return ppmap
Expand All @@ -698,6 +704,39 @@ func (k *Kademlia) saturation() int {
return prev
}

// isSaturated reports whether the kademlia is considered saturated.
//
// peersPerBin holds, per bin index, the number of peers available in that bin;
// depth is the already computed depth of the kademlia. It is passed in rather
// than recalculated because the caller (`GetHealthInfo()`) has it at hand.
//
// A visited bin shallower than depth is recorded as unsaturated when it has
// fewer than k.MinBinSize connections AND fewer connections than there are
// peers available for it. A bin that cannot be filled any further (no more
// peers exist for it) therefore does not count against saturation.
// The kademlia is saturated when no bin was recorded as unsaturated.
func (k *Kademlia) isSaturated(peersPerBin []int, depth int) bool {
	// peersPerBin must describe exactly the bins below depth, otherwise bail out early
	if len(peersPerBin) != depth {
		return false
	}

	unsaturatedBins := []int{}
	inspectBin := func(po, size int, _ func(func(val pot.Val) bool) bool) bool {
		// bins at or beyond depth are not inspected
		// (returning false presumably ends the iteration - confirm against pot.EachBin)
		if po >= depth {
			return false
		}
		available := peersPerBin[po]
		log.Trace("peers per bin", "peersPerBin[po]", available, "po", po)
		// only flag the bin if there are peers left that could still fill it
		underMinimum := size < k.MinBinSize
		morePeersExist := size < available
		if underMinimum && morePeersExist {
			log.Trace("connections for po", "po", po, "size", size)
			unsaturatedBins = append(unsaturatedBins, po)
		}
		return true
	}
	k.conns.EachBin(k.base, Pof, 0, inspectBin)

	log.Trace("list of unsaturated bins", "unsaturatedBins", unsaturatedBins)
	return len(unsaturatedBins) == 0
}

// knowNeighbours tests if all neighbours in the peerpot
// are found among the peers known to the kademlia
// It is used in Healthy function for testing only
Expand Down Expand Up @@ -780,19 +819,21 @@ type Health struct {
ConnectNN bool // whether node is connected to all its neighbours
CountConnectNN int // amount of neighbours connected to
MissingConnectNN [][]byte // which neighbours we should have been connected to but we're not
Saturated bool // whether we are connected to all the peers we would have liked to
Hive string
// Saturated: true if every bin shallower than depth either has at least MinBinSize
// connections or has as many connections as there are peers available in that bin
Saturated bool
Hive string
}

// Healthy reports the health state of the kademlia connectivity
// GetHealthInfo reports the health state of the kademlia connectivity
//
// The PeerPot argument provides an all-knowing view of the network
// The resulting Health object is a result of comparisons between
// what is the actual composition of the kademlia in question (the receiver), and
// what SHOULD it have been when we take all we know about the network into consideration.
//
// used for testing only
func (k *Kademlia) Healthy(pp *PeerPot) *Health {
func (k *Kademlia) GetHealthInfo(pp *PeerPot) *Health {
k.lock.RLock()
defer k.lock.RUnlock()
if len(pp.NNSet) < k.NeighbourhoodSize {
Expand All @@ -801,7 +842,10 @@ func (k *Kademlia) Healthy(pp *PeerPot) *Health {
gotnn, countgotnn, culpritsgotnn := k.connectedNeighbours(pp.NNSet)
knownn, countknownn, culpritsknownn := k.knowNeighbours(pp.NNSet)
depth := depthForPot(k.conns, k.NeighbourhoodSize, k.base)
saturated := k.saturation() < depth

// check saturation
saturated := k.isSaturated(pp.PeersPerBin, depth)

log.Trace(fmt.Sprintf("%08x: healthy: knowNNs: %v, gotNNs: %v, saturated: %v\n", k.base, knownn, gotnn, saturated))
return &Health{
KnowNN: knownn,
Expand All @@ -814,3 +858,13 @@ func (k *Kademlia) Healthy(pp *PeerPot) *Health {
Hive: k.string(),
}
}

// Healthy returns the strict interpretation of health for a `Health` struct.
// Strict health requires every one of these conditions to hold:
// - we know at least one peer (CountKnowNN > 0)
// - we know all neighbors (KnowNN)
// - we are connected to all known neighbors (ConnectNN)
// - it is saturated (Saturated)
func (h *Health) Healthy() bool {
	if h.CountKnowNN <= 0 {
		return false
	}
	if !h.KnowNN || !h.ConnectNN {
		return false
	}
	return h.Saturated
}
126 changes: 111 additions & 15 deletions swarm/network/kademlia_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,46 @@ func TestNeighbourhoodDepth(t *testing.T) {
testNum++
}

// TestHighMinBinSize checks the saturation-aware health verdict when
// MinBinSize is > 2, the number of connections in a bin is below
// k.MinBinSize, and more peers are available than are connected.
func TestHighMinBinSize(t *testing.T) {
	// peer in bin 1 that is registered up front and only connected at the very end
	const registered = "10000000"

	// runs the scenario for a single MinBinSize value
	scenario := func(minBinSize int) {
		tk := newTestKademlia(t, "11111111")
		tk.KadParams.MinBinSize = minBinSize

		// connect a few peers so that there are NN and a depth:
		// bin 0, bin 3 and bin 4 respectively
		for _, addr := range []string{"00000000", "11100000", "11110000"} {
			tk.On(addr)
		}

		// one peer in bin 1 is known but not connected
		tk.Register(registered)

		// connect one further peer per iteration; the kademlia is expected to
		// stay unhealthy until the last iteration, where the registered peer
		// gets connected as well
		last := minBinSize - 1
		for i := 1; i <= last; i++ {
			tk.On(fmt.Sprintf("1000%b", 8|i))
			if i < last {
				tk.checkHealth(false)
				continue
			}
			tk.On(registered)
			tk.checkHealth(true)
		}
	}

	// exercise MinBinSize values 3 to 5
	for _, size := range []int{3, 4, 5} {
		scenario(size)
	}
}

// TestHealthStrict tests the simplest definition of health
// Which means whether we are connected to all neighbors we know of
func TestHealthStrict(t *testing.T) {
Expand All @@ -176,60 +216,116 @@ func TestHealthStrict(t *testing.T) {
// no peers
// unhealthy (and lonely)
tk := newTestKademlia(t, "11111111")
tk.checkHealth(false, false)
tk.checkHealth(false)

// know one peer but not connected
// unhealthy
tk.Register("11100000")
tk.checkHealth(false, false)
tk.checkHealth(false)

// know one peer and connected
// healthy
// unhealthy: not saturated
tk.On("11100000")
tk.checkHealth(true, false)
tk.checkHealth(true)

// know two peers, only one connected
// unhealthy
tk.Register("11111100")
tk.checkHealth(false, false)
tk.checkHealth(false)

// know two peers and connected to both
// healthy
tk.On("11111100")
tk.checkHealth(true, false)
tk.checkHealth(true)

// know three peers, connected to the two deepest
// healthy
tk.Register("00000000")
tk.checkHealth(true, false)
tk.checkHealth(false)

// know three peers, connected to all three
// healthy
tk.On("00000000")
tk.checkHealth(true, false)
tk.checkHealth(true)

// add fourth peer deeper than current depth
// unhealthy
tk.Register("11110000")
tk.checkHealth(false, false)
tk.checkHealth(false)

// connected to three deepest peers
// healthy
tk.On("11110000")
tk.checkHealth(true, false)
tk.checkHealth(true)

// add additional peer in same bin as deepest peer
// unhealthy
tk.Register("11111101")
tk.checkHealth(false, false)
tk.checkHealth(false)

// four deepest of five peers connected
// healthy
tk.On("11111101")
tk.checkHealth(true, false)
tk.checkHealth(true)

// add additional peer in bin 0
// unhealthy: unsaturated bin 0, 2 known but 1 connected
tk.Register("00000001")
tk.checkHealth(false)

// Connect second in bin 0
// healthy
tk.On("00000001")
tk.checkHealth(true)

// add peer in bin 1
// unhealthy, as it is known but not connected
tk.Register("10000000")
tk.checkHealth(false)

// connect peer in bin 1
// depth change, is now 1
// healthy, 1 peer in bin 1 known and connected
tk.On("10000000")
tk.checkHealth(true)

// add second peer in bin 1
// unhealthy, as it is known but not connected
tk.Register("10000001")
tk.checkHealth(false)

// connect second peer in bin 1
// healthy,
tk.On("10000001")
tk.checkHealth(true)

// connect third peer in bin 1
// healthy,
tk.On("10000011")
tk.checkHealth(true)

// add peer in bin 2
// unhealthy, no depth change
tk.Register("11000000")
tk.checkHealth(false)

// connect peer in bin 2
// depth change - as we already have peers in bin 3 and 4,
// we have contiguous bins, no bin < po 5 is empty -> depth 5
// healthy, every bin < depth has the max available peers,
// even if they are < MinBinSize
tk.On("11000000")
tk.checkHealth(true)

// add peer in bin 2
// unhealthy, peer bin is below depth 5 but
// has more available peers (2) than connected ones (1)
// --> unsaturated
tk.Register("11000011")
tk.checkHealth(false)
}

func (tk *testKademlia) checkHealth(expectHealthy bool, expectSaturation bool) {
func (tk *testKademlia) checkHealth(expectHealthy bool) {
tk.t.Helper()
kid := common.Bytes2Hex(tk.BaseAddr())
addrs := [][]byte{tk.BaseAddr()}
Expand All @@ -239,13 +335,13 @@ func (tk *testKademlia) checkHealth(expectHealthy bool, expectSaturation bool) {
})

pp := NewPeerPotMap(tk.NeighbourhoodSize, addrs)
healthParams := tk.Healthy(pp[kid])
healthParams := tk.GetHealthInfo(pp[kid])

// definition of health, all conditions must be true:
// - we at least know one peer
// - we know all neighbors
// - we are connected to all known neighbors
health := healthParams.KnowNN && healthParams.ConnectNN && healthParams.CountKnowNN > 0
health := healthParams.Healthy()
if expectHealthy != health {
tk.t.Fatalf("expected kademlia health %v, is %v\n%v", expectHealthy, health, tk.String())
}
Expand Down
2 changes: 1 addition & 1 deletion swarm/network/simulation/kademlia.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net
addr := common.Bytes2Hex(k.BaseAddr())
pp := ppmap[addr]
//call Healthy RPC
h := k.Healthy(pp)
h := k.GetHealthInfo(pp)
//print info
log.Debug(k.String())
log.Debug("kademlia", "connectNN", h.ConnectNN, "knowNN", h.KnowNN)
Expand Down
6 changes: 3 additions & 3 deletions swarm/network/simulations/discovery/discovery_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ func discoverySimulation(nodes, conns int, adapter adapters.NodeAdapter) (*simul
}

healthy := &network.Health{}
if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
return false, fmt.Errorf("error getting node health: %s", err)
}
log.Debug(fmt.Sprintf("node %4s healthy: connected nearest neighbours: %v, know nearest neighbours: %v,\n\n%v", id, healthy.ConnectNN, healthy.KnowNN, healthy.Hive))
Expand Down Expand Up @@ -352,7 +352,7 @@ func discoveryPersistenceSimulation(nodes, conns int, adapter adapters.NodeAdapt
healthy := &network.Health{}
addr := id.String()
ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)
if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
return fmt.Errorf("error getting node health: %s", err)
}

Expand Down Expand Up @@ -422,7 +422,7 @@ func discoveryPersistenceSimulation(nodes, conns int, adapter adapters.NodeAdapt
healthy := &network.Health{}
ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)

if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
return false, fmt.Errorf("error getting node health: %s", err)
}
log.Info(fmt.Sprintf("node %4s healthy: got nearest neighbours: %v, know nearest neighbours: %v", id, healthy.ConnectNN, healthy.KnowNN))
Expand Down

0 comments on commit 04feb8b

Please sign in to comment.