From 973781e05d15b5bfde97e45e2062eb77af339cb4 Mon Sep 17 00:00:00 2001 From: Jeromy Date: Sat, 8 Jul 2017 09:44:57 -0700 Subject: [PATCH 01/58] first commit --- p2p/net/connmgr/connmgr.go | 170 +++++++++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 p2p/net/connmgr/connmgr.go diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go new file mode 100644 index 0000000000..94ea6c4f44 --- /dev/null +++ b/p2p/net/connmgr/connmgr.go @@ -0,0 +1,170 @@ +package connmgr + +import ( + "context" + "sort" + "sync" + "time" + + inet "gx/ipfs/QmRscs8KxrSmSv4iuevHv8JfuUzHBMoqiaHzxfDRiksd6e/go-libp2p-net" + logging "gx/ipfs/QmSpJByNKFX1sCsHBEp3R73FL4NF6FnQTEGyNAXHm2GS52/go-log" + ma "gx/ipfs/QmcyqRMCAXVtYPS4DiBrA7sezL9rRGfW8Ctx7cywL4TXJj/go-multiaddr" +) + +var log = logging.Logger("connmgr") + +type ConnManager struct { + HighWater int + LowWater int + + GracePeriod time.Duration + + conns map[inet.Conn]connInfo + + lk sync.Mutex +} + +func NewConnManager(low, hi int) *ConnManager { + return &ConnManager{ + HighWater: hi, + LowWater: low, + GracePeriod: time.Second * 10, + conns: make(map[inet.Conn]connInfo), + } +} + +type connInfo struct { + tags map[string]int + value int + c inet.Conn + closed bool + + firstSeen time.Time +} + +func (cm *ConnManager) TrimOpenConns(ctx context.Context) { + defer log.EventBegin(ctx, "connCleanup").Done() + cm.lk.Lock() + defer cm.lk.Unlock() + + if len(cm.conns) < cm.LowWater { + log.Info("open connection count below limit") + return + } + + var infos []connInfo + + for _, inf := range cm.conns { + infos = append(infos, inf) + } + + sort.Slice(infos, func(i, j int) bool { + if infos[i].closed { + return true + } + if infos[j].closed { + return false + } + + return infos[i].value < infos[j].value + }) + + toclose := infos[:len(infos)-cm.LowWater] + + for _, inf := range toclose { + if time.Since(inf.firstSeen) < cm.GracePeriod { + continue + } + + log.Info("closing conn: ", inf.c.RemotePeer()) + log.Event(ctx, "closeConn", inf.c.RemotePeer()) + inf.c.Close() + inf.closed = true + } + + if len(cm.conns) > cm.HighWater { + log.Error("still over high water mark after trimming connections") + } +} + +func (cm *ConnManager) TagConn(c inet.Conn, tag string, val int) { + cm.lk.Lock() + defer cm.lk.Unlock() + + ci, ok := cm.conns[c] + if !ok { + log.Error("tried to tag untracked conn: ", c.RemotePeer()) + return + } + + ci.value += (val - ci.tags[tag]) + ci.tags[tag] = val +} + +func (cm *ConnManager) UntagConn(c inet.Conn, tag string) { + cm.lk.Lock() + defer cm.lk.Unlock() + + ci, ok := cm.conns[c] + if !ok { + log.Error("tried to remove tag on untracked conn: ", c.RemotePeer()) + return + } + + ci.value -= ci.tags[tag] + delete(ci.tags, tag) +} + +func (cm *ConnManager) Notifee() *cmNotifee { + return (*cmNotifee)(cm) +} + +type cmNotifee ConnManager + +func (nn *cmNotifee) cm() *ConnManager { + return (*ConnManager)(nn) +} + +func (nn *cmNotifee) Connected(n inet.Network, c inet.Conn) { + cm := nn.cm() + + cm.lk.Lock() + defer cm.lk.Unlock() + + cinfo, ok := cm.conns[c] + if ok { + log.Error("received connected notification for conn we are already tracking: ", c.RemotePeer()) + return + } + + cinfo = connInfo{ + firstSeen: time.Now(), + tags: make(map[string]int), + c: c, + } + cm.conns[c] = cinfo + + if len(cm.conns) > nn.HighWater { + go cm.TrimOpenConns(context.Background()) + } +} + +func (nn *cmNotifee) Disconnected(n inet.Network, c inet.Conn) { + cm := nn.cm() + + cm.lk.Lock() + defer cm.lk.Unlock() + + _, ok := 
cm.conns[c] + if !ok { + log.Error("received disconnected notification for conn we are not tracking: ", c.RemotePeer()) + return + } + + delete(cm.conns, c) +} + +func (nn *cmNotifee) Listen(n inet.Network, addr ma.Multiaddr) {} +func (nn *cmNotifee) ListenClose(n inet.Network, addr ma.Multiaddr) {} +func (nn *cmNotifee) OpenedStream(inet.Network, inet.Stream) {} +func (nn *cmNotifee) ClosedStream(inet.Network, inet.Stream) {} From 29a8c4e1bfdc378810c7713bf21b1efdd7736878 Mon Sep 17 00:00:00 2001 From: Jeromy Date: Mon, 10 Jul 2017 08:04:31 -0700 Subject: [PATCH 02/58] add base repo files --- p2p/net/connmgr/connmgr.go | 112 +++++++++++++++++++++---------------- 1 file changed, 65 insertions(+), 47 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 94ea6c4f44..f9f0b34f09 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -6,9 +6,10 @@ import ( "sync" "time" - inet "gx/ipfs/QmRscs8KxrSmSv4iuevHv8JfuUzHBMoqiaHzxfDRiksd6e/go-libp2p-net" - logging "gx/ipfs/QmSpJByNKFX1sCsHBEp3R73FL4NF6FnQTEGyNAXHm2GS52/go-log" - ma "gx/ipfs/QmcyqRMCAXVtYPS4DiBrA7sezL9rRGfW8Ctx7cywL4TXJj/go-multiaddr" + logging "github.com/ipfs/go-log" + inet "github.com/libp2p/go-libp2p-net" + peer "github.com/libp2p/go-libp2p-peer" + ma "github.com/multiformats/go-multiaddr" ) var log = logging.Logger("connmgr") @@ -19,9 +20,12 @@ type ConnManager struct { GracePeriod time.Duration - conns map[inet.Conn]connInfo + peers map[peer.ID]peerInfo + connCount int lk sync.Mutex + + lastTrim time.Time } func NewConnManager(low, hi int) *ConnManager { @@ -29,43 +33,37 @@ func NewConnManager(low, hi int) *ConnManager { HighWater: hi, LowWater: low, GracePeriod: time.Second * 10, - conns: make(map[inet.Conn]connInfo), + peers: make(map[peer.ID]peerInfo), } } -type connInfo struct { - tags map[string]int - value int - c inet.Conn - closed bool +type peerInfo struct { + tags map[string]int + value int + + conns map[inet.Conn]time.Time firstSeen time.Time } func (cm *ConnManager) TrimOpenConns(ctx context.Context) { - defer log.EventBegin(ctx, "connCleanup").Done() cm.lk.Lock() defer cm.lk.Unlock() + defer log.EventBegin(ctx, "connCleanup").Done() + cm.lastTrim = time.Now() - if len(cm.conns) < cm.LowWater { + if len(cm.peers) < cm.LowWater { log.Info("open connection count below limit") return } - var infos []connInfo + var infos []peerInfo - for _, inf := range cm.conns { + for _, inf := range cm.peers { infos = append(infos, inf) } sort.Slice(infos, func(i, j int) bool { - if infos[i].closed { - return true - } - if infos[j].closed { - return false - } - return infos[i].value < infos[j].value }) @@ -76,43 +74,45 @@ func (cm *ConnManager) TrimOpenConns(ctx context.Context) { continue } - log.Info("closing conn: ", inf.c.RemotePeer()) - log.Event(ctx, "closeConn", inf.c.RemotePeer()) - inf.c.Close() - inf.closed = true + // TODO: if a peer has more than one connection, maybe only close one? 
+ for c, _ := range inf.conns { + log.Info("closing conn: ", c.RemotePeer()) + log.Event(ctx, "closeConn", c.RemotePeer()) + c.Close() + } } - if len(cm.conns) > cm.HighWater { + if len(cm.peers) > cm.HighWater { log.Error("still over high water mark after trimming connections") } } -func (cm *ConnManager) TagConn(c inet.Conn, tag string, val int) { +func (cm *ConnManager) TagConn(p peer.ID, tag string, val int) { cm.lk.Lock() defer cm.lk.Unlock() - ci, ok := cm.conns[c] + pi, ok := cm.peers[p] if !ok { - log.Error("tried to tag untracked conn: ", c.RemotePeer()) + log.Error("tried to tag conn from untracked peer: ", p) return } - ci.value += (val - ci.tags[tag]) - ci.tags[tag] = val + pi.value += (val - pi.tags[tag]) + pi.tags[tag] = val } -func (cm *ConnManager) UntagConn(c inet.Conn, tag string) { +func (cm *ConnManager) UntagConn(p peer.ID, tag string) { cm.lk.Lock() defer cm.lk.Unlock() - ci, ok := cm.conns[c] + pi, ok := cm.peers[p] if !ok { - log.Error("tried to remove tag on untracked conn: ", c.RemotePeer()) + log.Error("tried to remove tag from untracked peer: ", p) return } - ci.value -= ci.tags[tag] - delete(ci.tags, tag) + pi.value -= pi.tags[tag] + delete(pi.tags, tag) } func (cm *ConnManager) Notifee() *cmNotifee { @@ -131,21 +131,29 @@ func (nn *cmNotifee) Connected(n inet.Network, c inet.Conn) { cm.lk.Lock() defer cm.lk.Unlock() - cinfo, ok := cm.conns[c] + pinfo, ok := cm.peers[c.RemotePeer()] + if !ok { + pinfo = peerInfo{ + firstSeen: time.Now(), + tags: make(map[string]int), + conns: make(map[inet.Conn]time.Time), + } + cm.peers[c.RemotePeer()] = pinfo + } + + _, ok = pinfo.conns[c] if ok { log.Error("received connected notification for conn we are already tracking: ", c.RemotePeer()) return } - cinfo = connInfo{ - firstSeen: time.Now(), - tags: make(map[string]int), - c: c, - } - cm.conns[c] = cinfo + pinfo.conns[c] = time.Now() + cm.connCount++ - if len(cm.conns) > nn.HighWater { - go cm.TrimOpenConns(context.Background()) + if cm.connCount > nn.HighWater { + if cm.lastTrim.IsZero() || time.Since(cm.lastTrim) > time.Second*10 { + go cm.TrimOpenConns(context.Background()) + } } } @@ -155,13 +163,23 @@ func (nn *cmNotifee) Disconnected(n inet.Network, c inet.Conn) { cm.lk.Lock() defer cm.lk.Unlock() - _, ok := cm.conns[c] + cinf, ok := cm.peers[c.RemotePeer()] + if !ok { + log.Error("received disconnected notification for peer we are not tracking: ", c.RemotePeer()) + return + } + + _, ok = cinf.conns[c] if !ok { log.Error("received disconnected notification for conn we are not tracking: ", c.RemotePeer()) return } - delete(cm.conns, c) + delete(cinf.conns, c) + cm.connCount-- + if len(cinf.conns) == 0 { + delete(cm.peers, c.RemotePeer()) + } } func (nn *cmNotifee) Listen(n inet.Network, addr ma.Multiaddr) {} From 4b860973dcbb27993f338401d422a51867587baf Mon Sep 17 00:00:00 2001 From: Jeromy Date: Mon, 10 Jul 2017 10:45:11 -0700 Subject: [PATCH 03/58] Add a test --- p2p/net/connmgr/connmgr.go | 60 +++++++++++++++++------------ p2p/net/connmgr/connmgr_test.go | 67 +++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+), 25 deletions(-) create mode 100644 p2p/net/connmgr/connmgr_test.go diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index f9f0b34f09..fb77484e01 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -14,13 +14,20 @@ import ( var log = logging.Logger("connmgr") -type ConnManager struct { - HighWater int - LowWater int +type ConnManager interface { + TagPeer(peer.ID, string, int) + UntagPeer(peer.ID, 
string) + TrimOpenConns(context.Context) + Notifee() inet.Notifiee +} + +type connManager struct { + highWater int + lowWater int - GracePeriod time.Duration + gracePeriod time.Duration - peers map[peer.ID]peerInfo + peers map[peer.ID]*peerInfo connCount int lk sync.Mutex @@ -28,12 +35,14 @@ type ConnManager struct { lastTrim time.Time } -func NewConnManager(low, hi int) *ConnManager { - return &ConnManager{ - HighWater: hi, - LowWater: low, - GracePeriod: time.Second * 10, - peers: make(map[peer.ID]peerInfo), +var DefaultGracePeriod = time.Second * 10 + +func NewConnManager(low, hi int, grace time.Duration) ConnManager { + return &connManager{ + highWater: hi, + lowWater: low, + gracePeriod: grace, + peers: make(map[peer.ID]*peerInfo), } } @@ -46,18 +55,18 @@ type peerInfo struct { firstSeen time.Time } -func (cm *ConnManager) TrimOpenConns(ctx context.Context) { +func (cm *connManager) TrimOpenConns(ctx context.Context) { cm.lk.Lock() defer cm.lk.Unlock() defer log.EventBegin(ctx, "connCleanup").Done() cm.lastTrim = time.Now() - if len(cm.peers) < cm.LowWater { + if len(cm.peers) < cm.lowWater { log.Info("open connection count below limit") return } - var infos []peerInfo + var infos []*peerInfo for _, inf := range cm.peers { infos = append(infos, inf) @@ -67,10 +76,11 @@ func (cm *ConnManager) TrimOpenConns(ctx context.Context) { return infos[i].value < infos[j].value }) - toclose := infos[:len(infos)-cm.LowWater] + close_count := len(infos) - cm.lowWater + toclose := infos[:close_count] for _, inf := range toclose { - if time.Since(inf.firstSeen) < cm.GracePeriod { + if time.Since(inf.firstSeen) < cm.gracePeriod { continue } @@ -82,12 +92,12 @@ func (cm *ConnManager) TrimOpenConns(ctx context.Context) { } } - if len(cm.peers) > cm.HighWater { + if len(cm.peers) > cm.highWater { log.Error("still over high water mark after trimming connections") } } -func (cm *ConnManager) TagConn(p peer.ID, tag string, val int) { +func (cm *connManager) TagPeer(p peer.ID, tag string, val int) { cm.lk.Lock() defer cm.lk.Unlock() @@ -101,7 +111,7 @@ func (cm *ConnManager) TagConn(p peer.ID, tag string, val int) { pi.tags[tag] = val } -func (cm *ConnManager) UntagConn(p peer.ID, tag string) { +func (cm *connManager) UntagPeer(p peer.ID, tag string) { cm.lk.Lock() defer cm.lk.Unlock() @@ -115,14 +125,14 @@ func (cm *ConnManager) UntagConn(p peer.ID, tag string) { delete(pi.tags, tag) } -func (cm *ConnManager) Notifee() *cmNotifee { +func (cm *connManager) Notifee() inet.Notifiee { return (*cmNotifee)(cm) } -type cmNotifee ConnManager +type cmNotifee connManager -func (nn *cmNotifee) cm() *ConnManager { - return (*ConnManager)(nn) +func (nn *cmNotifee) cm() *connManager { + return (*connManager)(nn) } func (nn *cmNotifee) Connected(n inet.Network, c inet.Conn) { @@ -133,7 +143,7 @@ func (nn *cmNotifee) Connected(n inet.Network, c inet.Conn) { pinfo, ok := cm.peers[c.RemotePeer()] if !ok { - pinfo = peerInfo{ + pinfo = &peerInfo{ firstSeen: time.Now(), tags: make(map[string]int), conns: make(map[inet.Conn]time.Time), @@ -150,7 +160,7 @@ func (nn *cmNotifee) Connected(n inet.Network, c inet.Conn) { pinfo.conns[c] = time.Now() cm.connCount++ - if cm.connCount > nn.HighWater { + if cm.connCount > nn.highWater { if cm.lastTrim.IsZero() || time.Since(cm.lastTrim) > time.Second*10 { go cm.TrimOpenConns(context.Background()) } diff --git a/p2p/net/connmgr/connmgr_test.go b/p2p/net/connmgr/connmgr_test.go new file mode 100644 index 0000000000..8c30e9e5d9 --- /dev/null +++ b/p2p/net/connmgr/connmgr_test.go @@ -0,0 
+1,67 @@ +package connmgr + +import ( + "context" + "testing" + + inet "github.com/libp2p/go-libp2p-net" + peer "github.com/libp2p/go-libp2p-peer" + tu "github.com/libp2p/go-testutil" +) + +type tconn struct { + inet.Conn + peer peer.ID + closed bool +} + +func (c *tconn) Close() error { + c.closed = true + return nil +} + +func (c *tconn) RemotePeer() peer.ID { + return c.peer +} + +func randConn(t *testing.T) inet.Conn { + pid := tu.RandPeerIDFatal(t) + return &tconn{peer: pid} +} + +func TestConnTrimming(t *testing.T) { + cm := NewConnManager(200, 300, 0) + not := cm.Notifee() + + var conns []inet.Conn + for i := 0; i < 300; i++ { + rc := randConn(t) + conns = append(conns, rc) + not.Connected(nil, rc) + } + + for _, c := range conns { + if c.(*tconn).closed { + t.Fatal("nothing should be closed yet") + } + } + + for i := 0; i < 100; i++ { + cm.TagPeer(conns[i].RemotePeer(), "foo", 10) + } + + cm.TagPeer(conns[299].RemotePeer(), "badfoo", -5) + + cm.TrimOpenConns(context.Background()) + + for i := 0; i < 100; i++ { + c := conns[i] + if c.(*tconn).closed { + t.Fatal("these shouldnt be closed") + } + } + + if !conns[299].(*tconn).closed { + t.Fatal("conn with bad tag should have gotten closed") + } +} From e4074aaa497c7698e293267fa280c95b9f049b8c Mon Sep 17 00:00:00 2001 From: Jeromy Date: Mon, 10 Jul 2017 12:22:53 -0700 Subject: [PATCH 04/58] make it inspectable and disableable --- p2p/net/connmgr/connmgr.go | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index fb77484e01..cbad648130 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -17,6 +17,7 @@ var log = logging.Logger("connmgr") type ConnManager interface { TagPeer(peer.ID, string, int) UntagPeer(peer.ID, string) + GetTagInfo(peer.ID) *TagInfo TrimOpenConns(context.Context) Notifee() inet.Notifiee } @@ -55,10 +56,21 @@ type peerInfo struct { firstSeen time.Time } +type TagInfo struct { + FirstSeen time.Time + Value int + Tags map[string]int + Conns map[string]time.Time +} + func (cm *connManager) TrimOpenConns(ctx context.Context) { cm.lk.Lock() defer cm.lk.Unlock() defer log.EventBegin(ctx, "connCleanup").Done() + if cm.lowWater == 0 || cm.highWater == 0 { + // disabled + return + } cm.lastTrim = time.Now() if len(cm.peers) < cm.lowWater { @@ -97,6 +109,32 @@ func (cm *connManager) TrimOpenConns(ctx context.Context) { } } +func (cm *connManager) GetTagInfo(p peer.ID) *TagInfo { + cm.lk.Lock() + defer cm.lk.Unlock() + + pi, ok := cm.peers[p] + if !ok { + return nil + } + + out := &TagInfo{ + FirstSeen: pi.firstSeen, + Value: pi.value, + Tags: make(map[string]int), + Conns: make(map[string]time.Time), + } + + for t, v := range pi.tags { + out.Tags[t] = v + } + for c, t := range pi.conns { + out.Conns[c.RemoteMultiaddr().String()] = t + } + + return out +} + func (cm *connManager) TagPeer(p peer.ID, tag string, val int) { cm.lk.Lock() defer cm.lk.Unlock() From 93b0a9abf653fcbc0e36439416232653ff366090 Mon Sep 17 00:00:00 2001 From: Jeromy Date: Wed, 13 Sep 2017 17:34:13 -0700 Subject: [PATCH 05/58] don't try to close a connection while holding a lock that blocks the notifiees --- p2p/net/connmgr/connmgr.go | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index cbad648130..d06440d014 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -2,6 +2,7 @@ package connmgr import ( "context" + "io" "sort" 
"sync" "time" @@ -64,18 +65,24 @@ type TagInfo struct { } func (cm *connManager) TrimOpenConns(ctx context.Context) { + for _, c := range cm.getConnsToClose(ctx) { + c.Close() + } +} + +func (cm *connManager) getConnsToClose(ctx context.Context) []io.Closer { cm.lk.Lock() defer cm.lk.Unlock() defer log.EventBegin(ctx, "connCleanup").Done() if cm.lowWater == 0 || cm.highWater == 0 { // disabled - return + return nil } cm.lastTrim = time.Now() if len(cm.peers) < cm.lowWater { log.Info("open connection count below limit") - return + return nil } var infos []*peerInfo @@ -91,6 +98,8 @@ func (cm *connManager) TrimOpenConns(ctx context.Context) { close_count := len(infos) - cm.lowWater toclose := infos[:close_count] + var closed []io.Closer + for _, inf := range toclose { if time.Since(inf.firstSeen) < cm.gracePeriod { continue @@ -100,13 +109,12 @@ func (cm *connManager) TrimOpenConns(ctx context.Context) { for c, _ := range inf.conns { log.Info("closing conn: ", c.RemotePeer()) log.Event(ctx, "closeConn", c.RemotePeer()) - c.Close() + // TODO: probably don't want to always do this in a goroutine + closed = append(closed, c) } } - if len(cm.peers) > cm.highWater { - log.Error("still over high water mark after trimming connections") - } + return closed } func (cm *connManager) GetTagInfo(p peer.ID) *TagInfo { From 1fd3dd4bbfa883c03640b085251b27f9a48856dd Mon Sep 17 00:00:00 2001 From: Jeromy Date: Fri, 6 Oct 2017 09:01:08 -0700 Subject: [PATCH 06/58] extract interface --- p2p/net/connmgr/connmgr.go | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index d06440d014..610e3bec1d 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -8,6 +8,7 @@ import ( "time" logging "github.com/ipfs/go-log" + ifconnmgr "github.com/libp2p/go-libp2p-interface-connmgr" inet "github.com/libp2p/go-libp2p-net" peer "github.com/libp2p/go-libp2p-peer" ma "github.com/multiformats/go-multiaddr" @@ -15,14 +16,6 @@ import ( var log = logging.Logger("connmgr") -type ConnManager interface { - TagPeer(peer.ID, string, int) - UntagPeer(peer.ID, string) - GetTagInfo(peer.ID) *TagInfo - TrimOpenConns(context.Context) - Notifee() inet.Notifiee -} - type connManager struct { highWater int lowWater int @@ -37,9 +30,11 @@ type connManager struct { lastTrim time.Time } +var _ ifconnmgr.ConnManager = (*connManager)(nil) + var DefaultGracePeriod = time.Second * 10 -func NewConnManager(low, hi int, grace time.Duration) ConnManager { +func NewConnManager(low, hi int, grace time.Duration) ifconnmgr.ConnManager { return &connManager{ highWater: hi, lowWater: low, @@ -57,13 +52,6 @@ type peerInfo struct { firstSeen time.Time } -type TagInfo struct { - FirstSeen time.Time - Value int - Tags map[string]int - Conns map[string]time.Time -} - func (cm *connManager) TrimOpenConns(ctx context.Context) { for _, c := range cm.getConnsToClose(ctx) { c.Close() @@ -117,7 +105,7 @@ func (cm *connManager) getConnsToClose(ctx context.Context) []io.Closer { return closed } -func (cm *connManager) GetTagInfo(p peer.ID) *TagInfo { +func (cm *connManager) GetTagInfo(p peer.ID) *ifconnmgr.TagInfo { cm.lk.Lock() defer cm.lk.Unlock() @@ -126,7 +114,7 @@ func (cm *connManager) GetTagInfo(p peer.ID) *TagInfo { return nil } - out := &TagInfo{ + out := &ifconnmgr.TagInfo{ FirstSeen: pi.firstSeen, Value: pi.value, Tags: make(map[string]int), From ef6047635c8d3af7b1e9d60b124d7880a34cacc4 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Mon, 16 Oct 
2017 10:30:52 -0700 Subject: [PATCH 07/58] faster, cleaner connection closing 1. Don't ask the system for the time when checking if we should close each connection (potentially thousands of systemcalls!). 2. Log from outside the lock. 3. Log the event around the entire connection closing operation. 4. Preallocate the slice holding the connections to be closed. --- p2p/net/connmgr/connmgr.go | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 610e3bec1d..4305424400 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -2,7 +2,6 @@ package connmgr import ( "context" - "io" "sort" "sync" "time" @@ -53,20 +52,23 @@ type peerInfo struct { } func (cm *connManager) TrimOpenConns(ctx context.Context) { + defer log.EventBegin(ctx, "connCleanup").Done() for _, c := range cm.getConnsToClose(ctx) { + log.Info("closing conn: ", c.RemotePeer()) + log.Event(ctx, "closeConn", c.RemotePeer()) c.Close() } } -func (cm *connManager) getConnsToClose(ctx context.Context) []io.Closer { +func (cm *connManager) getConnsToClose(ctx context.Context) []inet.Conn { cm.lk.Lock() defer cm.lk.Unlock() - defer log.EventBegin(ctx, "connCleanup").Done() if cm.lowWater == 0 || cm.highWater == 0 { // disabled return nil } - cm.lastTrim = time.Now() + now := time.Now() + cm.lastTrim = now if len(cm.peers) < cm.lowWater { log.Info("open connection count below limit") @@ -83,20 +85,22 @@ func (cm *connManager) getConnsToClose(ctx context.Context) []io.Closer { return infos[i].value < infos[j].value }) - close_count := len(infos) - cm.lowWater - toclose := infos[:close_count] + closeCount := len(infos) - cm.lowWater + toclose := infos[:closeCount] - var closed []io.Closer + // 2x number of peers we're disconnecting from because we may have more + // than one connection per peer. Slightly over allocating isn't an issue + // as this is a very short-lived array. + closed := make([]inet.Conn, 0, len(toclose)*2) for _, inf := range toclose { - if time.Since(inf.firstSeen) < cm.gracePeriod { + // TODO: should we be using firstSeen or the time associated with the connection itself? + if inf.firstSeen.Add(cm.gracePeriod).After(now) { continue } // TODO: if a peer has more than one connection, maybe only close one? - for c, _ := range inf.conns { - log.Info("closing conn: ", c.RemotePeer()) - log.Event(ctx, "closeConn", c.RemotePeer()) + for c := range inf.conns { // TODO: probably don't want to always do this in a goroutine closed = append(closed, c) } From 25616aa1782c1147da53cada8d31ae9dbffd032f Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Mon, 16 Oct 2017 11:41:15 -0700 Subject: [PATCH 08/58] remove the DefaultGracePeriod It's unused. 
--- p2p/net/connmgr/connmgr.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 4305424400..cd712b1e3d 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -31,8 +31,6 @@ type connManager struct { var _ ifconnmgr.ConnManager = (*connManager)(nil) -var DefaultGracePeriod = time.Second * 10 - func NewConnManager(low, hi int, grace time.Duration) ifconnmgr.ConnManager { return &connManager{ highWater: hi, From d662cced87b74dc27044edad1d5aca3b2352fb49 Mon Sep 17 00:00:00 2001 From: Jeromy Date: Tue, 24 Oct 2017 11:18:46 -0700 Subject: [PATCH 09/58] add a way to expose info from the conn manager --- p2p/net/connmgr/connmgr.go | 47 +++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index cd712b1e3d..3b4aa4a8fb 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -15,7 +15,7 @@ import ( var log = logging.Logger("connmgr") -type connManager struct { +type BasicConnMgr struct { highWater int lowWater int @@ -29,10 +29,10 @@ type connManager struct { lastTrim time.Time } -var _ ifconnmgr.ConnManager = (*connManager)(nil) +var _ ifconnmgr.ConnManager = (*BasicConnMgr)(nil) -func NewConnManager(low, hi int, grace time.Duration) ifconnmgr.ConnManager { - return &connManager{ +func NewConnManager(low, hi int, grace time.Duration) *BasicConnMgr { + return &BasicConnMgr{ highWater: hi, lowWater: low, gracePeriod: grace, @@ -49,7 +49,7 @@ type peerInfo struct { firstSeen time.Time } -func (cm *connManager) TrimOpenConns(ctx context.Context) { +func (cm *BasicConnMgr) TrimOpenConns(ctx context.Context) { defer log.EventBegin(ctx, "connCleanup").Done() for _, c := range cm.getConnsToClose(ctx) { log.Info("closing conn: ", c.RemotePeer()) @@ -58,7 +58,7 @@ func (cm *connManager) TrimOpenConns(ctx context.Context) { } } -func (cm *connManager) getConnsToClose(ctx context.Context) []inet.Conn { +func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { cm.lk.Lock() defer cm.lk.Unlock() if cm.lowWater == 0 || cm.highWater == 0 { @@ -107,7 +107,7 @@ func (cm *connManager) getConnsToClose(ctx context.Context) []inet.Conn { return closed } -func (cm *connManager) GetTagInfo(p peer.ID) *ifconnmgr.TagInfo { +func (cm *BasicConnMgr) GetTagInfo(p peer.ID) *ifconnmgr.TagInfo { cm.lk.Lock() defer cm.lk.Unlock() @@ -133,7 +133,7 @@ func (cm *connManager) GetTagInfo(p peer.ID) *ifconnmgr.TagInfo { return out } -func (cm *connManager) TagPeer(p peer.ID, tag string, val int) { +func (cm *BasicConnMgr) TagPeer(p peer.ID, tag string, val int) { cm.lk.Lock() defer cm.lk.Unlock() @@ -147,7 +147,7 @@ func (cm *connManager) TagPeer(p peer.ID, tag string, val int) { pi.tags[tag] = val } -func (cm *connManager) UntagPeer(p peer.ID, tag string) { +func (cm *BasicConnMgr) UntagPeer(p peer.ID, tag string) { cm.lk.Lock() defer cm.lk.Unlock() @@ -161,14 +161,35 @@ func (cm *connManager) UntagPeer(p peer.ID, tag string) { delete(pi.tags, tag) } -func (cm *connManager) Notifee() inet.Notifiee { +type CMInfo struct { + LowWater int + HighWater int + LastTrim time.Time + GracePeriod time.Duration + ConnCount int +} + +func (cm *BasicConnMgr) GetInfo() CMInfo { + cm.lk.Lock() + defer cm.lk.Unlock() + + return CMInfo{ + HighWater: cm.highWater, + LowWater: cm.lowWater, + LastTrim: cm.lastTrim, + GracePeriod: cm.gracePeriod, + ConnCount: cm.connCount, + } +} + +func (cm *BasicConnMgr) Notifee() inet.Notifiee { 
return (*cmNotifee)(cm) } -type cmNotifee connManager +type cmNotifee BasicConnMgr -func (nn *cmNotifee) cm() *connManager { - return (*connManager)(nn) +func (nn *cmNotifee) cm() *BasicConnMgr { + return (*BasicConnMgr)(nn) } func (nn *cmNotifee) Connected(n inet.Network, c inet.Conn) { From 88f2b707a88112dd622408325e08fac6abae2b3f Mon Sep 17 00:00:00 2001 From: Jeromy Date: Sat, 28 Oct 2017 06:08:21 -0700 Subject: [PATCH 10/58] demote error message to info --- p2p/net/connmgr/connmgr.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 3b4aa4a8fb..fc406bf8b8 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -139,7 +139,7 @@ func (cm *BasicConnMgr) TagPeer(p peer.ID, tag string, val int) { pi, ok := cm.peers[p] if !ok { - log.Error("tried to tag conn from untracked peer: ", p) + log.Info("tried to tag conn from untracked peer: ", p) return } @@ -153,7 +153,7 @@ func (cm *BasicConnMgr) UntagPeer(p peer.ID, tag string) { pi, ok := cm.peers[p] if !ok { - log.Error("tried to remove tag from untracked peer: ", p) + log.Info("tried to remove tag from untracked peer: ", p) return } From f724bff2f9e66aaf44e9a832c5aea0a6fc72508c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rau=CC=81l=20Kripalani?= Date: Mon, 27 Aug 2018 14:42:43 +0100 Subject: [PATCH 11/58] add docs to BasicConnMgr --- p2p/net/connmgr/connmgr.go | 70 ++++++++++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 10 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index fc406bf8b8..94eca7d445 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -7,14 +7,21 @@ import ( "time" logging "github.com/ipfs/go-log" - ifconnmgr "github.com/libp2p/go-libp2p-interface-connmgr" + "github.com/libp2p/go-libp2p-interface-connmgr" inet "github.com/libp2p/go-libp2p-net" - peer "github.com/libp2p/go-libp2p-peer" + "github.com/libp2p/go-libp2p-peer" ma "github.com/multiformats/go-multiaddr" ) var log = logging.Logger("connmgr") +// BasicConnMgr is a ConnManager that trims connections whenever the count exceeds the +// high watermark. New connections are given a grace period before they're subject +// to trimming. Trims are automatically run on demand, only if the time from the +// previous trim is higher than 10 seconds. Furthermore, trims can be explicitly +// requested through the public interface of this struct (see TrimOpenConns). + +// See configuration parameters in NewConnManager. type BasicConnMgr struct { highWater int lowWater int @@ -31,6 +38,12 @@ type BasicConnMgr struct { var _ ifconnmgr.ConnManager = (*BasicConnMgr)(nil) +// NewConnManager creates a new BasicConnMgr with the provided params: +// * lo and hi are watermarks governing the number of connections that'll be maintained. +// When the peer count exceeds the 'high watermark', as many peers will be pruned (and +// their connections terminated) until 'low watermark' peers remain. +// * grace is the amount of time a newly opened connection is given before it becomes +// subject to pruning. func NewConnManager(low, hi int, grace time.Duration) *BasicConnMgr { return &BasicConnMgr{ highWater: hi, @@ -40,15 +53,20 @@ func NewConnManager(low, hi int, grace time.Duration) *BasicConnMgr { } } +// peerInfo stores metadata for a given peer. 
type peerInfo struct { - tags map[string]int - value int + tags map[string]int // value for each tag + value int // cached sum of all tag values - conns map[inet.Conn]time.Time + conns map[inet.Conn]time.Time // start time of each connection - firstSeen time.Time + firstSeen time.Time // timestamp when we began tracking this peer. } +// TrimOpenConns closes the connections of as many peers as needed to make the peer count +// equal the low watermark. Peers are sorted in ascending order based on their total value, +// pruning those peers with the lowest scores first, as long as they are not within their +// grace period. func (cm *BasicConnMgr) TrimOpenConns(ctx context.Context) { defer log.EventBegin(ctx, "connCleanup").Done() for _, c := range cm.getConnsToClose(ctx) { @@ -58,9 +76,12 @@ func (cm *BasicConnMgr) TrimOpenConns(ctx context.Context) { } } +// getConnsToClose runs the heuristics described in TrimOpenConns and returns the +// connections to close. func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { cm.lk.Lock() defer cm.lk.Unlock() + if cm.lowWater == 0 || cm.highWater == 0 { // disabled return nil @@ -79,6 +100,7 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { infos = append(infos, inf) } + // Sort peers according to their value. sort.Slice(infos, func(i, j int) bool { return infos[i].value < infos[j].value }) @@ -143,6 +165,7 @@ func (cm *BasicConnMgr) TagPeer(p peer.ID, tag string, val int) { return } + // Update the total value of the peer. pi.value += (val - pi.tags[tag]) pi.tags[tag] = val } @@ -157,18 +180,30 @@ func (cm *BasicConnMgr) UntagPeer(p peer.ID, tag string) { return } + // Update the total value of the peer. pi.value -= pi.tags[tag] delete(pi.tags, tag) } +// CMInfo holds the configuration for BasicConnMgr, as well as status data. type CMInfo struct { - LowWater int - HighWater int - LastTrim time.Time + // The low watermark, as described in NewConnManager. + LowWater int + + // The high watermark, as described in NewConnManager. + HighWater int + + // The timestamp when the last trim was triggered. + LastTrim time.Time + + // The configured grace period, as described in NewConnManager. GracePeriod time.Duration - ConnCount int + + // The current connection count. + ConnCount int } +// GetInfo returns the configuration and status data for this connection manager. func (cm *BasicConnMgr) GetInfo() CMInfo { cm.lk.Lock() defer cm.lk.Unlock() @@ -182,6 +217,9 @@ func (cm *BasicConnMgr) GetInfo() CMInfo { } } +// Notifee returns a sink through which Notifiers can inform the BasicConnMgr when +// events occur. Currently, the notifee only reacts upon connection events +// {Connected, Disconnected}. func (cm *BasicConnMgr) Notifee() inet.Notifiee { return (*cmNotifee)(cm) } @@ -192,6 +230,9 @@ func (nn *cmNotifee) cm() *BasicConnMgr { return (*BasicConnMgr)(nn) } +// Connected is called by notifiers to inform that a new connection has been established. +// The notifee updates the BasicConnMgr to start tracking the connection. If the new connection +// count exceeds the high watermark, a trim may be triggered. func (nn *cmNotifee) Connected(n inet.Network, c inet.Conn) { cm := nn.cm() @@ -224,6 +265,8 @@ func (nn *cmNotifee) Connected(n inet.Network, c inet.Conn) { } } +// Disconnected is called by notifiers to inform that an existing connection has been closed or terminated. +// The notifee updates the BasicConnMgr accordingly to stop tracking the connection, and performs housekeeping. 
func (nn *cmNotifee) Disconnected(n inet.Network, c inet.Conn) { cm := nn.cm() @@ -249,7 +292,14 @@ func (nn *cmNotifee) Disconnected(n inet.Network, c inet.Conn) { } } +// Listen is no-op in this implementation. func (nn *cmNotifee) Listen(n inet.Network, addr ma.Multiaddr) {} + +// ListenClose is no-op in this implementation. func (nn *cmNotifee) ListenClose(n inet.Network, addr ma.Multiaddr) {} + +// OpenedStream is no-op in this implementation. func (nn *cmNotifee) OpenedStream(inet.Network, inet.Stream) {} + +// ClosedStream is no-op in this implementation. func (nn *cmNotifee) ClosedStream(inet.Network, inet.Stream) {} From 02481e7afdaa14e659266b9791aaea0b7fd02523 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rau=CC=81l=20Kripalani?= Date: Mon, 27 Aug 2018 15:38:09 +0100 Subject: [PATCH 12/58] gofmt. --- p2p/net/connmgr/connmgr.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 94eca7d445..56ca27ec59 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -293,13 +293,13 @@ func (nn *cmNotifee) Disconnected(n inet.Network, c inet.Conn) { } // Listen is no-op in this implementation. -func (nn *cmNotifee) Listen(n inet.Network, addr ma.Multiaddr) {} +func (nn *cmNotifee) Listen(n inet.Network, addr ma.Multiaddr) {} // ListenClose is no-op in this implementation. func (nn *cmNotifee) ListenClose(n inet.Network, addr ma.Multiaddr) {} // OpenedStream is no-op in this implementation. -func (nn *cmNotifee) OpenedStream(inet.Network, inet.Stream) {} +func (nn *cmNotifee) OpenedStream(inet.Network, inet.Stream) {} // ClosedStream is no-op in this implementation. -func (nn *cmNotifee) ClosedStream(inet.Network, inet.Stream) {} +func (nn *cmNotifee) ClosedStream(inet.Network, inet.Stream) {} From 2ae3cfd7ef9e91999bf9db309da58af7614ebb8b Mon Sep 17 00:00:00 2001 From: James Wetter Date: Mon, 17 Sep 2018 01:50:30 +0900 Subject: [PATCH 13/58] lift test coverage --- p2p/net/connmgr/connmgr_test.go | 229 ++++++++++++++++++++++++++++++++ 1 file changed, 229 insertions(+) diff --git a/p2p/net/connmgr/connmgr_test.go b/p2p/net/connmgr/connmgr_test.go index 8c30e9e5d9..cf11f208db 100644 --- a/p2p/net/connmgr/connmgr_test.go +++ b/p2p/net/connmgr/connmgr_test.go @@ -3,10 +3,12 @@ package connmgr import ( "context" "testing" + "time" inet "github.com/libp2p/go-libp2p-net" peer "github.com/libp2p/go-libp2p-peer" tu "github.com/libp2p/go-testutil" + ma "github.com/multiformats/go-multiaddr" ) type tconn struct { @@ -24,6 +26,14 @@ func (c *tconn) RemotePeer() peer.ID { return c.peer } +func (c *tconn) RemoteMultiaddr() ma.Multiaddr { + addr, err := ma.NewMultiaddr("/ip4/127.0.0.1/udp/1234") + if err != nil { + panic("cannot create multiaddr") + } + return addr +} + func randConn(t *testing.T) inet.Conn { pid := tu.RandPeerIDFatal(t) return &tconn{peer: pid} @@ -65,3 +75,222 @@ func TestConnTrimming(t *testing.T) { t.Fatal("conn with bad tag should have gotten closed") } } + +func TestConnsToClose(t *testing.T) { + cm := NewConnManager(0, 10, 0) + conns := cm.getConnsToClose(context.Background()) + if conns != nil { + t.Fatal("expected no connections") + } + + cm = NewConnManager(10, 0, 0) + conns = cm.getConnsToClose(context.Background()) + if conns != nil { + t.Fatal("expected no connections") + } + + cm = NewConnManager(1, 1, 0) + conns = cm.getConnsToClose(context.Background()) + if conns != nil { + t.Fatal("expected no connections") + } + + cm = NewConnManager(1, 1, time.Duration(10*time.Minute)) + not 
:= cm.Notifee() + for i := 0; i < 5; i++ { + conn := randConn(t) + not.Connected(nil, conn) + } + conns = cm.getConnsToClose(context.Background()) + if len(conns) != 0 { + t.Fatal("expected no connections") + } +} + +func TestGetTagInfo(t *testing.T) { + start := time.Now() + cm := NewConnManager(1, 1, time.Duration(10*time.Minute)) + not := cm.Notifee() + conn := randConn(t) + not.Connected(nil, conn) + end := time.Now() + + other := tu.RandPeerIDFatal(t) + tag := cm.GetTagInfo(other) + if tag != nil { + t.Fatal("expected no tag") + } + + tag = cm.GetTagInfo(conn.RemotePeer()) + if tag == nil { + t.Fatal("expected tag") + } + if tag.FirstSeen.Before(start) || tag.FirstSeen.After(end) { + t.Fatal("expected first seen time") + } + if tag.Value != 0 { + t.Fatal("expected zero value") + } + if len(tag.Tags) != 0 { + t.Fatal("expected no tags") + } + if len(tag.Conns) != 1 { + t.Fatal("expected one connection") + } + for s, tm := range tag.Conns { + if s != conn.RemoteMultiaddr().String() { + t.Fatal("unexpected multiaddr") + } + if tm.Before(start) || tm.After(end) { + t.Fatal("unexpected connection time") + } + } + + cm.TagPeer(conn.RemotePeer(), "tag", 5) + tag = cm.GetTagInfo(conn.RemotePeer()) + if tag == nil { + t.Fatal("expected tag") + } + if tag.FirstSeen.Before(start) || tag.FirstSeen.After(end) { + t.Fatal("expected first seen time") + } + if tag.Value != 5 { + t.Fatal("expected five value") + } + if len(tag.Tags) != 1 { + t.Fatal("expected no tags") + } + for tString, v := range tag.Tags { + if tString != "tag" || v != 5 { + t.Fatal("expected tag value") + } + } + if len(tag.Conns) != 1 { + t.Fatal("expected one connection") + } + for s, tm := range tag.Conns { + if s != conn.RemoteMultiaddr().String() { + t.Fatal("unexpected multiaddr") + } + if tm.Before(start) || tm.After(end) { + t.Fatal("unexpected connection time") + } + } +} + +func TestTagPeerNonExistant(t *testing.T) { + cm := NewConnManager(1, 1, time.Duration(10*time.Minute)) + + id := tu.RandPeerIDFatal(t) + cm.TagPeer(id, "test", 1) + + if len(cm.peers) != 0 { + t.Fatal("expected zero peers") + } +} + +func TestUntagPeer(t *testing.T) { + cm := NewConnManager(1, 1, time.Duration(10*time.Minute)) + not := cm.Notifee() + conn := randConn(t) + not.Connected(nil, conn) + rp := conn.RemotePeer() + cm.TagPeer(rp, "tag", 5) + cm.TagPeer(rp, "tag two", 5) + + id := tu.RandPeerIDFatal(t) + cm.UntagPeer(id, "test") + if len(cm.peers[rp].tags) != 2 { + t.Fatal("expected tags to be uneffected") + } + + cm.UntagPeer(conn.RemotePeer(), "test") + if len(cm.peers[rp].tags) != 2 { + t.Fatal("expected tags to be uneffected") + } + + cm.UntagPeer(conn.RemotePeer(), "tag") + if len(cm.peers[rp].tags) != 1 { + t.Fatal("expected tag to be removed") + } + if cm.peers[rp].value != 5 { + t.Fatal("expected aggreagte tag value to be 5") + } +} + +func TestGetInfo(t *testing.T) { + start := time.Now() + gp := time.Duration(10 * time.Minute) + cm := NewConnManager(1, 5, gp) + not := cm.Notifee() + conn := randConn(t) + not.Connected(nil, conn) + cm.TrimOpenConns(context.Background()) + end := time.Now() + + info := cm.GetInfo() + if info.HighWater != 5 { + t.Fatal("expected highwater to be 5") + } + if info.LowWater != 1 { + t.Fatal("expected highwater to be 1") + } + if info.LastTrim.Before(start) || info.LastTrim.After(end) { + t.Fatal("unexpected last trim time") + } + if info.GracePeriod != gp { + t.Fatal("unexpected grace period") + } + if info.ConnCount != 1 { + t.Fatal("unexpected number of connections") + } +} + +func 
TestDoubleConnection(t *testing.T) { + gp := time.Duration(10 * time.Minute) + cm := NewConnManager(1, 5, gp) + not := cm.Notifee() + conn := randConn(t) + not.Connected(nil, conn) + cm.TagPeer(conn.RemotePeer(), "foo", 10) + not.Connected(nil, conn) + if cm.connCount != 1 { + t.Fatal("unexpected number of connections") + } + if cm.peers[conn.RemotePeer()].value != 10 { + t.Fatal("unexpected peer value") + } +} + +func TestDisconnected(t *testing.T) { + gp := time.Duration(10 * time.Minute) + cm := NewConnManager(1, 5, gp) + not := cm.Notifee() + conn := randConn(t) + not.Connected(nil, conn) + cm.TagPeer(conn.RemotePeer(), "foo", 10) + + not.Disconnected(nil, randConn(t)) + if cm.connCount != 1 { + t.Fatal("unexpected number of connections") + } + if cm.peers[conn.RemotePeer()].value != 10 { + t.Fatal("unexpected peer value") + } + + not.Disconnected(nil, &tconn{peer: conn.RemotePeer()}) + if cm.connCount != 1 { + t.Fatal("unexpected number of connections") + } + if cm.peers[conn.RemotePeer()].value != 10 { + t.Fatal("unexpected peer value") + } + + not.Disconnected(nil, conn) + if cm.connCount != 0 { + t.Fatal("unexpected number of connections") + } + if len(cm.peers) != 0 { + t.Fatal("unexpected number of peers") + } +} From 2e2a793e30d87e75cb76f09c51fe14f1e8452caa Mon Sep 17 00:00:00 2001 From: James Wetter Date: Mon, 17 Sep 2018 01:56:46 +0900 Subject: [PATCH 14/58] add comments for tag functions to silence golint --- p2p/net/connmgr/connmgr.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 56ca27ec59..8e7d5aaafb 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -20,7 +20,7 @@ var log = logging.Logger("connmgr") // to trimming. Trims are automatically run on demand, only if the time from the // previous trim is higher than 10 seconds. Furthermore, trims can be explicitly // requested through the public interface of this struct (see TrimOpenConns). - +// // See configuration parameters in NewConnManager. type BasicConnMgr struct { highWater int @@ -129,6 +129,8 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { return closed } +// GetTagInfo is called to fetch the tag information associated with a given +// peer, nil is returned if p refers to an unknown peer. func (cm *BasicConnMgr) GetTagInfo(p peer.ID) *ifconnmgr.TagInfo { cm.lk.Lock() defer cm.lk.Unlock() @@ -155,6 +157,7 @@ func (cm *BasicConnMgr) GetTagInfo(p peer.ID) *ifconnmgr.TagInfo { return out } +// TagPeer is called to associate a string and integer with a given peer. func (cm *BasicConnMgr) TagPeer(p peer.ID, tag string, val int) { cm.lk.Lock() defer cm.lk.Unlock() @@ -170,6 +173,7 @@ func (cm *BasicConnMgr) TagPeer(p peer.ID, tag string, val int) { pi.tags[tag] = val } +// UntagPeer is called to disassociate a string and integer from a given peer. func (cm *BasicConnMgr) UntagPeer(p peer.ID, tag string) { cm.lk.Lock() defer cm.lk.Unlock() From 55a490b4ad12846bd710083d047c20a7a5597e40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Wed, 12 Dec 2018 18:30:02 +0000 Subject: [PATCH 15/58] fix silence period between trims not being honored. 
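(Aside, not part of the original patch: the silence period means that once a trim has run, further trim attempts within the window should become no-ops. Below is a stand-alone sketch of that time-based guard with hypothetical names; the real code keeps lastTrim on the manager and performs the check at the top of TrimOpenConns.)

    package trimguard

    import "time"

    const silencePeriod = 10 * time.Second

    var lastTrim time.Time

    // shouldTrim reports whether the silence period since the previous trim has
    // elapsed; when it has, the trim timestamp is advanced and the caller may trim.
    func shouldTrim(now time.Time) bool {
    	if !lastTrim.IsZero() && now.Sub(lastTrim) < silencePeriod {
    		return false // a trim ran recently; honor the silence period
    	}
    	lastTrim = now
    	return true
    }
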
--- p2p/net/connmgr/connmgr.go | 39 +++++++++++++-------- p2p/net/connmgr/connmgr_test.go | 61 ++++++++++++++++++++++++++------- 2 files changed, 74 insertions(+), 26 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 8e7d5aaafb..af88319cc0 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -4,6 +4,7 @@ import ( "context" "sort" "sync" + "sync/atomic" "time" logging "github.com/ipfs/go-log" @@ -13,6 +14,8 @@ import ( ma "github.com/multiformats/go-multiaddr" ) +const silencePeriod = 10 * time.Second + var log = logging.Logger("connmgr") // BasicConnMgr is a ConnManager that trims connections whenever the count exceeds the @@ -23,17 +26,16 @@ var log = logging.Logger("connmgr") // // See configuration parameters in NewConnManager. type BasicConnMgr struct { - highWater int - lowWater int - + lk sync.Mutex + highWater int + lowWater int + connCount int gracePeriod time.Duration + peers map[peer.ID]*peerInfo - peers map[peer.ID]*peerInfo - connCount int - - lk sync.Mutex - - lastTrim time.Time + // guarded by atomic; 0=not running, 1=running. + trimRunning int32 + lastTrim time.Time } var _ ifconnmgr.ConnManager = (*BasicConnMgr)(nil) @@ -68,12 +70,25 @@ type peerInfo struct { // pruning those peers with the lowest scores first, as long as they are not within their // grace period. func (cm *BasicConnMgr) TrimOpenConns(ctx context.Context) { + if !atomic.CompareAndSwapInt32(&cm.trimRunning, 0, 1) { + // a trim is running, skip this one. + return + } + + defer atomic.StoreInt32(&cm.trimRunning, 0) + if time.Since(cm.lastTrim) < silencePeriod { + // skip this attempt to trim as the last one just took place. + return + } + defer log.EventBegin(ctx, "connCleanup").Done() for _, c := range cm.getConnsToClose(ctx) { log.Info("closing conn: ", c.RemotePeer()) log.Event(ctx, "closeConn", c.RemotePeer()) c.Close() } + + cm.lastTrim = time.Now() } // getConnsToClose runs the heuristics described in TrimOpenConns and returns the @@ -87,8 +102,6 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { return nil } now := time.Now() - cm.lastTrim = now - if len(cm.peers) < cm.lowWater { log.Info("open connection count below limit") return nil @@ -263,9 +276,7 @@ func (nn *cmNotifee) Connected(n inet.Network, c inet.Conn) { cm.connCount++ if cm.connCount > nn.highWater { - if cm.lastTrim.IsZero() || time.Since(cm.lastTrim) > time.Second*10 { - go cm.TrimOpenConns(context.Background()) - } + go cm.TrimOpenConns(context.Background()) } } diff --git a/p2p/net/connmgr/connmgr_test.go b/p2p/net/connmgr/connmgr_test.go index cf11f208db..90285c5955 100644 --- a/p2p/net/connmgr/connmgr_test.go +++ b/p2p/net/connmgr/connmgr_test.go @@ -13,12 +13,17 @@ import ( type tconn struct { inet.Conn - peer peer.ID - closed bool + + peer peer.ID + closed bool + disconnectNotify func(net inet.Network, conn inet.Conn) } func (c *tconn) Close() error { c.closed = true + if c.disconnectNotify != nil { + c.disconnectNotify(nil, c) + } return nil } @@ -34,9 +39,9 @@ func (c *tconn) RemoteMultiaddr() ma.Multiaddr { return addr } -func randConn(t *testing.T) inet.Conn { +func randConn(t *testing.T, discNotify func(inet.Network, inet.Conn)) inet.Conn { pid := tu.RandPeerIDFatal(t) - return &tconn{peer: pid} + return &tconn{peer: pid, disconnectNotify: discNotify} } func TestConnTrimming(t *testing.T) { @@ -45,7 +50,7 @@ func TestConnTrimming(t *testing.T) { var conns []inet.Conn for i := 0; i < 300; i++ { - rc := randConn(t) + rc := randConn(t, nil) 
conns = append(conns, rc) not.Connected(nil, rc) } @@ -98,7 +103,7 @@ func TestConnsToClose(t *testing.T) { cm = NewConnManager(1, 1, time.Duration(10*time.Minute)) not := cm.Notifee() for i := 0; i < 5; i++ { - conn := randConn(t) + conn := randConn(t, nil) not.Connected(nil, conn) } conns = cm.getConnsToClose(context.Background()) @@ -111,7 +116,7 @@ func TestGetTagInfo(t *testing.T) { start := time.Now() cm := NewConnManager(1, 1, time.Duration(10*time.Minute)) not := cm.Notifee() - conn := randConn(t) + conn := randConn(t, nil) not.Connected(nil, conn) end := time.Now() @@ -192,7 +197,7 @@ func TestTagPeerNonExistant(t *testing.T) { func TestUntagPeer(t *testing.T) { cm := NewConnManager(1, 1, time.Duration(10*time.Minute)) not := cm.Notifee() - conn := randConn(t) + conn := randConn(t, nil) not.Connected(nil, conn) rp := conn.RemotePeer() cm.TagPeer(rp, "tag", 5) @@ -223,7 +228,7 @@ func TestGetInfo(t *testing.T) { gp := time.Duration(10 * time.Minute) cm := NewConnManager(1, 5, gp) not := cm.Notifee() - conn := randConn(t) + conn := randConn(t, nil) not.Connected(nil, conn) cm.TrimOpenConns(context.Background()) end := time.Now() @@ -250,7 +255,7 @@ func TestDoubleConnection(t *testing.T) { gp := time.Duration(10 * time.Minute) cm := NewConnManager(1, 5, gp) not := cm.Notifee() - conn := randConn(t) + conn := randConn(t, nil) not.Connected(nil, conn) cm.TagPeer(conn.RemotePeer(), "foo", 10) not.Connected(nil, conn) @@ -266,11 +271,11 @@ func TestDisconnected(t *testing.T) { gp := time.Duration(10 * time.Minute) cm := NewConnManager(1, 5, gp) not := cm.Notifee() - conn := randConn(t) + conn := randConn(t, nil) not.Connected(nil, conn) cm.TagPeer(conn.RemotePeer(), "foo", 10) - not.Disconnected(nil, randConn(t)) + not.Disconnected(nil, randConn(t, nil)) if cm.connCount != 1 { t.Fatal("unexpected number of connections") } @@ -294,3 +299,35 @@ func TestDisconnected(t *testing.T) { t.Fatal("unexpected number of peers") } } + +// see https://github.com/libp2p/go-libp2p-connmgr/issues/23 +func TestQuickBurstRespectsSilencePeriod(t *testing.T) { + cm := NewConnManager(10, 20, 0) + not := cm.Notifee() + + var conns []inet.Conn + + // quickly produce 30 connections (sending us above the high watermark) + for i := 0; i < 30; i++ { + rc := randConn(t, not.Disconnected) + conns = append(conns, rc) + not.Connected(nil, rc) + } + + // wait for a few seconds + time.Sleep(time.Second * 3) + + // only the first trim is allowed in; make sure we close at most 20 connections, not all of them. + var closed int + for _, c := range conns { + if c.(*tconn).closed { + closed++ + } + } + if closed > 20 { + t.Fatalf("should have closed at most 20 connections, closed: %d", closed) + } + if total := closed + cm.connCount; total != 30 { + t.Fatalf("expected closed connections + open conn count to equal 30, value: %d", total) + } +} From 97f80a6ca59e929383849085a8822d6898c76f25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Thu, 13 Dec 2018 12:58:51 +0000 Subject: [PATCH 16/58] switch to channel-based semaphore to ensure one trim in progress. 
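(Aside, not part of the original patch: the "channel-based semaphore" in the subject is a buffered channel of capacity one; a non-blocking send claims the single slot, and a caller that cannot send bails out instead of starting a second trim. A stand-alone sketch of the pattern, with hypothetical names:)

    package trysem

    // busy is a one-slot semaphore: holding the slot means a run is in progress.
    var busy = make(chan struct{}, 1)

    // tryRun executes work only if no other run is in progress, and reports
    // whether it actually ran.
    func tryRun(work func()) bool {
    	select {
    	case busy <- struct{}{}: // claimed the slot
    	default:
    		return false // another run already holds it; skip this attempt
    	}
    	defer func() { <-busy }() // release the slot when done
    	work()
    	return true
    }
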
--- p2p/net/connmgr/connmgr.go | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index af88319cc0..626fe76175 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -4,13 +4,12 @@ import ( "context" "sort" "sync" - "sync/atomic" "time" logging "github.com/ipfs/go-log" - "github.com/libp2p/go-libp2p-interface-connmgr" + ifconnmgr "github.com/libp2p/go-libp2p-interface-connmgr" inet "github.com/libp2p/go-libp2p-net" - "github.com/libp2p/go-libp2p-peer" + peer "github.com/libp2p/go-libp2p-peer" ma "github.com/multiformats/go-multiaddr" ) @@ -33,9 +32,9 @@ type BasicConnMgr struct { gracePeriod time.Duration peers map[peer.ID]*peerInfo - // guarded by atomic; 0=not running, 1=running. - trimRunning int32 - lastTrim time.Time + // channel-based semaphore that enforces only a single trim is in progress + trimRunningCh chan struct{} + lastTrim time.Time } var _ ifconnmgr.ConnManager = (*BasicConnMgr)(nil) @@ -48,10 +47,11 @@ var _ ifconnmgr.ConnManager = (*BasicConnMgr)(nil) // subject to pruning. func NewConnManager(low, hi int, grace time.Duration) *BasicConnMgr { return &BasicConnMgr{ - highWater: hi, - lowWater: low, - gracePeriod: grace, - peers: make(map[peer.ID]*peerInfo), + highWater: hi, + lowWater: low, + gracePeriod: grace, + peers: make(map[peer.ID]*peerInfo), + trimRunningCh: make(chan struct{}, 1), } } @@ -69,25 +69,26 @@ type peerInfo struct { // equal the low watermark. Peers are sorted in ascending order based on their total value, // pruning those peers with the lowest scores first, as long as they are not within their // grace period. +// +// TODO: error return value so we can cleanly signal we are aborting because: +// (a) there's another trim in progress, or (b) the silence period is in effect. func (cm *BasicConnMgr) TrimOpenConns(ctx context.Context) { - if !atomic.CompareAndSwapInt32(&cm.trimRunning, 0, 1) { - // a trim is running, skip this one. + select { + case cm.trimRunningCh <- struct{}{}: + default: return } - - defer atomic.StoreInt32(&cm.trimRunning, 0) + defer func() { <-cm.trimRunningCh }() if time.Since(cm.lastTrim) < silencePeriod { // skip this attempt to trim as the last one just took place. return } - defer log.EventBegin(ctx, "connCleanup").Done() for _, c := range cm.getConnsToClose(ctx) { log.Info("closing conn: ", c.RemotePeer()) log.Event(ctx, "closeConn", c.RemotePeer()) c.Close() } - cm.lastTrim = time.Now() } From 3f15992cdf63710fe15005bd7bfcce7dcfad0ba6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Wed, 27 Mar 2019 23:18:02 +0000 Subject: [PATCH 17/58] add peer protection capability to conn mgr. Users can call Protect() to save a peer from pruning, and Unprotect() to remove the protection. Protected peers don't count towards the target quantity to prune during a connection manager cycle. 
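(Aside, not part of the original patch: a minimal usage sketch of the API introduced here, written as if it lived inside the connmgr package so no import path needs to be assumed; exampleProtect and criticalPeer are placeholder names. The next patch in the series changes Protect and Unprotect to also take a tag argument.)

    // exampleProtect shows the intended flow: a protected peer is skipped by
    // trims regardless of its tag value, until it is unprotected again.
    func exampleProtect(criticalPeer peer.ID) {
    	cm := NewConnManager(100, 300, time.Minute)

    	cm.Protect(criticalPeer) // exempt this peer from pruning
    	defer cm.Unprotect(criticalPeer)

    	// A trim may close other peers' connections, but never criticalPeer's.
    	cm.TrimOpenConns(context.Background())
    }
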
--- p2p/net/connmgr/connmgr.go | 30 ++++++++++++++-- p2p/net/connmgr/connmgr_test.go | 61 +++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 3 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 626fe76175..15adec9de7 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -13,7 +13,7 @@ import ( ma "github.com/multiformats/go-multiaddr" ) -const silencePeriod = 10 * time.Second +var SilencePeriod = 10 * time.Second var log = logging.Logger("connmgr") @@ -32,9 +32,13 @@ type BasicConnMgr struct { gracePeriod time.Duration peers map[peer.ID]*peerInfo + plk sync.RWMutex + protected map[peer.ID]struct{} + // channel-based semaphore that enforces only a single trim is in progress trimRunningCh chan struct{} lastTrim time.Time + silencePeriod time.Duration } var _ ifconnmgr.ConnManager = (*BasicConnMgr)(nil) @@ -52,9 +56,23 @@ func NewConnManager(low, hi int, grace time.Duration) *BasicConnMgr { gracePeriod: grace, peers: make(map[peer.ID]*peerInfo), trimRunningCh: make(chan struct{}, 1), + protected: make(map[peer.ID]struct{}, 16), + silencePeriod: SilencePeriod, } } +func (cm *BasicConnMgr) Protect(id peer.ID) { + cm.plk.Lock() + defer cm.plk.Unlock() + cm.protected[id] = struct{}{} +} + +func (cm *BasicConnMgr) Unprotect(id peer.ID) { + cm.plk.Lock() + defer cm.plk.Unlock() + delete(cm.protected, id) +} + // peerInfo stores metadata for a given peer. type peerInfo struct { tags map[string]int // value for each tag @@ -79,7 +97,7 @@ func (cm *BasicConnMgr) TrimOpenConns(ctx context.Context) { return } defer func() { <-cm.trimRunningCh }() - if time.Since(cm.lastTrim) < silencePeriod { + if time.Since(cm.lastTrim) < cm.silencePeriod { // skip this attempt to trim as the last one just took place. return } @@ -110,9 +128,15 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { var infos []*peerInfo - for _, inf := range cm.peers { + cm.plk.RLock() + for id, inf := range cm.peers { + if _, ok := cm.protected[id]; ok { + // skip protected peer; it's not eligible for pruning. + continue + } infos = append(infos, inf) } + cm.plk.RUnlock() // Sort peers according to their value. sort.Slice(infos, func(i, j int) bool { diff --git a/p2p/net/connmgr/connmgr_test.go b/p2p/net/connmgr/connmgr_test.go index 90285c5955..0e1e323c2d 100644 --- a/p2p/net/connmgr/connmgr_test.go +++ b/p2p/net/connmgr/connmgr_test.go @@ -331,3 +331,64 @@ func TestQuickBurstRespectsSilencePeriod(t *testing.T) { t.Fatalf("expected closed connections + open conn count to equal 30, value: %d", total) } } + +func TestPeerProtection(t *testing.T) { + SilencePeriod = 0 + cm := NewConnManager(10, 20, 0) + SilencePeriod = 10 * time.Second + + not := cm.Notifee() + + // produce 20 connections with unique peers. + var conns []inet.Conn + for i := 0; i < 20; i++ { + rc := randConn(t, not.Disconnected) + conns = append(conns, rc) + not.Connected(nil, rc) + cm.TagPeer(rc.RemotePeer(), "test", 20) + } + + // protect the first 5 peers. + var protected []inet.Conn + for _, c := range conns[0:5] { + cm.Protect(c.RemotePeer()) + protected = append(protected, c) + // remove the tag to make them even more eligible for pruning. + cm.UntagPeer(c.RemotePeer(), "test") + } + + // add one more connection, sending the connection manager overboard. + not.Connected(nil, randConn(t, not.Disconnected)) + + // the pruning happens in the background -- this timing condition is not good. 
+ time.Sleep(1 * time.Second) + + for _, c := range protected { + if c.(*tconn).closed { + t.Error("protected connection was closed by connection manager") + } + } + + // unprotect the first peer. + cm.Unprotect(protected[0].RemotePeer()) + + // add 11 more connections, sending the connection manager overboard again. + for i := 0; i < 11; i++ { + rc := randConn(t, not.Disconnected) + conns = append(conns, rc) + not.Connected(nil, rc) + cm.TagPeer(rc.RemotePeer(), "test", 20) + } + + // the pruning happens in the background -- this timing condition is not good. + time.Sleep(1 * time.Second) + + if !protected[0].(*tconn).closed { + t.Error("unprotected connection was kept open by connection manager") + } + for _, c := range protected[1:] { + if c.(*tconn).closed { + t.Error("protected connection was closed by connection manager") + } + } +} From c19c1f34cc1d7511cd6057eb201c654968a39005 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Fri, 29 Mar 2019 13:57:36 +0000 Subject: [PATCH 18/58] scope protections by tag to avoid cross-interference. Also slightly reworked the loop that selects candidates to prune to make it better behaved. Before, if we needed to prune N connections, we preselected N and ended up doing nothing if they happened to be in the grace period. Now we skip over them and prune ungraced connections until we meet our target. --- p2p/net/connmgr/connmgr.go | 57 ++++++++----- p2p/net/connmgr/connmgr_test.go | 145 ++++++++++++++++++++++++++++++-- 2 files changed, 174 insertions(+), 28 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 15adec9de7..2629dffdf0 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -33,7 +33,7 @@ type BasicConnMgr struct { peers map[peer.ID]*peerInfo plk sync.RWMutex - protected map[peer.ID]struct{} + protected map[peer.ID]map[string]struct{} // channel-based semaphore that enforces only a single trim is in progress trimRunningCh chan struct{} @@ -56,25 +56,41 @@ func NewConnManager(low, hi int, grace time.Duration) *BasicConnMgr { gracePeriod: grace, peers: make(map[peer.ID]*peerInfo), trimRunningCh: make(chan struct{}, 1), - protected: make(map[peer.ID]struct{}, 16), + protected: make(map[peer.ID]map[string]struct{}, 16), silencePeriod: SilencePeriod, } } -func (cm *BasicConnMgr) Protect(id peer.ID) { +func (cm *BasicConnMgr) Protect(id peer.ID, tag string) { cm.plk.Lock() defer cm.plk.Unlock() - cm.protected[id] = struct{}{} + + tags, ok := cm.protected[id] + if !ok { + tags = make(map[string]struct{}, 2) + cm.protected[id] = tags + } + tags[tag] = struct{}{} } -func (cm *BasicConnMgr) Unprotect(id peer.ID) { +func (cm *BasicConnMgr) Unprotect(id peer.ID, tag string) (protected bool) { cm.plk.Lock() defer cm.plk.Unlock() - delete(cm.protected, id) + + tags, ok := cm.protected[id] + if !ok { + return false + } + if delete(tags, tag); len(tags) == 0 { + delete(cm.protected, id) + return false + } + return true } // peerInfo stores metadata for a given peer. type peerInfo struct { + id peer.ID tags map[string]int // value for each tag value int // cached sum of all tag values @@ -126,45 +142,46 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { return nil } - var infos []*peerInfo - + var candidates []*peerInfo cm.plk.RLock() for id, inf := range cm.peers { if _, ok := cm.protected[id]; ok { - // skip protected peer; it's not eligible for pruning. + // skip over protected peer. 
continue } - infos = append(infos, inf) + candidates = append(candidates, inf) } cm.plk.RUnlock() // Sort peers according to their value. - sort.Slice(infos, func(i, j int) bool { - return infos[i].value < infos[j].value + sort.Slice(candidates, func(i, j int) bool { + return candidates[i].value < candidates[j].value }) - closeCount := len(infos) - cm.lowWater - toclose := infos[:closeCount] + target := len(cm.peers) - cm.lowWater // 2x number of peers we're disconnecting from because we may have more // than one connection per peer. Slightly over allocating isn't an issue // as this is a very short-lived array. - closed := make([]inet.Conn, 0, len(toclose)*2) + selected := make([]inet.Conn, 0, target*2) - for _, inf := range toclose { + for _, inf := range candidates { // TODO: should we be using firstSeen or the time associated with the connection itself? if inf.firstSeen.Add(cm.gracePeriod).After(now) { continue } - // TODO: if a peer has more than one connection, maybe only close one? for c := range inf.conns { - // TODO: probably don't want to always do this in a goroutine - closed = append(closed, c) + selected = append(selected, c) + } + + target-- + if target == 0 { + break } } - return closed + return selected } // GetTagInfo is called to fetch the tag information associated with a given diff --git a/p2p/net/connmgr/connmgr_test.go b/p2p/net/connmgr/connmgr_test.go index 0e1e323c2d..a5057612ae 100644 --- a/p2p/net/connmgr/connmgr_test.go +++ b/p2p/net/connmgr/connmgr_test.go @@ -317,6 +317,9 @@ func TestQuickBurstRespectsSilencePeriod(t *testing.T) { // wait for a few seconds time.Sleep(time.Second * 3) + cm.lk.Lock() // pacify the race detector + defer cm.lk.Unlock() + // only the first trim is allowed in; make sure we close at most 20 connections, not all of them. var closed int for _, c := range conns { @@ -332,9 +335,9 @@ func TestQuickBurstRespectsSilencePeriod(t *testing.T) { } } -func TestPeerProtection(t *testing.T) { +func TestPeerProtectionSingleTag(t *testing.T) { SilencePeriod = 0 - cm := NewConnManager(10, 20, 0) + cm := NewConnManager(19, 20, 0) SilencePeriod = 10 * time.Second not := cm.Notifee() @@ -351,10 +354,10 @@ func TestPeerProtection(t *testing.T) { // protect the first 5 peers. var protected []inet.Conn for _, c := range conns[0:5] { - cm.Protect(c.RemotePeer()) + cm.Protect(c.RemotePeer(), "global") protected = append(protected, c) - // remove the tag to make them even more eligible for pruning. - cm.UntagPeer(c.RemotePeer(), "test") + // tag them negatively to make them preferred for pruning. + cm.TagPeer(c.RemotePeer(), "test", -100) } // add one more connection, sending the connection manager overboard. @@ -370,10 +373,79 @@ func TestPeerProtection(t *testing.T) { } // unprotect the first peer. - cm.Unprotect(protected[0].RemotePeer()) + cm.Unprotect(protected[0].RemotePeer(), "global") + + // add 2 more connections, sending the connection manager overboard again. + for i := 0; i < 2; i++ { + rc := randConn(t, not.Disconnected) + conns = append(conns, rc) + not.Connected(nil, rc) + cm.TagPeer(rc.RemotePeer(), "test", 20) + } + + // the pruning happens in the background -- this timing condition is not good. 
+ time.Sleep(1 * time.Second) + + cm.lk.Lock() // pacify the race detector + defer cm.lk.Unlock() + + if !protected[0].(*tconn).closed { + t.Error("unprotected connection was kept open by connection manager") + } + for _, c := range protected[1:] { + if c.(*tconn).closed { + t.Error("protected connection was closed by connection manager") + } + } +} + +func TestPeerProtectionMultipleTags(t *testing.T) { + SilencePeriod = 0 + cm := NewConnManager(19, 20, 0) + SilencePeriod = 10 * time.Second + + not := cm.Notifee() + + // produce 20 connections with unique peers. + var conns []inet.Conn + for i := 0; i < 20; i++ { + rc := randConn(t, not.Disconnected) + conns = append(conns, rc) + not.Connected(nil, rc) + cm.TagPeer(rc.RemotePeer(), "test", 20) + } + + // protect the first 5 peers under two tags. + var protected []inet.Conn + for _, c := range conns[0:5] { + cm.Protect(c.RemotePeer(), "tag1") + cm.Protect(c.RemotePeer(), "tag2") + protected = append(protected, c) + // tag them negatively to make them preferred for pruning. + cm.TagPeer(c.RemotePeer(), "test", -100) + } + + // add one more connection, sending the connection manager overboard. + not.Connected(nil, randConn(t, not.Disconnected)) + + // the pruning happens in the background -- this timing condition is not good. + time.Sleep(1 * time.Second) + + for _, c := range protected { + if c.(*tconn).closed { + t.Error("protected connection was closed by connection manager") + } + } + + // remove the protection from one tag. + for _, c := range protected { + if !cm.Unprotect(c.RemotePeer(), "tag1") { + t.Error("peer should still be protected") + } + } - // add 11 more connections, sending the connection manager overboard again. - for i := 0; i < 11; i++ { + // add 2 more connections, sending the connection manager overboard again. + for i := 0; i < 2; i++ { rc := randConn(t, not.Disconnected) conns = append(conns, rc) not.Connected(nil, rc) @@ -383,6 +455,30 @@ func TestPeerProtection(t *testing.T) { // the pruning happens in the background -- this timing condition is not good. time.Sleep(1 * time.Second) + // connections should still remain open, as they were protected. + for _, c := range protected[0:] { + if c.(*tconn).closed { + t.Error("protected connection was closed by connection manager") + } + } + + // unprotect the first peer entirely. + cm.Unprotect(protected[0].RemotePeer(), "tag2") + + // add 2 more connections, sending the connection manager overboard again. + for i := 0; i < 2; i++ { + rc := randConn(t, not.Disconnected) + conns = append(conns, rc) + not.Connected(nil, rc) + cm.TagPeer(rc.RemotePeer(), "test", 20) + } + + // the pruning happens in the background -- this timing condition is not good. 
+ time.Sleep(1 * time.Second) + + cm.lk.Lock() // pacify the race detector + defer cm.lk.Unlock() + if !protected[0].(*tconn).closed { t.Error("unprotected connection was kept open by connection manager") } @@ -391,4 +487,37 @@ func TestPeerProtection(t *testing.T) { t.Error("protected connection was closed by connection manager") } } + +} + +func TestPeerProtectionIdempotent(t *testing.T) { + SilencePeriod = 0 + cm := NewConnManager(10, 20, 0) + SilencePeriod = 10 * time.Second + + id, _ := tu.RandPeerID() + cm.Protect(id, "global") + cm.Protect(id, "global") + cm.Protect(id, "global") + cm.Protect(id, "global") + + if len(cm.protected[id]) > 1 { + t.Error("expected peer to be protected only once") + } + + if !cm.Unprotect(id, "unused") { + t.Error("expected peer to continue to be protected") + } + + if !cm.Unprotect(id, "unused2") { + t.Error("expected peer to continue to be protected") + } + + if cm.Unprotect(id, "global") { + t.Error("expected peer to be unprotected") + } + + if len(cm.protected) > 0 { + t.Error("expected no protections") + } } From 08cdc1931373be83bf942046f39b59ef640d8463 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 6 May 2019 14:01:32 +0300 Subject: [PATCH 19/58] implement UpsertTag --- p2p/net/connmgr/connmgr.go | 17 +++++++++++++++++ p2p/net/connmgr/connmgr_test.go | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 2629dffdf0..cd2173e6c6 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -244,6 +244,23 @@ func (cm *BasicConnMgr) UntagPeer(p peer.ID, tag string) { delete(pi.tags, tag) } +// UpsertTag is called to insert/update a peer tag +func (cm *BasicConnMgr) UpsertTag(p peer.ID, tag string, upsert func(int) int) { + cm.lk.Lock() + defer cm.lk.Unlock() + + pi, ok := cm.peers[p] + if !ok { + log.Info("tried to upsert tag from untracked peer: ", p) + return + } + + oldval := pi.tags[tag] + newval := upsert(oldval) + pi.value += (newval - oldval) + pi.tags[tag] = newval +} + // CMInfo holds the configuration for BasicConnMgr, as well as status data. type CMInfo struct { // The low watermark, as described in NewConnManager. 
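A rough usage sketch of the new UpsertTag method, with an assumed tag name and helper functions that are not part of the patch. Because the new value is derived from the old one while the manager's lock is held, read-modify-write updates stay consistent under concurrency, which plain TagPeer cannot express:

// onStreamOpened bumps a per-peer counter tag each time the application
// opens a stream to that peer.
func onStreamOpened(cm *BasicConnMgr, p peer.ID) {
	cm.UpsertTag(p, "app-streams", func(old int) int { return old + 1 })
}

// onStreamClosed decrements the same counter, clamping it at zero.
func onStreamClosed(cm *BasicConnMgr, p peer.ID) {
	cm.UpsertTag(p, "app-streams", func(old int) int {
		if old <= 0 {
			return 0
		}
		return old - 1
	})
}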
diff --git a/p2p/net/connmgr/connmgr_test.go b/p2p/net/connmgr/connmgr_test.go index a5057612ae..b8ebba7e8d 100644 --- a/p2p/net/connmgr/connmgr_test.go +++ b/p2p/net/connmgr/connmgr_test.go @@ -521,3 +521,35 @@ func TestPeerProtectionIdempotent(t *testing.T) { t.Error("expected no protections") } } + +func TestUpsertTag(t *testing.T) { + cm := NewConnManager(1, 1, time.Duration(10*time.Minute)) + not := cm.Notifee() + conn := randConn(t, nil) + not.Connected(nil, conn) + rp := conn.RemotePeer() + + cm.UpsertTag(rp, "tag", func(v int) int { return v + 1 }) + if len(cm.peers[rp].tags) != 1 { + t.Fatal("expected a tag") + } + if cm.peers[rp].value != 1 { + t.Fatal("expected a tag value of 1") + } + + cm.UpsertTag(rp, "tag", func(v int) int { return v + 1 }) + if len(cm.peers[rp].tags) != 1 { + t.Fatal("expected a tag") + } + if cm.peers[rp].value != 2 { + t.Fatal("expected a tag value of 2") + } + + cm.UpsertTag(rp, "tag", func(v int) int { return v - 1 }) + if len(cm.peers[rp].tags) != 1 { + t.Fatal("expected a tag") + } + if cm.peers[rp].value != 1 { + t.Fatal("expected a tag value of 1") + } +} From dbd00cdb069f69e862fc8177c8b005b390bcaa9a Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 10 May 2019 13:53:52 +0300 Subject: [PATCH 20/58] implement segmented lock to eliminate big lock contention --- p2p/net/connmgr/connmgr.go | 123 ++++++++++++++++++++++++------------- 1 file changed, 79 insertions(+), 44 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index cd2173e6c6..3010e30579 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -4,6 +4,7 @@ import ( "context" "sort" "sync" + "sync/atomic" "time" logging "github.com/ipfs/go-log" @@ -25,12 +26,11 @@ var log = logging.Logger("connmgr") // // See configuration parameters in NewConnManager. type BasicConnMgr struct { - lk sync.Mutex highWater int lowWater int - connCount int + connCount int32 gracePeriod time.Duration - peers map[peer.ID]*peerInfo + segments segments plk sync.RWMutex protected map[peer.ID]map[string]struct{} @@ -43,6 +43,27 @@ type BasicConnMgr struct { var _ ifconnmgr.ConnManager = (*BasicConnMgr)(nil) +type segment struct { + sync.Mutex + peers map[peer.ID]*peerInfo +} + +type segments [256]*segment + +func (s *segments) get(id peer.ID) *segment { + b := []byte(id) + return s[b[len(b)-1]] +} + +func (s *segments) countPeers() (count int) { + for _, seg := range s { + seg.Lock() + count += len(seg.peers) + seg.Unlock() + } + return count +} + // NewConnManager creates a new BasicConnMgr with the provided params: // * lo and hi are watermarks governing the number of connections that'll be maintained. // When the peer count exceeds the 'high watermark', as many peers will be pruned (and @@ -54,10 +75,17 @@ func NewConnManager(low, hi int, grace time.Duration) *BasicConnMgr { highWater: hi, lowWater: low, gracePeriod: grace, - peers: make(map[peer.ID]*peerInfo), trimRunningCh: make(chan struct{}, 1), protected: make(map[peer.ID]map[string]struct{}, 16), silencePeriod: SilencePeriod, + segments: func() (ret segments) { + for i := range ret { + ret[i] = &segment{ + peers: make(map[peer.ID]*peerInfo), + } + } + return ret + }(), } } @@ -129,27 +157,29 @@ func (cm *BasicConnMgr) TrimOpenConns(ctx context.Context) { // getConnsToClose runs the heuristics described in TrimOpenConns and returns the // connections to close. 
func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { - cm.lk.Lock() - defer cm.lk.Unlock() - if cm.lowWater == 0 || cm.highWater == 0 { // disabled return nil } now := time.Now() - if len(cm.peers) < cm.lowWater { + npeers := cm.segments.countPeers() + if npeers < cm.lowWater { log.Info("open connection count below limit") return nil } var candidates []*peerInfo cm.plk.RLock() - for id, inf := range cm.peers { - if _, ok := cm.protected[id]; ok { - // skip over protected peer. - continue + for _, s := range cm.segments { + s.Lock() + for id, inf := range s.peers { + if _, ok := cm.protected[id]; ok { + // skip over protected peer. + continue + } + candidates = append(candidates, inf) } - candidates = append(candidates, inf) + s.Unlock() } cm.plk.RUnlock() @@ -158,7 +188,7 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { return candidates[i].value < candidates[j].value }) - target := len(cm.peers) - cm.lowWater + target := npeers - cm.lowWater // 2x number of peers we're disconnecting from because we may have more // than one connection per peer. Slightly over allocating isn't an issue @@ -187,10 +217,11 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { // GetTagInfo is called to fetch the tag information associated with a given // peer, nil is returned if p refers to an unknown peer. func (cm *BasicConnMgr) GetTagInfo(p peer.ID) *ifconnmgr.TagInfo { - cm.lk.Lock() - defer cm.lk.Unlock() + s := cm.segments.get(p) + s.Lock() + defer s.Unlock() - pi, ok := cm.peers[p] + pi, ok := s.peers[p] if !ok { return nil } @@ -214,10 +245,11 @@ func (cm *BasicConnMgr) GetTagInfo(p peer.ID) *ifconnmgr.TagInfo { // TagPeer is called to associate a string and integer with a given peer. func (cm *BasicConnMgr) TagPeer(p peer.ID, tag string, val int) { - cm.lk.Lock() - defer cm.lk.Unlock() + s := cm.segments.get(p) + s.Lock() + defer s.Unlock() - pi, ok := cm.peers[p] + pi, ok := s.peers[p] if !ok { log.Info("tried to tag conn from untracked peer: ", p) return @@ -230,10 +262,11 @@ func (cm *BasicConnMgr) TagPeer(p peer.ID, tag string, val int) { // UntagPeer is called to disassociate a string and integer from a given peer. func (cm *BasicConnMgr) UntagPeer(p peer.ID, tag string) { - cm.lk.Lock() - defer cm.lk.Unlock() + s := cm.segments.get(p) + s.Lock() + defer s.Unlock() - pi, ok := cm.peers[p] + pi, ok := s.peers[p] if !ok { log.Info("tried to remove tag from untracked peer: ", p) return @@ -246,10 +279,11 @@ func (cm *BasicConnMgr) UntagPeer(p peer.ID, tag string) { // UpsertTag is called to insert/update a peer tag func (cm *BasicConnMgr) UpsertTag(p peer.ID, tag string, upsert func(int) int) { - cm.lk.Lock() - defer cm.lk.Unlock() + s := cm.segments.get(p) + s.Lock() + defer s.Unlock() - pi, ok := cm.peers[p] + pi, ok := s.peers[p] if !ok { log.Info("tried to upsert tag from untracked peer: ", p) return @@ -281,15 +315,12 @@ type CMInfo struct { // GetInfo returns the configuration and status data for this connection manager. 
func (cm *BasicConnMgr) GetInfo() CMInfo { - cm.lk.Lock() - defer cm.lk.Unlock() - return CMInfo{ HighWater: cm.highWater, LowWater: cm.lowWater, LastTrim: cm.lastTrim, GracePeriod: cm.gracePeriod, - ConnCount: cm.connCount, + ConnCount: int(atomic.LoadInt32(&cm.connCount)), } } @@ -312,29 +343,31 @@ func (nn *cmNotifee) cm() *BasicConnMgr { func (nn *cmNotifee) Connected(n inet.Network, c inet.Conn) { cm := nn.cm() - cm.lk.Lock() - defer cm.lk.Unlock() + p := c.RemotePeer() + s := cm.segments.get(p) + s.Lock() + defer s.Unlock() - pinfo, ok := cm.peers[c.RemotePeer()] + pinfo, ok := s.peers[p] if !ok { pinfo = &peerInfo{ firstSeen: time.Now(), tags: make(map[string]int), conns: make(map[inet.Conn]time.Time), } - cm.peers[c.RemotePeer()] = pinfo + s.peers[p] = pinfo } _, ok = pinfo.conns[c] if ok { - log.Error("received connected notification for conn we are already tracking: ", c.RemotePeer()) + log.Error("received connected notification for conn we are already tracking: ", p) return } pinfo.conns[c] = time.Now() - cm.connCount++ + connCount := atomic.AddInt32(&cm.connCount, 1) - if cm.connCount > nn.highWater { + if int(connCount) > nn.highWater { go cm.TrimOpenConns(context.Background()) } } @@ -344,26 +377,28 @@ func (nn *cmNotifee) Connected(n inet.Network, c inet.Conn) { func (nn *cmNotifee) Disconnected(n inet.Network, c inet.Conn) { cm := nn.cm() - cm.lk.Lock() - defer cm.lk.Unlock() + p := c.RemotePeer() + s := cm.segments.get(p) + s.Lock() + defer s.Unlock() - cinf, ok := cm.peers[c.RemotePeer()] + cinf, ok := s.peers[p] if !ok { - log.Error("received disconnected notification for peer we are not tracking: ", c.RemotePeer()) + log.Error("received disconnected notification for peer we are not tracking: ", p) return } _, ok = cinf.conns[c] if !ok { - log.Error("received disconnected notification for conn we are not tracking: ", c.RemotePeer()) + log.Error("received disconnected notification for conn we are not tracking: ", p) return } delete(cinf.conns, c) - cm.connCount-- if len(cinf.conns) == 0 { - delete(cm.peers, c.RemotePeer()) + delete(s.peers, p) } + atomic.AddInt32(&cm.connCount, -1) } // Listen is no-op in this implementation. 
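The essence of the segmented-lock change above is sharding the peer table into 256 independently locked buckets, selected by the last byte of the peer ID, so operations on unrelated peers stop contending on one global mutex. Here is a stand-alone sketch of that pattern with assumed names and a plain string key instead of a peer.ID:

package main

import (
	"fmt"
	"sync"
)

// segment is one independently locked bucket.
type segment struct {
	sync.Mutex
	counts map[string]int
}

// sharded spreads keys over 256 segments, chosen by the last byte of the
// key, so writers touching different keys rarely contend on the same mutex.
type sharded struct {
	segments [256]*segment
}

func newSharded() *sharded {
	s := &sharded{}
	for i := range s.segments {
		s.segments[i] = &segment{counts: make(map[string]int)}
	}
	return s
}

// get picks the bucket for a key; callers must pass a non-empty key.
func (s *sharded) get(key string) *segment {
	return s.segments[key[len(key)-1]]
}

func (s *sharded) inc(key string) {
	seg := s.get(key)
	seg.Lock()
	seg.counts[key]++
	seg.Unlock()
}

func main() {
	s := newSharded()
	s.inc("peer-A")
	s.inc("peer-A")
	s.inc("peer-B")
	fmt.Println(s.get("peer-A").counts["peer-A"]) // 2
}

With the single big lock gone, the patch also turns the connection count into an atomic int32, which is why GetInfo above reads it with atomic.LoadInt32 instead of taking a lock.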
From 30f9525c0355f06e8fd56f4fbb4d2c7f3714a5bc Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 10 May 2019 13:54:00 +0300 Subject: [PATCH 21/58] fix tests --- p2p/net/connmgr/connmgr_test.go | 54 ++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/p2p/net/connmgr/connmgr_test.go b/p2p/net/connmgr/connmgr_test.go index b8ebba7e8d..20c792c025 100644 --- a/p2p/net/connmgr/connmgr_test.go +++ b/p2p/net/connmgr/connmgr_test.go @@ -5,6 +5,7 @@ import ( "testing" "time" + detectrace "github.com/ipfs/go-detect-race" inet "github.com/libp2p/go-libp2p-net" peer "github.com/libp2p/go-libp2p-peer" tu "github.com/libp2p/go-testutil" @@ -189,7 +190,7 @@ func TestTagPeerNonExistant(t *testing.T) { id := tu.RandPeerIDFatal(t) cm.TagPeer(id, "test", 1) - if len(cm.peers) != 0 { + if cm.segments.countPeers() != 0 { t.Fatal("expected zero peers") } } @@ -205,20 +206,20 @@ func TestUntagPeer(t *testing.T) { id := tu.RandPeerIDFatal(t) cm.UntagPeer(id, "test") - if len(cm.peers[rp].tags) != 2 { + if len(cm.segments.get(rp).peers[rp].tags) != 2 { t.Fatal("expected tags to be uneffected") } cm.UntagPeer(conn.RemotePeer(), "test") - if len(cm.peers[rp].tags) != 2 { + if len(cm.segments.get(rp).peers[rp].tags) != 2 { t.Fatal("expected tags to be uneffected") } cm.UntagPeer(conn.RemotePeer(), "tag") - if len(cm.peers[rp].tags) != 1 { + if len(cm.segments.get(rp).peers[rp].tags) != 1 { t.Fatal("expected tag to be removed") } - if cm.peers[rp].value != 5 { + if cm.segments.get(rp).peers[rp].value != 5 { t.Fatal("expected aggreagte tag value to be 5") } } @@ -262,7 +263,7 @@ func TestDoubleConnection(t *testing.T) { if cm.connCount != 1 { t.Fatal("unexpected number of connections") } - if cm.peers[conn.RemotePeer()].value != 10 { + if cm.segments.get(conn.RemotePeer()).peers[conn.RemotePeer()].value != 10 { t.Fatal("unexpected peer value") } } @@ -279,7 +280,7 @@ func TestDisconnected(t *testing.T) { if cm.connCount != 1 { t.Fatal("unexpected number of connections") } - if cm.peers[conn.RemotePeer()].value != 10 { + if cm.segments.get(conn.RemotePeer()).peers[conn.RemotePeer()].value != 10 { t.Fatal("unexpected peer value") } @@ -287,7 +288,7 @@ func TestDisconnected(t *testing.T) { if cm.connCount != 1 { t.Fatal("unexpected number of connections") } - if cm.peers[conn.RemotePeer()].value != 10 { + if cm.segments.get(conn.RemotePeer()).peers[conn.RemotePeer()].value != 10 { t.Fatal("unexpected peer value") } @@ -295,13 +296,17 @@ func TestDisconnected(t *testing.T) { if cm.connCount != 0 { t.Fatal("unexpected number of connections") } - if len(cm.peers) != 0 { + if cm.segments.countPeers() != 0 { t.Fatal("unexpected number of peers") } } // see https://github.com/libp2p/go-libp2p-connmgr/issues/23 func TestQuickBurstRespectsSilencePeriod(t *testing.T) { + if detectrace.WithRace() { + t.Skip("race detector is unhappy with this test") + } + cm := NewConnManager(10, 20, 0) not := cm.Notifee() @@ -317,9 +322,6 @@ func TestQuickBurstRespectsSilencePeriod(t *testing.T) { // wait for a few seconds time.Sleep(time.Second * 3) - cm.lk.Lock() // pacify the race detector - defer cm.lk.Unlock() - // only the first trim is allowed in; make sure we close at most 20 connections, not all of them. 
var closed int for _, c := range conns { @@ -330,12 +332,16 @@ func TestQuickBurstRespectsSilencePeriod(t *testing.T) { if closed > 20 { t.Fatalf("should have closed at most 20 connections, closed: %d", closed) } - if total := closed + cm.connCount; total != 30 { + if total := closed + int(cm.connCount); total != 30 { t.Fatalf("expected closed connections + open conn count to equal 30, value: %d", total) } } func TestPeerProtectionSingleTag(t *testing.T) { + if detectrace.WithRace() { + t.Skip("race detector is unhappy with this test") + } + SilencePeriod = 0 cm := NewConnManager(19, 20, 0) SilencePeriod = 10 * time.Second @@ -386,9 +392,6 @@ func TestPeerProtectionSingleTag(t *testing.T) { // the pruning happens in the background -- this timing condition is not good. time.Sleep(1 * time.Second) - cm.lk.Lock() // pacify the race detector - defer cm.lk.Unlock() - if !protected[0].(*tconn).closed { t.Error("unprotected connection was kept open by connection manager") } @@ -400,6 +403,10 @@ func TestPeerProtectionSingleTag(t *testing.T) { } func TestPeerProtectionMultipleTags(t *testing.T) { + if detectrace.WithRace() { + t.Skip("race detector is unhappy with this test") + } + SilencePeriod = 0 cm := NewConnManager(19, 20, 0) SilencePeriod = 10 * time.Second @@ -476,9 +483,6 @@ func TestPeerProtectionMultipleTags(t *testing.T) { // the pruning happens in the background -- this timing condition is not good. time.Sleep(1 * time.Second) - cm.lk.Lock() // pacify the race detector - defer cm.lk.Unlock() - if !protected[0].(*tconn).closed { t.Error("unprotected connection was kept open by connection manager") } @@ -530,26 +534,26 @@ func TestUpsertTag(t *testing.T) { rp := conn.RemotePeer() cm.UpsertTag(rp, "tag", func(v int) int { return v + 1 }) - if len(cm.peers[rp].tags) != 1 { + if len(cm.segments.get(rp).peers[rp].tags) != 1 { t.Fatal("expected a tag") } - if cm.peers[rp].value != 1 { + if cm.segments.get(rp).peers[rp].value != 1 { t.Fatal("expected a tag value of 1") } cm.UpsertTag(rp, "tag", func(v int) int { return v + 1 }) - if len(cm.peers[rp].tags) != 1 { + if len(cm.segments.get(rp).peers[rp].tags) != 1 { t.Fatal("expected a tag") } - if cm.peers[rp].value != 2 { + if cm.segments.get(rp).peers[rp].value != 2 { t.Fatal("expected a tag value of 2") } cm.UpsertTag(rp, "tag", func(v int) int { return v - 1 }) - if len(cm.peers[rp].tags) != 1 { + if len(cm.segments.get(rp).peers[rp].tags) != 1 { t.Fatal("expected a tag") } - if cm.peers[rp].value != 1 { + if cm.segments.get(rp).peers[rp].value != 1 { t.Fatal("expected a tag value of 1") } } From 96ea7fc0d2e097c5fe324935abfc22bad4299c5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Fri, 10 May 2019 16:16:32 +0100 Subject: [PATCH 22/58] add benchmark for lock contention (possibly inaccurate). 
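Assuming the standard Go toolchain and the package path used throughout this series, the benchmark added below can be exercised with something like:

go test -run=NONE -bench=BenchmarkLockContention -benchmem ./p2p/net/connmgr/

The -run=NONE regexp matches no test names, so only the benchmark runs.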
--- p2p/net/connmgr/bench_test.go | 51 +++++++++++++++++++++++++++++++++ p2p/net/connmgr/connmgr_test.go | 2 +- 2 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 p2p/net/connmgr/bench_test.go diff --git a/p2p/net/connmgr/bench_test.go b/p2p/net/connmgr/bench_test.go new file mode 100644 index 0000000000..91124d0dd8 --- /dev/null +++ b/p2p/net/connmgr/bench_test.go @@ -0,0 +1,51 @@ +package connmgr + +import ( + "math/rand" + "sync" + "testing" + + inet "github.com/libp2p/go-libp2p-net" +) + +func randomConns(tb testing.TB) (c [5000]inet.Conn) { + for i, _ := range c { + c[i] = randConn(tb, nil) + } + return c +} + +func BenchmarkLockContention(b *testing.B) { + conns := randomConns(b) + cm := NewConnManager(1000, 1000, 0) + not := cm.Notifee() + + kill := make(chan struct{}) + var wg sync.WaitGroup + + for i := 0; i < 16; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for { + select { + case <-kill: + return + default: + _ = cm.GetTagInfo(conns[rand.Intn(3000)].RemotePeer()) + } + } + }() + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + rc := conns[rand.Intn(3000)] + not.Connected(nil, rc) + cm.TagPeer(rc.RemotePeer(), "tag", 100) + cm.UntagPeer(rc.RemotePeer(), "tag") + not.Disconnected(nil, rc) + } + close(kill) + wg.Wait() +} diff --git a/p2p/net/connmgr/connmgr_test.go b/p2p/net/connmgr/connmgr_test.go index 20c792c025..ba61fc30de 100644 --- a/p2p/net/connmgr/connmgr_test.go +++ b/p2p/net/connmgr/connmgr_test.go @@ -40,7 +40,7 @@ func (c *tconn) RemoteMultiaddr() ma.Multiaddr { return addr } -func randConn(t *testing.T, discNotify func(inet.Network, inet.Conn)) inet.Conn { +func randConn(t testing.TB, discNotify func(inet.Network, inet.Conn)) inet.Conn { pid := tu.RandPeerIDFatal(t) return &tconn{peer: pid, disconnectNotify: discNotify} } From c7917a3b24797a1f8302dafba256f8b2bcf7e646 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 10 May 2019 20:47:13 +0300 Subject: [PATCH 23/58] fix bench_test to use TagPeer in parallel goroutines --- p2p/net/connmgr/bench_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/p2p/net/connmgr/bench_test.go b/p2p/net/connmgr/bench_test.go index 91124d0dd8..aac11bb994 100644 --- a/p2p/net/connmgr/bench_test.go +++ b/p2p/net/connmgr/bench_test.go @@ -32,7 +32,7 @@ func BenchmarkLockContention(b *testing.B) { case <-kill: return default: - _ = cm.GetTagInfo(conns[rand.Intn(3000)].RemotePeer()) + cm.TagPeer(conns[rand.Intn(len(conns))].RemotePeer(), "another-tag", 1) } } }() @@ -40,7 +40,7 @@ func BenchmarkLockContention(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - rc := conns[rand.Intn(3000)] + rc := conns[rand.Intn(len(conns))] not.Connected(nil, rc) cm.TagPeer(rc.RemotePeer(), "tag", 100) cm.UntagPeer(rc.RemotePeer(), "tag") From 68c9087d5515cf612391adeb1673e7a7528053e9 Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 12 May 2019 14:01:38 +0300 Subject: [PATCH 24/58] fix some issues in connection trimming - fix potential concurrent modification of connection map from concurrent connects/disconnects by locking the segment - don't spawn goroutines on every connect during trims --- p2p/net/connmgr/connmgr.go | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 3010e30579..2b159b5581 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -145,12 +145,18 @@ func (cm *BasicConnMgr) TrimOpenConns(ctx context.Context) { // skip this attempt to trim as the last one just took place. 
return } + + // mark to stop spawning goroutines in every connection while trimming + cm.lastTrim = time.Now() + defer log.EventBegin(ctx, "connCleanup").Done() for _, c := range cm.getConnsToClose(ctx) { log.Info("closing conn: ", c.RemotePeer()) log.Event(ctx, "closeConn", c.RemotePeer()) c.Close() } + + // we just finished, set the trim time cm.lastTrim = time.Now() } @@ -168,7 +174,7 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { return nil } - var candidates []*peerInfo + candidates := make([]*peerInfo, 0, npeers) cm.plk.RLock() for _, s := range cm.segments { s.Lock() @@ -201,9 +207,13 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { continue } + // lock this to protect from concurrent modifications from connect/disconnect events + s := cm.segments.get(inf.id) + s.Lock() for c := range inf.conns { selected = append(selected, c) } + s.Unlock() target-- if target == 0 { @@ -351,6 +361,7 @@ func (nn *cmNotifee) Connected(n inet.Network, c inet.Conn) { pinfo, ok := s.peers[p] if !ok { pinfo = &peerInfo{ + id: p, firstSeen: time.Now(), tags: make(map[string]int), conns: make(map[inet.Conn]time.Time), @@ -367,7 +378,7 @@ func (nn *cmNotifee) Connected(n inet.Network, c inet.Conn) { pinfo.conns[c] = time.Now() connCount := atomic.AddInt32(&cm.connCount, 1) - if int(connCount) > nn.highWater { + if int(connCount) > nn.highWater && time.Since(cm.lastTrim) > cm.silencePeriod { go cm.TrimOpenConns(context.Background()) } } From a696dc0095ba91b0aeb3b69af703e674048e08ff Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 12 May 2019 14:24:10 +0300 Subject: [PATCH 25/58] revert racey last trim check --- p2p/net/connmgr/connmgr.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 2b159b5581..fc51b9d248 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -146,9 +146,6 @@ func (cm *BasicConnMgr) TrimOpenConns(ctx context.Context) { return } - // mark to stop spawning goroutines in every connection while trimming - cm.lastTrim = time.Now() - defer log.EventBegin(ctx, "connCleanup").Done() for _, c := range cm.getConnsToClose(ctx) { log.Info("closing conn: ", c.RemotePeer()) @@ -156,7 +153,6 @@ func (cm *BasicConnMgr) TrimOpenConns(ctx context.Context) { c.Close() } - // we just finished, set the trim time cm.lastTrim = time.Now() } @@ -378,7 +374,7 @@ func (nn *cmNotifee) Connected(n inet.Network, c inet.Conn) { pinfo.conns[c] = time.Now() connCount := atomic.AddInt32(&cm.connCount, 1) - if int(connCount) > nn.highWater && time.Since(cm.lastTrim) > cm.silencePeriod { + if int(connCount) > nn.highWater { go cm.TrimOpenConns(context.Background()) } } From 0ad5be224a0917637b9f57242f3676736f14373d Mon Sep 17 00:00:00 2001 From: vyzo Date: Sun, 12 May 2019 15:41:34 +0300 Subject: [PATCH 26/58] fix peer comparison to low water mark otherwise the target can be 0, which makes it negative on decrement, and it will end up dropping all connections. 
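A tiny self-contained trace of the failure mode described in the commit message, using assumed numbers: with exactly lowWater peers the old strict comparison lets the trim proceed, the target starts at zero, the first decrement pushes it negative, and the `target == 0` break never fires, so every connection gets closed.

package main

import "fmt"

func main() {
	const lowWater = 5
	peers := 5 // exactly at the low watermark

	// old guard: only skips the trim when strictly below the low water mark
	if peers < lowWater {
		fmt.Println("trim skipped")
		return
	}

	target := peers - lowWater // 0
	closed := 0
	for i := 0; i < peers; i++ {
		closed++
		target--         // 0 -> -1 on the first iteration
		if target == 0 { // never true again, so every peer is closed
			break
		}
	}
	fmt.Println("closed", closed, "of", peers, "connections") // closed 5 of 5
}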
--- p2p/net/connmgr/connmgr.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index fc51b9d248..b860cc7108 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -165,7 +165,7 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { } now := time.Now() npeers := cm.segments.countPeers() - if npeers < cm.lowWater { + if npeers <= cm.lowWater { log.Info("open connection count below limit") return nil } From c9ef3d9b9710b25ae14f682d92159db186e76129 Mon Sep 17 00:00:00 2001 From: vyzo Date: Fri, 17 May 2019 23:46:49 +0300 Subject: [PATCH 27/58] index string directly to avoid intermediate byte slice --- p2p/net/connmgr/connmgr.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index b860cc7108..af73945c1f 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -50,9 +50,8 @@ type segment struct { type segments [256]*segment -func (s *segments) get(id peer.ID) *segment { - b := []byte(id) - return s[b[len(b)-1]] +func (s *segments) get(p peer.ID) *segment { + return s[byte(p[len(p)-1])] } func (s *segments) countPeers() (count int) { From 72e862833f1b198e4ad1a3c70f9970de68cc9b77 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 13 May 2019 19:54:02 +0300 Subject: [PATCH 28/58] implement background trimming --- p2p/net/connmgr/connmgr.go | 39 ++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index af73945c1f..9a3912da5b 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -39,6 +39,9 @@ type BasicConnMgr struct { trimRunningCh chan struct{} lastTrim time.Time silencePeriod time.Duration + + ctx context.Context + cancel func() } var _ ifconnmgr.ConnManager = (*BasicConnMgr)(nil) @@ -70,13 +73,16 @@ func (s *segments) countPeers() (count int) { // * grace is the amount of time a newly opened connection is given before it becomes // subject to pruning. func NewConnManager(low, hi int, grace time.Duration) *BasicConnMgr { - return &BasicConnMgr{ + ctx, cancel := context.WithCancel(context.Background()) + cm := &BasicConnMgr{ highWater: hi, lowWater: low, gracePeriod: grace, trimRunningCh: make(chan struct{}, 1), protected: make(map[peer.ID]map[string]struct{}, 16), silencePeriod: SilencePeriod, + ctx: ctx, + cancel: cancel, segments: func() (ret segments) { for i := range ret { ret[i] = &segment{ @@ -86,6 +92,14 @@ func NewConnManager(low, hi int, grace time.Duration) *BasicConnMgr { return ret }(), } + + go cm.background() + return cm +} + +func (cm *BasicConnMgr) Close() error { + cm.cancel() + return nil } func (cm *BasicConnMgr) Protect(id peer.ID, tag string) { @@ -155,6 +169,23 @@ func (cm *BasicConnMgr) TrimOpenConns(ctx context.Context) { cm.lastTrim = time.Now() } +func (cm *BasicConnMgr) background() { + ticker := time.NewTicker(time.Minute) + defer ticker.Stop() + + for { + select { + case <-ticker.C: + if atomic.LoadInt32(&cm.connCount) > int32(cm.highWater) { + cm.TrimOpenConns(cm.ctx) + } + + case <-cm.ctx.Done(): + return + } + } +} + // getConnsToClose runs the heuristics described in TrimOpenConns and returns the // connections to close. 
func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { @@ -371,11 +402,7 @@ func (nn *cmNotifee) Connected(n inet.Network, c inet.Conn) { } pinfo.conns[c] = time.Now() - connCount := atomic.AddInt32(&cm.connCount, 1) - - if int(connCount) > nn.highWater { - go cm.TrimOpenConns(context.Background()) - } + atomic.AddInt32(&cm.connCount, 1) } // Disconnected is called by notifiers to inform that an existing connection has been closed or terminated. From 75cc6185548a259ac3e46c122128c52ee039f548 Mon Sep 17 00:00:00 2001 From: vyzo Date: Mon, 13 May 2019 19:57:23 +0300 Subject: [PATCH 29/58] fix tests --- p2p/net/connmgr/connmgr_test.go | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/p2p/net/connmgr/connmgr_test.go b/p2p/net/connmgr/connmgr_test.go index ba61fc30de..7f4ed10436 100644 --- a/p2p/net/connmgr/connmgr_test.go +++ b/p2p/net/connmgr/connmgr_test.go @@ -369,8 +369,7 @@ func TestPeerProtectionSingleTag(t *testing.T) { // add one more connection, sending the connection manager overboard. not.Connected(nil, randConn(t, not.Disconnected)) - // the pruning happens in the background -- this timing condition is not good. - time.Sleep(1 * time.Second) + cm.TrimOpenConns(context.Background()) for _, c := range protected { if c.(*tconn).closed { @@ -389,8 +388,7 @@ func TestPeerProtectionSingleTag(t *testing.T) { cm.TagPeer(rc.RemotePeer(), "test", 20) } - // the pruning happens in the background -- this timing condition is not good. - time.Sleep(1 * time.Second) + cm.TrimOpenConns(context.Background()) if !protected[0].(*tconn).closed { t.Error("unprotected connection was kept open by connection manager") @@ -435,8 +433,7 @@ func TestPeerProtectionMultipleTags(t *testing.T) { // add one more connection, sending the connection manager overboard. not.Connected(nil, randConn(t, not.Disconnected)) - // the pruning happens in the background -- this timing condition is not good. - time.Sleep(1 * time.Second) + cm.TrimOpenConns(context.Background()) for _, c := range protected { if c.(*tconn).closed { @@ -459,8 +456,7 @@ func TestPeerProtectionMultipleTags(t *testing.T) { cm.TagPeer(rc.RemotePeer(), "test", 20) } - // the pruning happens in the background -- this timing condition is not good. - time.Sleep(1 * time.Second) + cm.TrimOpenConns(context.Background()) // connections should still remain open, as they were protected. for _, c := range protected[0:] { @@ -480,8 +476,7 @@ func TestPeerProtectionMultipleTags(t *testing.T) { cm.TagPeer(rc.RemotePeer(), "test", 20) } - // the pruning happens in the background -- this timing condition is not good. 
- time.Sleep(1 * time.Second) + cm.TrimOpenConns(context.Background()) if !protected[0].(*tconn).closed { t.Error("unprotected connection was kept open by connection manager") From 234c13d26a11e88549c169ce756eef90d5113bf8 Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 14 May 2019 15:21:19 +0300 Subject: [PATCH 30/58] fix issue #44 --- p2p/net/connmgr/connmgr.go | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 9a3912da5b..e61ad15ca9 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -194,12 +194,13 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { return nil } now := time.Now() - npeers := cm.segments.countPeers() - if npeers <= cm.lowWater { + nconns := int(atomic.LoadInt32(&cm.connCount)) + if nconns <= cm.lowWater { log.Info("open connection count below limit") return nil } + npeers := cm.segments.countPeers() candidates := make([]*peerInfo, 0, npeers) cm.plk.RLock() for _, s := range cm.segments { @@ -220,12 +221,10 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { return candidates[i].value < candidates[j].value }) - target := npeers - cm.lowWater + target := nconns - cm.lowWater - // 2x number of peers we're disconnecting from because we may have more - // than one connection per peer. Slightly over allocating isn't an issue - // as this is a very short-lived array. - selected := make([]inet.Conn, 0, target*2) + // slightly overallocate because we may have more than one conns per peer + selected := make([]inet.Conn, 0, target+10) for _, inf := range candidates { // TODO: should we be using firstSeen or the time associated with the connection itself? @@ -239,10 +238,10 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { for c := range inf.conns { selected = append(selected, c) } + target -= len(inf.conns) s.Unlock() - target-- - if target == 0 { + if target <= 0 { break } } From 90574a9e8d55f2f56cdda12308c3a659b75b2577 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Thu, 23 May 2019 15:36:41 +0100 Subject: [PATCH 31/58] Buffer early tags in temporary entries (#39) --- p2p/net/connmgr/connmgr.go | 81 ++++++++++++++++++++++----------- p2p/net/connmgr/connmgr_test.go | 50 ++++++++++++++++++-- 2 files changed, 102 insertions(+), 29 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index e61ad15ca9..681649662c 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -53,12 +53,12 @@ type segment struct { type segments [256]*segment -func (s *segments) get(p peer.ID) *segment { - return s[byte(p[len(p)-1])] +func (ss *segments) get(p peer.ID) *segment { + return ss[byte(p[len(p)-1])] } -func (s *segments) countPeers() (count int) { - for _, seg := range s { +func (ss *segments) countPeers() (count int) { + for _, seg := range ss { seg.Lock() count += len(seg.peers) seg.Unlock() @@ -66,6 +66,23 @@ func (s *segments) countPeers() (count int) { return count } +func (s *segment) tagInfoFor(p peer.ID) *peerInfo { + pi, ok := s.peers[p] + if ok { + return pi + } + // create a temporary peer to buffer early tags before the Connected notification arrives. + pi = &peerInfo{ + id: p, + firstSeen: time.Now(), // this timestamp will be updated when the first Connected notification arrives. 
+ temp: true, + tags: make(map[string]int), + conns: make(map[inet.Conn]time.Time), + } + s.peers[p] = pi + return pi +} + // NewConnManager creates a new BasicConnMgr with the provided params: // * lo and hi are watermarks governing the number of connections that'll be maintained. // When the peer count exceeds the 'high watermark', as many peers will be pruned (and @@ -134,6 +151,7 @@ type peerInfo struct { id peer.ID tags map[string]int // value for each tag value int // cached sum of all tag values + temp bool // this is a temporary entry holding early tags, and awaiting connections conns map[inet.Conn]time.Time // start time of each connection @@ -218,7 +236,13 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { // Sort peers according to their value. sort.Slice(candidates, func(i, j int) bool { - return candidates[i].value < candidates[j].value + left, right := candidates[i], candidates[j] + // temporary peers are preferred for pruning. + if left.temp != right.temp { + return left.temp + } + // otherwise, compare by value. + return left.value < right.value }) target := nconns - cm.lowWater @@ -227,6 +251,9 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { selected := make([]inet.Conn, 0, target+10) for _, inf := range candidates { + if target <= 0 { + break + } // TODO: should we be using firstSeen or the time associated with the connection itself? if inf.firstSeen.Add(cm.gracePeriod).After(now) { continue @@ -235,15 +262,18 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { // lock this to protect from concurrent modifications from connect/disconnect events s := cm.segments.get(inf.id) s.Lock() - for c := range inf.conns { - selected = append(selected, c) + + if len(inf.conns) == 0 && inf.temp { + // handle temporary entries for early tags -- this entry has gone past the grace period + // and still holds no connections, so prune it. + delete(s.peers, inf.id) + } else { + for c := range inf.conns { + selected = append(selected, c) + } } target -= len(inf.conns) s.Unlock() - - if target <= 0 { - break - } } return selected @@ -284,14 +314,10 @@ func (cm *BasicConnMgr) TagPeer(p peer.ID, tag string, val int) { s.Lock() defer s.Unlock() - pi, ok := s.peers[p] - if !ok { - log.Info("tried to tag conn from untracked peer: ", p) - return - } + pi := s.tagInfoFor(p) // Update the total value of the peer. - pi.value += (val - pi.tags[tag]) + pi.value += val - pi.tags[tag] pi.tags[tag] = val } @@ -318,15 +344,11 @@ func (cm *BasicConnMgr) UpsertTag(p peer.ID, tag string, upsert func(int) int) { s.Lock() defer s.Unlock() - pi, ok := s.peers[p] - if !ok { - log.Info("tried to upsert tag from untracked peer: ", p) - return - } + pi := s.tagInfoFor(p) oldval := pi.tags[tag] newval := upsert(oldval) - pi.value += (newval - oldval) + pi.value += newval - oldval pi.tags[tag] = newval } @@ -383,15 +405,22 @@ func (nn *cmNotifee) Connected(n inet.Network, c inet.Conn) { s.Lock() defer s.Unlock() - pinfo, ok := s.peers[p] + id := c.RemotePeer() + pinfo, ok := s.peers[id] if !ok { pinfo = &peerInfo{ - id: p, + id: id, firstSeen: time.Now(), tags: make(map[string]int), conns: make(map[inet.Conn]time.Time), } - s.peers[p] = pinfo + s.peers[id] = pinfo + } else if pinfo.temp { + // we had created a temporary entry for this peer to buffer early tags before the + // Connected notification arrived: flip the temporary flag, and update the firstSeen + // timestamp to the real one. 
+ pinfo.temp = false + pinfo.firstSeen = time.Now() } _, ok = pinfo.conns[c] diff --git a/p2p/net/connmgr/connmgr_test.go b/p2p/net/connmgr/connmgr_test.go index 7f4ed10436..d7933ae893 100644 --- a/p2p/net/connmgr/connmgr_test.go +++ b/p2p/net/connmgr/connmgr_test.go @@ -190,8 +190,8 @@ func TestTagPeerNonExistant(t *testing.T) { id := tu.RandPeerIDFatal(t) cm.TagPeer(id, "test", 1) - if cm.segments.countPeers() != 0 { - t.Fatal("expected zero peers") + if !cm.segments.get(id).peers[id].temp { + t.Fatal("expected 1 temporary entry") } } @@ -525,9 +525,9 @@ func TestUpsertTag(t *testing.T) { cm := NewConnManager(1, 1, time.Duration(10*time.Minute)) not := cm.Notifee() conn := randConn(t, nil) - not.Connected(nil, conn) rp := conn.RemotePeer() + // this is an early tag, before the Connected notification arrived. cm.UpsertTag(rp, "tag", func(v int) int { return v + 1 }) if len(cm.segments.get(rp).peers[rp].tags) != 1 { t.Fatal("expected a tag") @@ -536,6 +536,9 @@ func TestUpsertTag(t *testing.T) { t.Fatal("expected a tag value of 1") } + // now let's notify the connection. + not.Connected(nil, conn) + cm.UpsertTag(rp, "tag", func(v int) int { return v + 1 }) if len(cm.segments.get(rp).peers[rp].tags) != 1 { t.Fatal("expected a tag") @@ -552,3 +555,44 @@ func TestUpsertTag(t *testing.T) { t.Fatal("expected a tag value of 1") } } + +func TestTemporaryEntriesClearedFirst(t *testing.T) { + cm := NewConnManager(1, 1, 0) + + id := tu.RandPeerIDFatal(t) + cm.TagPeer(id, "test", 20) + + if cm.GetTagInfo(id).Value != 20 { + t.Fatal("expected an early tag with value 20") + } + + not := cm.Notifee() + conn1, conn2 := randConn(t, nil), randConn(t, nil) + not.Connected(nil, conn1) + not.Connected(nil, conn2) + + cm.TrimOpenConns(context.Background()) + if cm.GetTagInfo(id) != nil { + t.Fatal("expected no temporary tags after trimming") + } +} + +func TestTemporaryEntryConvertedOnConnection(t *testing.T) { + cm := NewConnManager(1, 1, 0) + + conn := randConn(t, nil) + cm.TagPeer(conn.RemotePeer(), "test", 20) + + ti := cm.segments.get(conn.RemotePeer()).peers[conn.RemotePeer()] + + if ti.value != 20 || !ti.temp { + t.Fatal("expected a temporary tag with value 20") + } + + not := cm.Notifee() + not.Connected(nil, conn) + + if ti.value != 20 || ti.temp { + t.Fatal("expected a non-temporary tag with value 20") + } +} From 65aca08c7fa09f883d057d86cad2c447b1abc4b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Sat, 25 May 2019 12:57:31 +0100 Subject: [PATCH 32/58] migrate to consolidated types (#45) --- p2p/net/connmgr/bench_test.go | 4 ++-- p2p/net/connmgr/connmgr.go | 37 +++++++++++++++++---------------- p2p/net/connmgr/connmgr_test.go | 26 ++++++++++++----------- 3 files changed, 35 insertions(+), 32 deletions(-) diff --git a/p2p/net/connmgr/bench_test.go b/p2p/net/connmgr/bench_test.go index aac11bb994..3749e63fc4 100644 --- a/p2p/net/connmgr/bench_test.go +++ b/p2p/net/connmgr/bench_test.go @@ -5,10 +5,10 @@ import ( "sync" "testing" - inet "github.com/libp2p/go-libp2p-net" + "github.com/libp2p/go-libp2p-core/network" ) -func randomConns(tb testing.TB) (c [5000]inet.Conn) { +func randomConns(tb testing.TB) (c [5000]network.Conn) { for i, _ := range c { c[i] = randConn(tb, nil) } diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 681649662c..0ee8901742 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -7,10 +7,11 @@ import ( "sync/atomic" "time" + "github.com/libp2p/go-libp2p-core/connmgr" + "github.com/libp2p/go-libp2p-core/network" 
+ "github.com/libp2p/go-libp2p-core/peer" + logging "github.com/ipfs/go-log" - ifconnmgr "github.com/libp2p/go-libp2p-interface-connmgr" - inet "github.com/libp2p/go-libp2p-net" - peer "github.com/libp2p/go-libp2p-peer" ma "github.com/multiformats/go-multiaddr" ) @@ -44,7 +45,7 @@ type BasicConnMgr struct { cancel func() } -var _ ifconnmgr.ConnManager = (*BasicConnMgr)(nil) +var _ connmgr.ConnManager = (*BasicConnMgr)(nil) type segment struct { sync.Mutex @@ -77,7 +78,7 @@ func (s *segment) tagInfoFor(p peer.ID) *peerInfo { firstSeen: time.Now(), // this timestamp will be updated when the first Connected notification arrives. temp: true, tags: make(map[string]int), - conns: make(map[inet.Conn]time.Time), + conns: make(map[network.Conn]time.Time), } s.peers[p] = pi return pi @@ -153,7 +154,7 @@ type peerInfo struct { value int // cached sum of all tag values temp bool // this is a temporary entry holding early tags, and awaiting connections - conns map[inet.Conn]time.Time // start time of each connection + conns map[network.Conn]time.Time // start time of each connection firstSeen time.Time // timestamp when we began tracking this peer. } @@ -206,7 +207,7 @@ func (cm *BasicConnMgr) background() { // getConnsToClose runs the heuristics described in TrimOpenConns and returns the // connections to close. -func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { +func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []network.Conn { if cm.lowWater == 0 || cm.highWater == 0 { // disabled return nil @@ -248,7 +249,7 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { target := nconns - cm.lowWater // slightly overallocate because we may have more than one conns per peer - selected := make([]inet.Conn, 0, target+10) + selected := make([]network.Conn, 0, target+10) for _, inf := range candidates { if target <= 0 { @@ -281,7 +282,7 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []inet.Conn { // GetTagInfo is called to fetch the tag information associated with a given // peer, nil is returned if p refers to an unknown peer. -func (cm *BasicConnMgr) GetTagInfo(p peer.ID) *ifconnmgr.TagInfo { +func (cm *BasicConnMgr) GetTagInfo(p peer.ID) *connmgr.TagInfo { s := cm.segments.get(p) s.Lock() defer s.Unlock() @@ -291,7 +292,7 @@ func (cm *BasicConnMgr) GetTagInfo(p peer.ID) *ifconnmgr.TagInfo { return nil } - out := &ifconnmgr.TagInfo{ + out := &connmgr.TagInfo{ FirstSeen: pi.firstSeen, Value: pi.value, Tags: make(map[string]int), @@ -384,7 +385,7 @@ func (cm *BasicConnMgr) GetInfo() CMInfo { // Notifee returns a sink through which Notifiers can inform the BasicConnMgr when // events occur. Currently, the notifee only reacts upon connection events // {Connected, Disconnected}. -func (cm *BasicConnMgr) Notifee() inet.Notifiee { +func (cm *BasicConnMgr) Notifee() network.Notifiee { return (*cmNotifee)(cm) } @@ -397,7 +398,7 @@ func (nn *cmNotifee) cm() *BasicConnMgr { // Connected is called by notifiers to inform that a new connection has been established. // The notifee updates the BasicConnMgr to start tracking the connection. If the new connection // count exceeds the high watermark, a trim may be triggered. 
-func (nn *cmNotifee) Connected(n inet.Network, c inet.Conn) { +func (nn *cmNotifee) Connected(n network.Network, c network.Conn) { cm := nn.cm() p := c.RemotePeer() @@ -412,7 +413,7 @@ func (nn *cmNotifee) Connected(n inet.Network, c inet.Conn) { id: id, firstSeen: time.Now(), tags: make(map[string]int), - conns: make(map[inet.Conn]time.Time), + conns: make(map[network.Conn]time.Time), } s.peers[id] = pinfo } else if pinfo.temp { @@ -435,7 +436,7 @@ func (nn *cmNotifee) Connected(n inet.Network, c inet.Conn) { // Disconnected is called by notifiers to inform that an existing connection has been closed or terminated. // The notifee updates the BasicConnMgr accordingly to stop tracking the connection, and performs housekeeping. -func (nn *cmNotifee) Disconnected(n inet.Network, c inet.Conn) { +func (nn *cmNotifee) Disconnected(n network.Network, c network.Conn) { cm := nn.cm() p := c.RemotePeer() @@ -463,13 +464,13 @@ func (nn *cmNotifee) Disconnected(n inet.Network, c inet.Conn) { } // Listen is no-op in this implementation. -func (nn *cmNotifee) Listen(n inet.Network, addr ma.Multiaddr) {} +func (nn *cmNotifee) Listen(n network.Network, addr ma.Multiaddr) {} // ListenClose is no-op in this implementation. -func (nn *cmNotifee) ListenClose(n inet.Network, addr ma.Multiaddr) {} +func (nn *cmNotifee) ListenClose(n network.Network, addr ma.Multiaddr) {} // OpenedStream is no-op in this implementation. -func (nn *cmNotifee) OpenedStream(inet.Network, inet.Stream) {} +func (nn *cmNotifee) OpenedStream(network.Network, network.Stream) {} // ClosedStream is no-op in this implementation. -func (nn *cmNotifee) ClosedStream(inet.Network, inet.Stream) {} +func (nn *cmNotifee) ClosedStream(network.Network, network.Stream) {} diff --git a/p2p/net/connmgr/connmgr_test.go b/p2p/net/connmgr/connmgr_test.go index d7933ae893..edaff7bc6c 100644 --- a/p2p/net/connmgr/connmgr_test.go +++ b/p2p/net/connmgr/connmgr_test.go @@ -6,18 +6,20 @@ import ( "time" detectrace "github.com/ipfs/go-detect-race" - inet "github.com/libp2p/go-libp2p-net" - peer "github.com/libp2p/go-libp2p-peer" - tu "github.com/libp2p/go-testutil" + + "github.com/libp2p/go-libp2p-core/network" + "github.com/libp2p/go-libp2p-core/peer" + + tu "github.com/libp2p/go-libp2p-core/test" ma "github.com/multiformats/go-multiaddr" ) type tconn struct { - inet.Conn + network.Conn peer peer.ID closed bool - disconnectNotify func(net inet.Network, conn inet.Conn) + disconnectNotify func(net network.Network, conn network.Conn) } func (c *tconn) Close() error { @@ -40,7 +42,7 @@ func (c *tconn) RemoteMultiaddr() ma.Multiaddr { return addr } -func randConn(t testing.TB, discNotify func(inet.Network, inet.Conn)) inet.Conn { +func randConn(t testing.TB, discNotify func(network.Network, network.Conn)) network.Conn { pid := tu.RandPeerIDFatal(t) return &tconn{peer: pid, disconnectNotify: discNotify} } @@ -49,7 +51,7 @@ func TestConnTrimming(t *testing.T) { cm := NewConnManager(200, 300, 0) not := cm.Notifee() - var conns []inet.Conn + var conns []network.Conn for i := 0; i < 300; i++ { rc := randConn(t, nil) conns = append(conns, rc) @@ -310,7 +312,7 @@ func TestQuickBurstRespectsSilencePeriod(t *testing.T) { cm := NewConnManager(10, 20, 0) not := cm.Notifee() - var conns []inet.Conn + var conns []network.Conn // quickly produce 30 connections (sending us above the high watermark) for i := 0; i < 30; i++ { @@ -349,7 +351,7 @@ func TestPeerProtectionSingleTag(t *testing.T) { not := cm.Notifee() // produce 20 connections with unique peers. 
- var conns []inet.Conn + var conns []network.Conn for i := 0; i < 20; i++ { rc := randConn(t, not.Disconnected) conns = append(conns, rc) @@ -358,7 +360,7 @@ func TestPeerProtectionSingleTag(t *testing.T) { } // protect the first 5 peers. - var protected []inet.Conn + var protected []network.Conn for _, c := range conns[0:5] { cm.Protect(c.RemotePeer(), "global") protected = append(protected, c) @@ -412,7 +414,7 @@ func TestPeerProtectionMultipleTags(t *testing.T) { not := cm.Notifee() // produce 20 connections with unique peers. - var conns []inet.Conn + var conns []network.Conn for i := 0; i < 20; i++ { rc := randConn(t, not.Disconnected) conns = append(conns, rc) @@ -421,7 +423,7 @@ func TestPeerProtectionMultipleTags(t *testing.T) { } // protect the first 5 peers under two tags. - var protected []inet.Conn + var protected []network.Conn for _, c := range conns[0:5] { cm.Protect(c.RemotePeer(), "tag1") cm.Protect(c.RemotePeer(), "tag2") From f72759323472fc9775225f2217c1a056a22edcb3 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Thu, 25 Jul 2019 15:11:55 -0700 Subject: [PATCH 33/58] don't count connections in the grace period against the limit fixes #46 --- p2p/net/connmgr/connmgr.go | 21 +++++-- p2p/net/connmgr/connmgr_test.go | 99 ++++++++++++++++++++++++++++++--- 2 files changed, 106 insertions(+), 14 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 0ee8901742..e430a80cfc 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -221,6 +221,7 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []network.Conn { npeers := cm.segments.countPeers() candidates := make([]*peerInfo, 0, npeers) + ncandidates := 0 cm.plk.RLock() for _, s := range cm.segments { s.Lock() @@ -229,12 +230,26 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []network.Conn { // skip over protected peer. continue } + if inf.firstSeen.Add(cm.gracePeriod).After(now) { + // skip peers in the grace period. + continue + } candidates = append(candidates, inf) + ncandidates += len(inf.conns) } s.Unlock() } cm.plk.RUnlock() + if ncandidates < cm.lowWater { + log.Info("open connection count above limit but too many are in the grace period") + // We have too many connections but fewer than lowWater + // connections out of the grace period. + // + // If we trimmed now, we'd kill potentially useful connections. + return nil + } + // Sort peers according to their value. sort.Slice(candidates, func(i, j int) bool { left, right := candidates[i], candidates[j] @@ -246,7 +261,7 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []network.Conn { return left.value < right.value }) - target := nconns - cm.lowWater + target := ncandidates - cm.lowWater // slightly overallocate because we may have more than one conns per peer selected := make([]network.Conn, 0, target+10) @@ -255,10 +270,6 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []network.Conn { if target <= 0 { break } - // TODO: should we be using firstSeen or the time associated with the connection itself? 
- if inf.firstSeen.Add(cm.gracePeriod).After(now) { - continue - } // lock this to protect from concurrent modifications from connect/disconnect events s := cm.segments.get(inf.id) diff --git a/p2p/net/connmgr/connmgr_test.go b/p2p/net/connmgr/connmgr_test.go index edaff7bc6c..ad787864c7 100644 --- a/p2p/net/connmgr/connmgr_test.go +++ b/p2p/net/connmgr/connmgr_test.go @@ -303,6 +303,63 @@ func TestDisconnected(t *testing.T) { } } +func TestGracePeriod(t *testing.T) { + if detectrace.WithRace() { + t.Skip("race detector is unhappy with this test") + } + + SilencePeriod = 0 + cm := NewConnManager(10, 20, 100*time.Millisecond) + SilencePeriod = 10 * time.Second + + not := cm.Notifee() + + var conns []network.Conn + + // Add a connection and wait the grace period. + { + rc := randConn(t, not.Disconnected) + conns = append(conns, rc) + not.Connected(nil, rc) + + time.Sleep(200 * time.Millisecond) + + if rc.(*tconn).closed { + t.Fatal("expected conn to remain open") + } + } + + // quickly add 30 connections (sending us above the high watermark) + for i := 0; i < 30; i++ { + rc := randConn(t, not.Disconnected) + conns = append(conns, rc) + not.Connected(nil, rc) + } + + cm.TrimOpenConns(context.Background()) + + for _, c := range conns { + if c.(*tconn).closed { + t.Fatal("expected no conns to be closed") + } + } + + time.Sleep(200 * time.Millisecond) + + cm.TrimOpenConns(context.Background()) + + closed := 0 + for _, c := range conns { + if c.(*tconn).closed { + closed++ + } + } + + if closed != 21 { + t.Fatal("expected to have closed 21 connections") + } +} + // see https://github.com/libp2p/go-libp2p-connmgr/issues/23 func TestQuickBurstRespectsSilencePeriod(t *testing.T) { if detectrace.WithRace() { @@ -350,13 +407,17 @@ func TestPeerProtectionSingleTag(t *testing.T) { not := cm.Notifee() - // produce 20 connections with unique peers. var conns []network.Conn - for i := 0; i < 20; i++ { + addConn := func(value int) { rc := randConn(t, not.Disconnected) conns = append(conns, rc) not.Connected(nil, rc) - cm.TagPeer(rc.RemotePeer(), "test", 20) + cm.TagPeer(rc.RemotePeer(), "test", value) + } + + // produce 20 connections with unique peers. + for i := 0; i < 20; i++ { + addConn(20) } // protect the first 5 peers. @@ -368,8 +429,21 @@ func TestPeerProtectionSingleTag(t *testing.T) { cm.TagPeer(c.RemotePeer(), "test", -100) } - // add one more connection, sending the connection manager overboard. - not.Connected(nil, randConn(t, not.Disconnected)) + // add 1 more conn, this shouldn't send us over the limit as protected conns don't count + addConn(20) + + cm.TrimOpenConns(context.Background()) + + for _, c := range conns { + if c.(*tconn).closed { + t.Error("connection was closed by connection manager") + } + } + + // add 5 more connection, sending the connection manager overboard. + for i := 0; i < 5; i++ { + addConn(20) + } cm.TrimOpenConns(context.Background()) @@ -379,15 +453,22 @@ func TestPeerProtectionSingleTag(t *testing.T) { } } + closed := 0 + for _, c := range conns { + if c.(*tconn).closed { + closed++ + } + } + if closed != 2 { + t.Errorf("expected 2 connection to be closed, found %d", closed) + } + // unprotect the first peer. cm.Unprotect(protected[0].RemotePeer(), "global") // add 2 more connections, sending the connection manager overboard again. 
for i := 0; i < 2; i++ { - rc := randConn(t, not.Disconnected) - conns = append(conns, rc) - not.Connected(nil, rc) - cm.TagPeer(rc.RemotePeer(), "test", 20) + addConn(20) } cm.TrimOpenConns(context.Background()) From ac5292eb4b2685639c76a42ef24c868d7540ec39 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Sat, 27 Jul 2019 16:45:37 -0700 Subject: [PATCH 34/58] pre-compute grace period --- p2p/net/connmgr/connmgr.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index e430a80cfc..804730dea4 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -212,7 +212,7 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []network.Conn { // disabled return nil } - now := time.Now() + nconns := int(atomic.LoadInt32(&cm.connCount)) if nconns <= cm.lowWater { log.Info("open connection count below limit") @@ -222,6 +222,8 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []network.Conn { npeers := cm.segments.countPeers() candidates := make([]*peerInfo, 0, npeers) ncandidates := 0 + gracePeriodStart := time.Now().Add(-cm.gracePeriod) + cm.plk.RLock() for _, s := range cm.segments { s.Lock() @@ -230,7 +232,7 @@ func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []network.Conn { // skip over protected peer. continue } - if inf.firstSeen.Add(cm.gracePeriod).After(now) { + if inf.firstSeen.After(gracePeriodStart) { // skip peers in the grace period. continue } From b200369cf1dfd267c71351dabb10c7fa85a88545 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Tue, 30 Jul 2019 13:52:08 -0700 Subject: [PATCH 35/58] fix: trim connections every gracePeriod/2 We're still rate-limited to once every 10 seconds. However, this means that setting the grace period to something below one minute actually _does_ something useful. fixes #51 --- p2p/net/connmgr/connmgr.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 804730dea4..563c1fbb94 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -189,7 +189,15 @@ func (cm *BasicConnMgr) TrimOpenConns(ctx context.Context) { } func (cm *BasicConnMgr) background() { - ticker := time.NewTicker(time.Minute) + interval := cm.gracePeriod / 2 + if interval < cm.silencePeriod { + interval = cm.silencePeriod + } + if interval < 10*time.Second { + interval = 10 * time.Second + } + + ticker := time.NewTicker(interval) defer ticker.Stop() for { From f1d6c943086897ce319303f386a347900ca1d506 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Wed, 11 Dec 2019 10:38:57 +0100 Subject: [PATCH 36/58] feat: block on trim 1. Perform all trims in the background goroutine. 2. Make `TrimOpenConns` return early when canceled. 3. Make multiple concurrent calls to `TrimOpenConns` block until the trim finishes. Note: It already _may_ block one caller, this just ensures that it always behaves the same way. Returning a signal channel may be a nicer solution but this is less breaking. 
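A minimal, self-contained sketch of the blocking semantics described above, using an invented `trimmer` type rather than the real BasicConnMgr. It is an illustration only: the patch below keeps a shared signal channel behind a mutex, while the caller-supplied done channel shown here corresponds to the simplification made a few commits later in this series.

    package main

    import (
        "context"
        "fmt"
    )

    // trimmer owns a single background goroutine that performs all trims;
    // callers only trigger a trim and wait for it to complete.
    type trimmer struct {
        trigger chan chan struct{}
        ctx     context.Context
    }

    func (t *trimmer) background() {
        for {
            select {
            case done := <-t.trigger:
                // ... the actual trim would happen here ...
                close(done) // unblock the caller waiting on this trim
            case <-t.ctx.Done():
                return
            }
        }
    }

    // TrimOpenConns blocks until the trim it triggered finishes, and
    // returns early if either the caller's or the manager's context is
    // canceled.
    func (t *trimmer) TrimOpenConns(ctx context.Context) {
        done := make(chan struct{})
        select {
        case t.trigger <- done:
        case <-t.ctx.Done():
            return
        case <-ctx.Done():
            return
        }
        select {
        case <-done:
        case <-t.ctx.Done():
        case <-ctx.Done():
        }
    }

    func main() {
        ctx, cancel := context.WithCancel(context.Background())
        defer cancel()
        tr := &trimmer{trigger: make(chan chan struct{}), ctx: ctx}
        go tr.background()
        tr.TrimOpenConns(context.Background())
        fmt.Println("trim finished")
    }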
--- p2p/net/connmgr/connmgr.go | 90 +++++++++++++++++++++++++-------- p2p/net/connmgr/connmgr_test.go | 85 +++++++++++++++++++++++++++++-- 2 files changed, 149 insertions(+), 26 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 804730dea4..fb94fd8730 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -36,8 +36,9 @@ type BasicConnMgr struct { plk sync.RWMutex protected map[peer.ID]map[string]struct{} - // channel-based semaphore that enforces only a single trim is in progress - trimRunningCh chan struct{} + trimMu sync.RWMutex + trimTrigger chan struct{} + trimSignal chan struct{} lastTrim time.Time silencePeriod time.Duration @@ -96,7 +97,8 @@ func NewConnManager(low, hi int, grace time.Duration) *BasicConnMgr { highWater: hi, lowWater: low, gracePeriod: grace, - trimRunningCh: make(chan struct{}, 1), + trimTrigger: make(chan struct{}), + trimSignal: make(chan struct{}), protected: make(map[peer.ID]map[string]struct{}, 16), silencePeriod: SilencePeriod, ctx: ctx, @@ -167,25 +169,28 @@ type peerInfo struct { // TODO: error return value so we can cleanly signal we are aborting because: // (a) there's another trim in progress, or (b) the silence period is in effect. func (cm *BasicConnMgr) TrimOpenConns(ctx context.Context) { + cm.trimMu.RLock() + trimSignal := cm.trimSignal + cm.trimMu.RUnlock() + + // Trigger a trim. select { - case cm.trimRunningCh <- struct{}{}: - default: - return - } - defer func() { <-cm.trimRunningCh }() - if time.Since(cm.lastTrim) < cm.silencePeriod { - // skip this attempt to trim as the last one just took place. + case cm.trimTrigger <- struct{}{}: + case <-trimSignal: + // Someone else just trimmed. return + case <-cm.ctx.Done(): + case <-ctx.Done(): + // TODO: return an error? } - defer log.EventBegin(ctx, "connCleanup").Done() - for _, c := range cm.getConnsToClose(ctx) { - log.Info("closing conn: ", c.RemotePeer()) - log.Event(ctx, "closeConn", c.RemotePeer()) - c.Close() + // Wait for the trim. + select { + case <-trimSignal: + case <-cm.ctx.Done(): + case <-ctx.Done(): + // TODO: return an error? } - - cm.lastTrim = time.Now() } func (cm *BasicConnMgr) background() { @@ -195,19 +200,56 @@ func (cm *BasicConnMgr) background() { for { select { case <-ticker.C: - if atomic.LoadInt32(&cm.connCount) > int32(cm.highWater) { - cm.TrimOpenConns(cm.ctx) + if atomic.LoadInt32(&cm.connCount) < int32(cm.highWater) { + // Below high water, skip. + continue } - + case <-cm.trimTrigger: case <-cm.ctx.Done(): return } + cm.trim() + } +} + +func (cm *BasicConnMgr) trim() { + cm.trimMu.Lock() + + // read the last trim time under the lock + lastTrim := cm.lastTrim + + // swap out the trim signal + trimSignal := cm.trimSignal + cm.trimSignal = make(chan struct{}) + + cm.trimMu.Unlock() + + // always signal a trim, even if we _don't_ trim, to unblock anyone + // waiting. + defer close(trimSignal) + + // skip this attempt to trim if the last one just took place. + if time.Since(lastTrim) < cm.silencePeriod { + return } + + // do the actual trim. + defer log.EventBegin(cm.ctx, "connCleanup").Done() + for _, c := range cm.getConnsToClose() { + log.Info("closing conn: ", c.RemotePeer()) + log.Event(cm.ctx, "closeConn", c.RemotePeer()) + c.Close() + } + + // finally, update the last trim time. + cm.trimMu.Lock() + cm.lastTrim = time.Now() + cm.trimMu.Unlock() } // getConnsToClose runs the heuristics described in TrimOpenConns and returns the // connections to close. 
-func (cm *BasicConnMgr) getConnsToClose(ctx context.Context) []network.Conn { +func (cm *BasicConnMgr) getConnsToClose() []network.Conn { if cm.lowWater == 0 || cm.highWater == 0 { // disabled return nil @@ -386,10 +428,14 @@ type CMInfo struct { // GetInfo returns the configuration and status data for this connection manager. func (cm *BasicConnMgr) GetInfo() CMInfo { + cm.trimMu.RLock() + lastTrim := cm.lastTrim + cm.trimMu.RUnlock() + return CMInfo{ HighWater: cm.highWater, LowWater: cm.lowWater, - LastTrim: cm.lastTrim, + LastTrim: lastTrim, GracePeriod: cm.gracePeriod, ConnCount: int(atomic.LoadInt32(&cm.connCount)), } diff --git a/p2p/net/connmgr/connmgr_test.go b/p2p/net/connmgr/connmgr_test.go index ad787864c7..705fa7721c 100644 --- a/p2p/net/connmgr/connmgr_test.go +++ b/p2p/net/connmgr/connmgr_test.go @@ -2,6 +2,7 @@ package connmgr import ( "context" + "sync" "testing" "time" @@ -47,6 +48,82 @@ func randConn(t testing.TB, discNotify func(network.Network, network.Conn)) netw return &tconn{peer: pid, disconnectNotify: discNotify} } +// Make sure multiple trim calls block. +func TestTrimBlocks(t *testing.T) { + cm := NewConnManager(200, 300, 0) + + cm.trimMu.RLock() + + doneCh := make(chan struct{}, 2) + go func() { + cm.TrimOpenConns(context.Background()) + doneCh <- struct{}{} + }() + go func() { + cm.TrimOpenConns(context.Background()) + doneCh <- struct{}{} + }() + time.Sleep(time.Millisecond) + select { + case <-doneCh: + cm.trimMu.RUnlock() + t.Fatal("expected trim to block") + default: + cm.trimMu.RUnlock() + } + <-doneCh + <-doneCh +} + +// Make sure we return from trim when the context is canceled. +func TestTrimCancels(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + cm := NewConnManager(200, 300, 0) + + cm.trimMu.RLock() + defer cm.trimMu.RUnlock() + + doneCh := make(chan struct{}) + go func() { + defer close(doneCh) + cm.TrimOpenConns(ctx) + }() + time.Sleep(time.Millisecond) + cancel() + <-doneCh +} + +// Make sure trim returns when closed. +func TestTrimClosed(t *testing.T) { + cm := NewConnManager(200, 300, 0) + cm.Close() + cm.TrimOpenConns(context.Background()) +} + +// Make sure joining an existing trim works. 
+func TestTrimJoin(t *testing.T) { + cm := NewConnManager(200, 300, 0) + cm.trimMu.RLock() + var wg sync.WaitGroup + wg.Add(3) + go func() { + defer wg.Done() + cm.TrimOpenConns(context.Background()) + }() + time.Sleep(time.Millisecond) + go func() { + defer wg.Done() + cm.TrimOpenConns(context.Background()) + }() + go func() { + defer wg.Done() + cm.TrimOpenConns(context.Background()) + }() + time.Sleep(time.Millisecond) + cm.trimMu.RUnlock() + wg.Wait() +} + func TestConnTrimming(t *testing.T) { cm := NewConnManager(200, 300, 0) not := cm.Notifee() @@ -86,19 +163,19 @@ func TestConnTrimming(t *testing.T) { func TestConnsToClose(t *testing.T) { cm := NewConnManager(0, 10, 0) - conns := cm.getConnsToClose(context.Background()) + conns := cm.getConnsToClose() if conns != nil { t.Fatal("expected no connections") } cm = NewConnManager(10, 0, 0) - conns = cm.getConnsToClose(context.Background()) + conns = cm.getConnsToClose() if conns != nil { t.Fatal("expected no connections") } cm = NewConnManager(1, 1, 0) - conns = cm.getConnsToClose(context.Background()) + conns = cm.getConnsToClose() if conns != nil { t.Fatal("expected no connections") } @@ -109,7 +186,7 @@ func TestConnsToClose(t *testing.T) { conn := randConn(t, nil) not.Connected(nil, conn) } - conns = cm.getConnsToClose(context.Background()) + conns = cm.getConnsToClose() if len(conns) != 0 { t.Fatal("expected no connections") } From 99ddcd1293d7ba2a7e46edc2a8424ac92741a662 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Wed, 11 Dec 2019 14:50:07 +0100 Subject: [PATCH 37/58] fix: simplify trim logic Pro: It's a lot simpler. Con: We lose the ordering guarantee. Before, TrimOpenConns would only return once a trim that started _after_ calling TrimOpenConns finished. Now, it returns as soon as it observes _any_ trim finishing (even if the trim started earlier). --- p2p/net/connmgr/connmgr.go | 66 +++++++++++++++++---------------- p2p/net/connmgr/connmgr_test.go | 14 +++---- 2 files changed, 41 insertions(+), 39 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index fb94fd8730..3de578934e 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -36,10 +36,11 @@ type BasicConnMgr struct { plk sync.RWMutex protected map[peer.ID]map[string]struct{} - trimMu sync.RWMutex - trimTrigger chan struct{} - trimSignal chan struct{} - lastTrim time.Time + trimTrigger chan chan<- struct{} + + lastTrimMu sync.RWMutex + lastTrim time.Time + silencePeriod time.Duration ctx context.Context @@ -97,8 +98,7 @@ func NewConnManager(low, hi int, grace time.Duration) *BasicConnMgr { highWater: hi, lowWater: low, gracePeriod: grace, - trimTrigger: make(chan struct{}), - trimSignal: make(chan struct{}), + trimTrigger: make(chan chan<- struct{}), protected: make(map[peer.ID]map[string]struct{}, 16), silencePeriod: SilencePeriod, ctx: ctx, @@ -169,16 +169,10 @@ type peerInfo struct { // TODO: error return value so we can cleanly signal we are aborting because: // (a) there's another trim in progress, or (b) the silence period is in effect. func (cm *BasicConnMgr) TrimOpenConns(ctx context.Context) { - cm.trimMu.RLock() - trimSignal := cm.trimSignal - cm.trimMu.RUnlock() - // Trigger a trim. + ch := make(chan struct{}) select { - case cm.trimTrigger <- struct{}{}: - case <-trimSignal: - // Someone else just trimmed. - return + case cm.trimTrigger <- ch: case <-cm.ctx.Done(): case <-ctx.Done(): // TODO: return an error? 
@@ -186,7 +180,7 @@ func (cm *BasicConnMgr) TrimOpenConns(ctx context.Context) { // Wait for the trim. select { - case <-trimSignal: + case <-ch: case <-cm.ctx.Done(): case <-ctx.Done(): // TODO: return an error? @@ -198,35 +192,43 @@ func (cm *BasicConnMgr) background() { defer ticker.Stop() for { + var waiting chan<- struct{} select { case <-ticker.C: if atomic.LoadInt32(&cm.connCount) < int32(cm.highWater) { // Below high water, skip. continue } - case <-cm.trimTrigger: + case waiting = <-cm.trimTrigger: case <-cm.ctx.Done(): return } cm.trim() + + // Notify anyone waiting on this trim. + if waiting != nil { + close(waiting) + } + + for { + select { + case waiting = <-cm.trimTrigger: + if waiting != nil { + close(waiting) + } + continue + default: + } + break + } } } func (cm *BasicConnMgr) trim() { - cm.trimMu.Lock() - + cm.lastTrimMu.RLock() // read the last trim time under the lock lastTrim := cm.lastTrim - - // swap out the trim signal - trimSignal := cm.trimSignal - cm.trimSignal = make(chan struct{}) - - cm.trimMu.Unlock() - - // always signal a trim, even if we _don't_ trim, to unblock anyone - // waiting. - defer close(trimSignal) + cm.lastTrimMu.RUnlock() // skip this attempt to trim if the last one just took place. if time.Since(lastTrim) < cm.silencePeriod { @@ -242,9 +244,9 @@ func (cm *BasicConnMgr) trim() { } // finally, update the last trim time. - cm.trimMu.Lock() + cm.lastTrimMu.Lock() cm.lastTrim = time.Now() - cm.trimMu.Unlock() + cm.lastTrimMu.Unlock() } // getConnsToClose runs the heuristics described in TrimOpenConns and returns the @@ -428,9 +430,9 @@ type CMInfo struct { // GetInfo returns the configuration and status data for this connection manager. func (cm *BasicConnMgr) GetInfo() CMInfo { - cm.trimMu.RLock() + cm.lastTrimMu.RLock() lastTrim := cm.lastTrim - cm.trimMu.RUnlock() + cm.lastTrimMu.RUnlock() return CMInfo{ HighWater: cm.highWater, diff --git a/p2p/net/connmgr/connmgr_test.go b/p2p/net/connmgr/connmgr_test.go index 705fa7721c..8e1b4b15d7 100644 --- a/p2p/net/connmgr/connmgr_test.go +++ b/p2p/net/connmgr/connmgr_test.go @@ -52,7 +52,7 @@ func randConn(t testing.TB, discNotify func(network.Network, network.Conn)) netw func TestTrimBlocks(t *testing.T) { cm := NewConnManager(200, 300, 0) - cm.trimMu.RLock() + cm.lastTrimMu.RLock() doneCh := make(chan struct{}, 2) go func() { @@ -66,10 +66,10 @@ func TestTrimBlocks(t *testing.T) { time.Sleep(time.Millisecond) select { case <-doneCh: - cm.trimMu.RUnlock() + cm.lastTrimMu.RUnlock() t.Fatal("expected trim to block") default: - cm.trimMu.RUnlock() + cm.lastTrimMu.RUnlock() } <-doneCh <-doneCh @@ -80,8 +80,8 @@ func TestTrimCancels(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) cm := NewConnManager(200, 300, 0) - cm.trimMu.RLock() - defer cm.trimMu.RUnlock() + cm.lastTrimMu.RLock() + defer cm.lastTrimMu.RUnlock() doneCh := make(chan struct{}) go func() { @@ -103,7 +103,7 @@ func TestTrimClosed(t *testing.T) { // Make sure joining an existing trim works. 
func TestTrimJoin(t *testing.T) { cm := NewConnManager(200, 300, 0) - cm.trimMu.RLock() + cm.lastTrimMu.RLock() var wg sync.WaitGroup wg.Add(3) go func() { @@ -120,7 +120,7 @@ func TestTrimJoin(t *testing.T) { cm.TrimOpenConns(context.Background()) }() time.Sleep(time.Millisecond) - cm.trimMu.RUnlock() + cm.lastTrimMu.RUnlock() wg.Wait() } From 936762820afc0fd2252e758a515aa7c0ca187981 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Wed, 11 Dec 2019 15:26:38 +0100 Subject: [PATCH 38/58] docs: document TrimOpenConns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Raúl Kripalani --- p2p/net/connmgr/connmgr.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 3de578934e..b385533a88 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -166,9 +166,13 @@ type peerInfo struct { // pruning those peers with the lowest scores first, as long as they are not within their // grace period. // -// TODO: error return value so we can cleanly signal we are aborting because: -// (a) there's another trim in progress, or (b) the silence period is in effect. +// This function blocks until a trim is completed. If a trim is underway, a new +// one won't be started, and instead it'll wait until that one is completed before +// returning. func (cm *BasicConnMgr) TrimOpenConns(ctx context.Context) { + // TODO: error return value so we can cleanly signal we are aborting because: + // (a) there's another trim in progress, or (b) the silence period is in effect. + // Trigger a trim. ch := make(chan struct{}) select { From bce720e9350de758200670b5f157cc6fa60b05e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Thu, 14 May 2020 17:34:12 +0100 Subject: [PATCH 39/58] implement decaying tags. (#61) --- p2p/net/connmgr/connmgr.go | 85 ++++++--- p2p/net/connmgr/decay.go | 278 +++++++++++++++++++++++++++++ p2p/net/connmgr/decay_test.go | 318 ++++++++++++++++++++++++++++++++++ p2p/net/connmgr/options.go | 24 +++ 4 files changed, 679 insertions(+), 26 deletions(-) create mode 100644 p2p/net/connmgr/decay.go create mode 100644 p2p/net/connmgr/decay_test.go create mode 100644 p2p/net/connmgr/options.go diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index b385533a88..fda493c4e9 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -27,27 +27,30 @@ var log = logging.Logger("connmgr") // // See configuration parameters in NewConnManager. type BasicConnMgr struct { - highWater int - lowWater int - connCount int32 - gracePeriod time.Duration - segments segments + *decayer + + cfg *BasicConnManagerConfig + segments segments plk sync.RWMutex protected map[peer.ID]map[string]struct{} - trimTrigger chan chan<- struct{} + // channel-based semaphore that enforces only a single trim is in progress + trimRunningCh chan struct{} + trimTrigger chan chan<- struct{} + connCount int32 lastTrimMu sync.RWMutex lastTrim time.Time - silencePeriod time.Duration - ctx context.Context cancel func() } -var _ connmgr.ConnManager = (*BasicConnMgr)(nil) +var ( + _ connmgr.ConnManager = (*BasicConnMgr)(nil) + _ connmgr.Decayer = (*BasicConnMgr)(nil) +) type segment struct { sync.Mutex @@ -80,6 +83,7 @@ func (s *segment) tagInfoFor(p peer.ID) *peerInfo { firstSeen: time.Now(), // this timestamp will be updated when the first Connected notification arrives. 
temp: true, tags: make(map[string]int), + decaying: make(map[*decayingTag]*connmgr.DecayingValue), conns: make(map[network.Conn]time.Time), } s.peers[p] = pi @@ -92,15 +96,32 @@ func (s *segment) tagInfoFor(p peer.ID) *peerInfo { // their connections terminated) until 'low watermark' peers remain. // * grace is the amount of time a newly opened connection is given before it becomes // subject to pruning. -func NewConnManager(low, hi int, grace time.Duration) *BasicConnMgr { +func NewConnManager(low, hi int, grace time.Duration, opts ...Option) *BasicConnMgr { ctx, cancel := context.WithCancel(context.Background()) - cm := &BasicConnMgr{ + + cfg := &BasicConnManagerConfig{ highWater: hi, lowWater: low, gracePeriod: grace, + silencePeriod: SilencePeriod, + } + + for _, o := range opts { + // TODO we're ignoring errors from options because we have no way to + // return them, or otherwise act on them. + _ = o(cfg) + } + + if cfg.decayer == nil { + // Set the default decayer config. + cfg.decayer = (&DecayerCfg{}).WithDefaults() + } + + cm := &BasicConnMgr{ + cfg: cfg, + trimRunningCh: make(chan struct{}, 1), trimTrigger: make(chan chan<- struct{}), protected: make(map[peer.ID]map[string]struct{}, 16), - silencePeriod: SilencePeriod, ctx: ctx, cancel: cancel, segments: func() (ret segments) { @@ -113,11 +134,17 @@ func NewConnManager(low, hi int, grace time.Duration) *BasicConnMgr { }(), } + decay, _ := NewDecayer(cfg.decayer, cm) + cm.decayer = decay + go cm.background() return cm } func (cm *BasicConnMgr) Close() error { + if err := cm.decayer.Close(); err != nil { + return err + } cm.cancel() return nil } @@ -151,10 +178,12 @@ func (cm *BasicConnMgr) Unprotect(id peer.ID, tag string) (protected bool) { // peerInfo stores metadata for a given peer. type peerInfo struct { - id peer.ID - tags map[string]int // value for each tag - value int // cached sum of all tag values - temp bool // this is a temporary entry holding early tags, and awaiting connections + id peer.ID + tags map[string]int // value for each tag + decaying map[*decayingTag]*connmgr.DecayingValue // decaying tags + + value int // cached sum of all tag values + temp bool // this is a temporary entry holding early tags, and awaiting connections conns map[network.Conn]time.Time // start time of each connection @@ -199,7 +228,7 @@ func (cm *BasicConnMgr) background() { var waiting chan<- struct{} select { case <-ticker.C: - if atomic.LoadInt32(&cm.connCount) < int32(cm.highWater) { + if atomic.LoadInt32(&cm.connCount) < int32(cm.cfg.highWater) { // Below high water, skip. continue } @@ -235,7 +264,7 @@ func (cm *BasicConnMgr) trim() { cm.lastTrimMu.RUnlock() // skip this attempt to trim if the last one just took place. - if time.Since(lastTrim) < cm.silencePeriod { + if time.Since(lastTrim) < cm.cfg.silencePeriod { return } @@ -256,13 +285,13 @@ func (cm *BasicConnMgr) trim() { // getConnsToClose runs the heuristics described in TrimOpenConns and returns the // connections to close. 
func (cm *BasicConnMgr) getConnsToClose() []network.Conn { - if cm.lowWater == 0 || cm.highWater == 0 { + if cm.cfg.lowWater == 0 || cm.cfg.highWater == 0 { // disabled return nil } nconns := int(atomic.LoadInt32(&cm.connCount)) - if nconns <= cm.lowWater { + if nconns <= cm.cfg.lowWater { log.Info("open connection count below limit") return nil } @@ -270,7 +299,7 @@ func (cm *BasicConnMgr) getConnsToClose() []network.Conn { npeers := cm.segments.countPeers() candidates := make([]*peerInfo, 0, npeers) ncandidates := 0 - gracePeriodStart := time.Now().Add(-cm.gracePeriod) + gracePeriodStart := time.Now().Add(-cm.cfg.gracePeriod) cm.plk.RLock() for _, s := range cm.segments { @@ -291,7 +320,7 @@ func (cm *BasicConnMgr) getConnsToClose() []network.Conn { } cm.plk.RUnlock() - if ncandidates < cm.lowWater { + if ncandidates < cm.cfg.lowWater { log.Info("open connection count above limit but too many are in the grace period") // We have too many connections but fewer than lowWater // connections out of the grace period. @@ -311,7 +340,7 @@ func (cm *BasicConnMgr) getConnsToClose() []network.Conn { return left.value < right.value }) - target := ncandidates - cm.lowWater + target := ncandidates - cm.cfg.lowWater // slightly overallocate because we may have more than one conns per peer selected := make([]network.Conn, 0, target+10) @@ -363,6 +392,9 @@ func (cm *BasicConnMgr) GetTagInfo(p peer.ID) *connmgr.TagInfo { for t, v := range pi.tags { out.Tags[t] = v } + for t, v := range pi.decaying { + out.Tags[t.name] = v.Value + } for c, t := range pi.conns { out.Conns[c.RemoteMultiaddr().String()] = t } @@ -439,10 +471,10 @@ func (cm *BasicConnMgr) GetInfo() CMInfo { cm.lastTrimMu.RUnlock() return CMInfo{ - HighWater: cm.highWater, - LowWater: cm.lowWater, + HighWater: cm.cfg.highWater, + LowWater: cm.cfg.lowWater, LastTrim: lastTrim, - GracePeriod: cm.gracePeriod, + GracePeriod: cm.cfg.gracePeriod, ConnCount: int(atomic.LoadInt32(&cm.connCount)), } } @@ -478,6 +510,7 @@ func (nn *cmNotifee) Connected(n network.Network, c network.Conn) { id: id, firstSeen: time.Now(), tags: make(map[string]int), + decaying: make(map[*decayingTag]*connmgr.DecayingValue), conns: make(map[network.Conn]time.Time), } s.peers[id] = pinfo diff --git a/p2p/net/connmgr/decay.go b/p2p/net/connmgr/decay.go new file mode 100644 index 0000000000..fd523bf36f --- /dev/null +++ b/p2p/net/connmgr/decay.go @@ -0,0 +1,278 @@ +package connmgr + +import ( + "fmt" + "sync" + "sync/atomic" + "time" + + "github.com/libp2p/go-libp2p-core/connmgr" + "github.com/libp2p/go-libp2p-core/peer" + + "github.com/benbjohnson/clock" +) + +// DefaultResolution is the default resolution of the decay tracker. +var DefaultResolution = 1 * time.Minute + +// bumpCmd represents a bump command. +type bumpCmd struct { + peer peer.ID + tag *decayingTag + delta int +} + +// decayer tracks and manages all decaying tags and their values. +type decayer struct { + cfg *DecayerCfg + mgr *BasicConnMgr + clock clock.Clock // for testing. + + tagsMu sync.Mutex + knownTags map[string]*decayingTag + + // lastTick stores the last time the decayer ticked. Guarded by atomic. + lastTick atomic.Value + + // bumpCh queues bump commands to be processed by the loop. + bumpCh chan bumpCmd + + // closure thingies. + closeCh chan struct{} + doneCh chan struct{} + err error +} + +var _ connmgr.Decayer = (*decayer)(nil) + +// DecayerCfg is the configuration object for the Decayer. 
+type DecayerCfg struct { + Resolution time.Duration + Clock clock.Clock +} + +// WithDefaults writes the default values on this DecayerConfig instance, +// and returns itself for chainability. +// +// cfg := (&DecayerCfg{}).WithDefaults() +// cfg.Resolution = 30 * time.Second +// t := NewDecayer(cfg, cm) +func (cfg *DecayerCfg) WithDefaults() *DecayerCfg { + cfg.Resolution = DefaultResolution + return cfg +} + +// NewDecayer creates a new decaying tag registry. +func NewDecayer(cfg *DecayerCfg, mgr *BasicConnMgr) (*decayer, error) { + // use real time if the Clock in the config is nil. + if cfg.Clock == nil { + cfg.Clock = clock.New() + } + + d := &decayer{ + cfg: cfg, + mgr: mgr, + clock: cfg.Clock, + knownTags: make(map[string]*decayingTag), + bumpCh: make(chan bumpCmd, 128), + closeCh: make(chan struct{}), + doneCh: make(chan struct{}), + } + + d.lastTick.Store(d.clock.Now()) + + // kick things off. + go d.process() + + return d, nil +} + +func (d *decayer) RegisterDecayingTag(name string, interval time.Duration, decayFn connmgr.DecayFn, bumpFn connmgr.BumpFn) (connmgr.DecayingTag, error) { + d.tagsMu.Lock() + defer d.tagsMu.Unlock() + + tag, ok := d.knownTags[name] + if ok { + return nil, fmt.Errorf("decaying tag with name %s already exists", name) + } + + if interval < d.cfg.Resolution { + log.Warnf("decay interval for %s (%s) was lower than tracker's resolution (%s); overridden to resolution", + name, interval, d.cfg.Resolution) + interval = d.cfg.Resolution + } + + if interval%d.cfg.Resolution != 0 { + log.Warnf("decay interval for tag %s (%s) is not a multiple of tracker's resolution (%s); "+ + "some precision may be lost", name, interval, d.cfg.Resolution) + } + + lastTick := d.lastTick.Load().(time.Time) + tag = &decayingTag{ + trkr: d, + name: name, + interval: interval, + nextTick: lastTick.Add(interval), + decayFn: decayFn, + bumpFn: bumpFn, + } + + d.knownTags[name] = tag + return tag, nil +} + +// Close closes the Decayer. It is idempotent. +func (d *decayer) Close() error { + select { + case <-d.doneCh: + return d.err + default: + } + + close(d.closeCh) + <-d.doneCh + return d.err +} + +// process is the heart of the tracker. It performs the following duties: +// +// 1. Manages decay. +// 2. Applies score bumps. +// 3. Yields when closed. +func (d *decayer) process() { + defer close(d.doneCh) + + ticker := d.clock.Ticker(d.cfg.Resolution) + defer ticker.Stop() + + var ( + bmp bumpCmd + now time.Time + visit = make(map[*decayingTag]struct{}) + ) + + for { + select { + case now = <-ticker.C: + d.lastTick.Store(now) + + d.tagsMu.Lock() + for _, tag := range d.knownTags { + if tag.nextTick.After(now) { + // skip the tag. + continue + } + // Mark the tag to be updated in this round. + visit[tag] = struct{}{} + } + d.tagsMu.Unlock() + + // Visit each peer, and decay tags that need to be decayed. + for _, s := range d.mgr.segments { + s.Lock() + + // Entered a segment that contains peers. Process each peer. + for _, p := range s.peers { + for tag, v := range p.decaying { + if _, ok := visit[tag]; !ok { + // skip this tag. + continue + } + + // ~ this value needs to be visited. ~ + var delta int + if after, rm := tag.decayFn(*v); rm { + // delete the value and move on to the next tag. + delta -= v.Value + delete(p.decaying, tag) + } else { + // accumulate the delta, and apply the changes. + delta += after - v.Value + v.Value, v.LastVisit = after, now + } + p.value += delta + } + } + + s.Unlock() + } + + // Reset each tag's next visit round, and clear the visited set. 
+ for tag := range visit { + tag.nextTick = tag.nextTick.Add(tag.interval) + delete(visit, tag) + } + + case bmp = <-d.bumpCh: + var ( + now = d.clock.Now() + peer, tag = bmp.peer, bmp.tag + ) + + s := d.mgr.segments.get(peer) + s.Lock() + + p := s.tagInfoFor(peer) + v, ok := p.decaying[tag] + if !ok { + v = &connmgr.DecayingValue{ + Tag: tag, + Peer: peer, + LastVisit: now, + Added: now, + Value: 0, + } + p.decaying[tag] = v + } + + prev := v.Value + v.Value, v.LastVisit = v.Tag.(*decayingTag).bumpFn(*v, bmp.delta), now + p.value += v.Value - prev + + s.Unlock() + + case <-d.closeCh: + return + + } + } +} + +// decayingTag represents a decaying tag, with an associated decay interval, a +// decay function, and a bump function. +type decayingTag struct { + trkr *decayer + name string + interval time.Duration + nextTick time.Time + decayFn connmgr.DecayFn + bumpFn connmgr.BumpFn +} + +var _ connmgr.DecayingTag = (*decayingTag)(nil) + +func (t *decayingTag) Name() string { + return t.name +} + +func (t *decayingTag) Interval() time.Duration { + return t.interval +} + +// Bump bumps a tag for this peer. +func (t *decayingTag) Bump(p peer.ID, delta int) error { + bmp := bumpCmd{peer: p, tag: t, delta: delta} + + select { + case t.trkr.bumpCh <- bmp: + return nil + + default: + return fmt.Errorf( + "unable to bump decaying tag for peer %s, tag %s, delta %d; queue full (len=%d)", + p.Pretty(), + t.name, + delta, + len(t.trkr.bumpCh)) + } +} diff --git a/p2p/net/connmgr/decay_test.go b/p2p/net/connmgr/decay_test.go new file mode 100644 index 0000000000..fad5e0d8b6 --- /dev/null +++ b/p2p/net/connmgr/decay_test.go @@ -0,0 +1,318 @@ +package connmgr + +import ( + "testing" + "time" + + "github.com/libp2p/go-libp2p-core/connmgr" + "github.com/libp2p/go-libp2p-core/peer" + tu "github.com/libp2p/go-libp2p-core/test" + "github.com/stretchr/testify/require" + + "github.com/benbjohnson/clock" +) + +const TestResolution = 50 * time.Millisecond + +func TestDecayExpire(t *testing.T) { + var ( + id = tu.RandPeerIDFatal(t) + mgr, decay, mockClock = testDecayTracker(t) + ) + + tag, err := decay.RegisterDecayingTag("pop", 250*time.Millisecond, connmgr.DecayExpireWhenInactive(1*time.Second), connmgr.BumpSumUnbounded()) + if err != nil { + t.Fatal(err) + } + + err = tag.Bump(id, 10) + if err != nil { + t.Fatal(err) + } + + // give time for the bump command to process. 
+ <-time.After(100 * time.Millisecond) + + if v := mgr.GetTagInfo(id).Value; v != 10 { + t.Fatalf("wrong value; expected = %d; got = %d", 10, v) + } + + mockClock.Add(250 * time.Millisecond) + mockClock.Add(250 * time.Millisecond) + mockClock.Add(250 * time.Millisecond) + mockClock.Add(250 * time.Millisecond) + + if v := mgr.GetTagInfo(id).Value; v != 0 { + t.Fatalf("wrong value; expected = %d; got = %d", 0, v) + } +} + +func TestMultipleBumps(t *testing.T) { + var ( + id = tu.RandPeerIDFatal(t) + mgr, decay, _ = testDecayTracker(t) + ) + + tag, err := decay.RegisterDecayingTag("pop", 250*time.Millisecond, connmgr.DecayExpireWhenInactive(1*time.Second), connmgr.BumpSumBounded(10, 20)) + if err != nil { + t.Fatal(err) + } + + err = tag.Bump(id, 5) + if err != nil { + t.Fatal(err) + } + + <-time.After(100 * time.Millisecond) + + if v := mgr.GetTagInfo(id).Value; v != 10 { + t.Fatalf("wrong value; expected = %d; got = %d", 10, v) + } + + err = tag.Bump(id, 100) + if err != nil { + t.Fatal(err) + } + + <-time.After(100 * time.Millisecond) + + if v := mgr.GetTagInfo(id).Value; v != 20 { + t.Fatalf("wrong value; expected = %d; got = %d", 20, v) + } +} + +func TestMultipleTagsNoDecay(t *testing.T) { + var ( + id = tu.RandPeerIDFatal(t) + mgr, decay, _ = testDecayTracker(t) + ) + + tag1, err := decay.RegisterDecayingTag("beep", 250*time.Millisecond, connmgr.DecayNone(), connmgr.BumpSumBounded(0, 100)) + if err != nil { + t.Fatal(err) + } + + tag2, err := decay.RegisterDecayingTag("bop", 250*time.Millisecond, connmgr.DecayNone(), connmgr.BumpSumBounded(0, 100)) + if err != nil { + t.Fatal(err) + } + + tag3, err := decay.RegisterDecayingTag("foo", 250*time.Millisecond, connmgr.DecayNone(), connmgr.BumpSumBounded(0, 100)) + if err != nil { + t.Fatal(err) + } + + _ = tag1.Bump(id, 100) + _ = tag2.Bump(id, 100) + _ = tag3.Bump(id, 100) + _ = tag1.Bump(id, 100) + _ = tag2.Bump(id, 100) + _ = tag3.Bump(id, 100) + + <-time.After(500 * time.Millisecond) + + // all tags are upper-bounded, so the score must be 300 + ti := mgr.GetTagInfo(id) + if v := ti.Value; v != 300 { + t.Fatalf("wrong value; expected = %d; got = %d", 300, v) + } + + for _, s := range []string{"beep", "bop", "foo"} { + if v, ok := ti.Tags[s]; !ok || v != 100 { + t.Fatalf("expected tag %s to be 100; was = %d", s, v) + } + } +} + +func TestCustomFunctions(t *testing.T) { + var ( + id = tu.RandPeerIDFatal(t) + mgr, decay, mockClock = testDecayTracker(t) + ) + + tag1, err := decay.RegisterDecayingTag("beep", 250*time.Millisecond, connmgr.DecayFixed(10), connmgr.BumpSumUnbounded()) + if err != nil { + t.Fatal(err) + } + + tag2, err := decay.RegisterDecayingTag("bop", 100*time.Millisecond, connmgr.DecayFixed(5), connmgr.BumpSumUnbounded()) + if err != nil { + t.Fatal(err) + } + + tag3, err := decay.RegisterDecayingTag("foo", 50*time.Millisecond, connmgr.DecayFixed(1), connmgr.BumpSumUnbounded()) + if err != nil { + t.Fatal(err) + } + + _ = tag1.Bump(id, 1000) + _ = tag2.Bump(id, 1000) + _ = tag3.Bump(id, 1000) + + <-time.After(500 * time.Millisecond) + + // no decay has occurred yet, so score must be 3000. + if v := mgr.GetTagInfo(id).Value; v != 3000 { + t.Fatalf("wrong value; expected = %d; got = %d", 3000, v) + } + + // only tag3 should tick. + mockClock.Add(50 * time.Millisecond) + if v := mgr.GetTagInfo(id).Value; v != 2999 { + t.Fatalf("wrong value; expected = %d; got = %d", 2999, v) + } + + // tag3 will tick thrice, tag2 will tick twice. 
+ mockClock.Add(150 * time.Millisecond) + if v := mgr.GetTagInfo(id).Value; v != 2986 { + t.Fatalf("wrong value; expected = %d; got = %d", 2986, v) + } + + // tag3 will tick once, tag1 will tick once. + mockClock.Add(50 * time.Millisecond) + if v := mgr.GetTagInfo(id).Value; v != 2975 { + t.Fatalf("wrong value; expected = %d; got = %d", 2975, v) + } +} + +func TestMultiplePeers(t *testing.T) { + var ( + ids = []peer.ID{tu.RandPeerIDFatal(t), tu.RandPeerIDFatal(t), tu.RandPeerIDFatal(t)} + mgr, decay, mockClock = testDecayTracker(t) + ) + + tag1, err := decay.RegisterDecayingTag("beep", 250*time.Millisecond, connmgr.DecayFixed(10), connmgr.BumpSumUnbounded()) + if err != nil { + t.Fatal(err) + } + + tag2, err := decay.RegisterDecayingTag("bop", 100*time.Millisecond, connmgr.DecayFixed(5), connmgr.BumpSumUnbounded()) + if err != nil { + t.Fatal(err) + } + + tag3, err := decay.RegisterDecayingTag("foo", 50*time.Millisecond, connmgr.DecayFixed(1), connmgr.BumpSumUnbounded()) + if err != nil { + t.Fatal(err) + } + + _ = tag1.Bump(ids[0], 1000) + _ = tag2.Bump(ids[0], 1000) + _ = tag3.Bump(ids[0], 1000) + + _ = tag1.Bump(ids[1], 500) + _ = tag2.Bump(ids[1], 500) + _ = tag3.Bump(ids[1], 500) + + _ = tag1.Bump(ids[2], 100) + _ = tag2.Bump(ids[2], 100) + _ = tag3.Bump(ids[2], 100) + + // allow the background goroutine to process bumps. + <-time.After(500 * time.Millisecond) + + mockClock.Add(3 * time.Second) + + // allow the background goroutine to process ticks. + <-time.After(500 * time.Millisecond) + + if v := mgr.GetTagInfo(ids[0]).Value; v != 2670 { + t.Fatalf("wrong value; expected = %d; got = %d", 2670, v) + } + + if v := mgr.GetTagInfo(ids[1]).Value; v != 1170 { + t.Fatalf("wrong value; expected = %d; got = %d", 1170, v) + } + + if v := mgr.GetTagInfo(ids[2]).Value; v != 40 { + t.Fatalf("wrong value; expected = %d; got = %d", 40, v) + } +} + +func TestLinearDecayOverwrite(t *testing.T) { + var ( + id = tu.RandPeerIDFatal(t) + mgr, decay, mockClock = testDecayTracker(t) + ) + + tag1, err := decay.RegisterDecayingTag("beep", 250*time.Millisecond, connmgr.DecayLinear(0.5), connmgr.BumpOverwrite()) + if err != nil { + t.Fatal(err) + } + + _ = tag1.Bump(id, 1000) + // allow the background goroutine to process bumps. + <-time.After(500 * time.Millisecond) + + mockClock.Add(250 * time.Millisecond) + + if v := mgr.GetTagInfo(id).Value; v != 500 { + t.Fatalf("value should be half; got = %d", v) + } + + mockClock.Add(250 * time.Millisecond) + + if v := mgr.GetTagInfo(id).Value; v != 250 { + t.Fatalf("value should be half; got = %d", v) + } + + _ = tag1.Bump(id, 1000) + // allow the background goroutine to process bumps. + <-time.After(500 * time.Millisecond) + + if v := mgr.GetTagInfo(id).Value; v != 1000 { + t.Fatalf("value should 1000; got = %d", v) + } +} + +func TestResolutionMisaligned(t *testing.T) { + var ( + id = tu.RandPeerIDFatal(t) + mgr, decay, mockClock = testDecayTracker(t) + require = require.New(t) + ) + + tag1, err := decay.RegisterDecayingTag("beep", time.Duration(float64(TestResolution)*1.4), connmgr.DecayFixed(1), connmgr.BumpOverwrite()) + require.NoError(err) + + tag2, err := decay.RegisterDecayingTag("bop", time.Duration(float64(TestResolution)*2.4), connmgr.DecayFixed(1), connmgr.BumpOverwrite()) + require.NoError(err) + + _ = tag1.Bump(id, 1000) + _ = tag2.Bump(id, 1000) + // allow the background goroutine to process bumps. + <-time.After(500 * time.Millisecond) + + // nothing has happened. 
+ mockClock.Add(TestResolution) + require.Equal(1000, mgr.GetTagInfo(id).Tags["beep"]) + require.Equal(1000, mgr.GetTagInfo(id).Tags["bop"]) + + // next tick; tag1 would've ticked. + mockClock.Add(TestResolution) + require.Equal(999, mgr.GetTagInfo(id).Tags["beep"]) + require.Equal(1000, mgr.GetTagInfo(id).Tags["bop"]) + + // next tick; tag1 would've ticked twice, tag2 once. + mockClock.Add(TestResolution) + require.Equal(998, mgr.GetTagInfo(id).Tags["beep"]) + require.Equal(999, mgr.GetTagInfo(id).Tags["bop"]) + + require.Equal(1997, mgr.GetTagInfo(id).Value) +} + +func testDecayTracker(tb testing.TB) (*BasicConnMgr, connmgr.Decayer, *clock.Mock) { + mockClock := clock.NewMock() + cfg := &DecayerCfg{ + Resolution: TestResolution, + Clock: mockClock, + } + + mgr := NewConnManager(10, 10, 1*time.Second, DecayerConfig(cfg)) + decay, ok := connmgr.SupportsDecay(mgr) + if !ok { + tb.Fatalf("connmgr does not support decay") + } + + return mgr, decay, mockClock +} diff --git a/p2p/net/connmgr/options.go b/p2p/net/connmgr/options.go new file mode 100644 index 0000000000..cfe919e13b --- /dev/null +++ b/p2p/net/connmgr/options.go @@ -0,0 +1,24 @@ +package connmgr + +import "time" + +// BasicConnManagerConfig is the configuration struct for the basic connection +// manager. +type BasicConnManagerConfig struct { + highWater int + lowWater int + gracePeriod time.Duration + silencePeriod time.Duration + decayer *DecayerCfg +} + +// Option represents an option for the basic connection manager. +type Option func(*BasicConnManagerConfig) error + +// DecayerConfig applies a configuration for the decayer. +func DecayerConfig(opts *DecayerCfg) Option { + return func(cfg *BasicConnManagerConfig) error { + cfg.decayer = opts + return nil + } +} From 9fba5602d0b31bcbfddf37b1d240b0d835a37f70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Kripalani?= Date: Fri, 15 May 2020 09:27:44 +0100 Subject: [PATCH 40/58] decaying tags: support removal and closure. (#72) --- p2p/net/connmgr/decay.go | 112 ++++++++++++++++++++++++++++------ p2p/net/connmgr/decay_test.go | 100 +++++++++++++++++++++++++++++- 2 files changed, 194 insertions(+), 18 deletions(-) diff --git a/p2p/net/connmgr/decay.go b/p2p/net/connmgr/decay.go index fd523bf36f..62029a539b 100644 --- a/p2p/net/connmgr/decay.go +++ b/p2p/net/connmgr/decay.go @@ -22,6 +22,12 @@ type bumpCmd struct { delta int } +// removeCmd represents a tag removal command. +type removeCmd struct { + peer peer.ID + tag *decayingTag +} + // decayer tracks and manages all decaying tags and their values. type decayer struct { cfg *DecayerCfg @@ -34,8 +40,10 @@ type decayer struct { // lastTick stores the last time the decayer ticked. Guarded by atomic. lastTick atomic.Value - // bumpCh queues bump commands to be processed by the loop. - bumpCh chan bumpCmd + // bumpTagCh queues bump commands to be processed by the loop. + bumpTagCh chan bumpCmd + removeTagCh chan removeCmd + closeTagCh chan *decayingTag // closure thingies. 
closeCh chan struct{} @@ -70,13 +78,15 @@ func NewDecayer(cfg *DecayerCfg, mgr *BasicConnMgr) (*decayer, error) { } d := &decayer{ - cfg: cfg, - mgr: mgr, - clock: cfg.Clock, - knownTags: make(map[string]*decayingTag), - bumpCh: make(chan bumpCmd, 128), - closeCh: make(chan struct{}), - doneCh: make(chan struct{}), + cfg: cfg, + mgr: mgr, + clock: cfg.Clock, + knownTags: make(map[string]*decayingTag), + bumpTagCh: make(chan bumpCmd, 128), + removeTagCh: make(chan removeCmd, 128), + closeTagCh: make(chan *decayingTag, 128), + closeCh: make(chan struct{}), + doneCh: make(chan struct{}), } d.lastTick.Store(d.clock.Now()) @@ -203,7 +213,7 @@ func (d *decayer) process() { delete(visit, tag) } - case bmp = <-d.bumpCh: + case bmp = <-d.bumpTagCh: var ( now = d.clock.Now() peer, tag = bmp.peer, bmp.tag @@ -231,9 +241,42 @@ func (d *decayer) process() { s.Unlock() + case rm := <-d.removeTagCh: + s := d.mgr.segments.get(rm.peer) + s.Lock() + + p := s.tagInfoFor(rm.peer) + v, ok := p.decaying[rm.tag] + if !ok { + s.Unlock() + continue + } + p.value -= v.Value + delete(p.decaying, rm.tag) + s.Unlock() + + case t := <-d.closeTagCh: + // Stop tracking the tag. + d.tagsMu.Lock() + delete(d.knownTags, t.name) + d.tagsMu.Unlock() + + // Remove the tag from all peers that had it in the connmgr. + for _, s := range d.mgr.segments { + // visit all segments, and attempt to remove the tag from all the peers it stores. + s.Lock() + for _, p := range s.peers { + if dt, ok := p.decaying[t]; ok { + // decrease the value of the tagInfo, and delete the tag. + p.value -= dt.Value + delete(p.decaying, t) + } + } + s.Unlock() + } + case <-d.closeCh: return - } } } @@ -247,6 +290,10 @@ type decayingTag struct { nextTick time.Time decayFn connmgr.DecayFn bumpFn connmgr.BumpFn + + // closed marks this tag as closed, so that if it's bumped after being + // closed, we can return an error. 0 = false; 1 = true; guarded by atomic. + closed int32 } var _ connmgr.DecayingTag = (*decayingTag)(nil) @@ -261,18 +308,49 @@ func (t *decayingTag) Interval() time.Duration { // Bump bumps a tag for this peer. 
func (t *decayingTag) Bump(p peer.ID, delta int) error { + if atomic.LoadInt32(&t.closed) == 1 { + return fmt.Errorf("decaying tag %s had been closed; no further bumps are accepted", t.name) + } + bmp := bumpCmd{peer: p, tag: t, delta: delta} select { - case t.trkr.bumpCh <- bmp: + case t.trkr.bumpTagCh <- bmp: return nil - default: return fmt.Errorf( "unable to bump decaying tag for peer %s, tag %s, delta %d; queue full (len=%d)", - p.Pretty(), - t.name, - delta, - len(t.trkr.bumpCh)) + p.Pretty(), t.name, delta, len(t.trkr.bumpTagCh)) + } +} + +func (t *decayingTag) Remove(p peer.ID) error { + if atomic.LoadInt32(&t.closed) == 1 { + return fmt.Errorf("decaying tag %s had been closed; no further removals are accepted", t.name) + } + + rm := removeCmd{peer: p, tag: t} + + select { + case t.trkr.removeTagCh <- rm: + return nil + default: + return fmt.Errorf( + "unable to remove decaying tag for peer %s, tag %s; queue full (len=%d)", + p.Pretty(), t.name, len(t.trkr.removeTagCh)) + } +} + +func (t *decayingTag) Close() error { + if !atomic.CompareAndSwapInt32(&t.closed, 0, 1) { + log.Warnf("duplicate decaying tag closure: %s; skipping", t.name) + return nil + } + + select { + case t.trkr.closeTagCh <- t: + return nil + default: + return fmt.Errorf("unable to close decaying tag %s; queue full (len=%d)", t.name, len(t.trkr.closeTagCh)) } } diff --git a/p2p/net/connmgr/decay_test.go b/p2p/net/connmgr/decay_test.go index fad5e0d8b6..c75efce1eb 100644 --- a/p2p/net/connmgr/decay_test.go +++ b/p2p/net/connmgr/decay_test.go @@ -283,7 +283,7 @@ func TestResolutionMisaligned(t *testing.T) { // allow the background goroutine to process bumps. <-time.After(500 * time.Millisecond) - // nothing has happened. + // first tick. mockClock.Add(TestResolution) require.Equal(1000, mgr.GetTagInfo(id).Tags["beep"]) require.Equal(1000, mgr.GetTagInfo(id).Tags["bop"]) @@ -301,6 +301,104 @@ func TestResolutionMisaligned(t *testing.T) { require.Equal(1997, mgr.GetTagInfo(id).Value) } +func TestTagRemoval(t *testing.T) { + var ( + id1, id2 = tu.RandPeerIDFatal(t), tu.RandPeerIDFatal(t) + mgr, decay, mockClock = testDecayTracker(t) + require = require.New(t) + ) + + tag1, err := decay.RegisterDecayingTag("beep", TestResolution, connmgr.DecayFixed(1), connmgr.BumpOverwrite()) + require.NoError(err) + + tag2, err := decay.RegisterDecayingTag("bop", TestResolution, connmgr.DecayFixed(1), connmgr.BumpOverwrite()) + require.NoError(err) + + // id1 has both tags; id2 only has the first tag. + _ = tag1.Bump(id1, 1000) + _ = tag2.Bump(id1, 1000) + _ = tag1.Bump(id2, 1000) + + // allow the background goroutine to process bumps. + <-time.After(500 * time.Millisecond) + + // first tick. + mockClock.Add(TestResolution) + require.Equal(999, mgr.GetTagInfo(id1).Tags["beep"]) + require.Equal(999, mgr.GetTagInfo(id1).Tags["bop"]) + require.Equal(999, mgr.GetTagInfo(id2).Tags["beep"]) + + require.Equal(999*2, mgr.GetTagInfo(id1).Value) + require.Equal(999, mgr.GetTagInfo(id2).Value) + + // remove tag1 from p1. + err = tag1.Remove(id1) + + // allow the background goroutine to process the removal. + <-time.After(500 * time.Millisecond) + require.NoError(err) + + // next tick. both peers only have 1 tag, both at 998 value. 
+ mockClock.Add(TestResolution) + require.Zero(mgr.GetTagInfo(id1).Tags["beep"]) + require.Equal(998, mgr.GetTagInfo(id1).Tags["bop"]) + require.Equal(998, mgr.GetTagInfo(id2).Tags["beep"]) + + require.Equal(998, mgr.GetTagInfo(id1).Value) + require.Equal(998, mgr.GetTagInfo(id2).Value) + + // remove tag1 from p1 again; no error. + err = tag1.Remove(id1) + require.NoError(err) +} + +func TestTagClosure(t *testing.T) { + var ( + id = tu.RandPeerIDFatal(t) + mgr, decay, mockClock = testDecayTracker(t) + require = require.New(t) + ) + + tag1, err := decay.RegisterDecayingTag("beep", TestResolution, connmgr.DecayFixed(1), connmgr.BumpOverwrite()) + require.NoError(err) + + tag2, err := decay.RegisterDecayingTag("bop", TestResolution, connmgr.DecayFixed(1), connmgr.BumpOverwrite()) + require.NoError(err) + + _ = tag1.Bump(id, 1000) + _ = tag2.Bump(id, 1000) + // allow the background goroutine to process bumps. + <-time.After(500 * time.Millisecond) + + // nothing has happened. + mockClock.Add(TestResolution) + require.Equal(999, mgr.GetTagInfo(id).Tags["beep"]) + require.Equal(999, mgr.GetTagInfo(id).Tags["bop"]) + require.Equal(999*2, mgr.GetTagInfo(id).Value) + + // next tick; tag1 would've ticked. + mockClock.Add(TestResolution) + require.Equal(998, mgr.GetTagInfo(id).Tags["beep"]) + require.Equal(998, mgr.GetTagInfo(id).Tags["bop"]) + require.Equal(998*2, mgr.GetTagInfo(id).Value) + + // close the tag. + err = tag1.Close() + require.NoError(err) + + // allow the background goroutine to process the closure. + <-time.After(500 * time.Millisecond) + require.Equal(998, mgr.GetTagInfo(id).Value) + + // a second closure should not error. + err = tag1.Close() + require.NoError(err) + + // bumping a tag after it's been closed should error. + err = tag1.Bump(id, 5) + require.Error(err) +} + func testDecayTracker(tb testing.TB) (*BasicConnMgr, connmgr.Decayer, *clock.Mock) { mockClock := clock.NewMock() cfg := &DecayerCfg{ From f237862d8fbf1e4f6a707c3f23659d6b2707c236 Mon Sep 17 00:00:00 2001 From: vyzo Date: Wed, 3 Jun 2020 22:01:56 +0300 Subject: [PATCH 41/58] implement IsProtected interface --- p2p/net/connmgr/connmgr.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index fda493c4e9..aa424576e5 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -176,6 +176,23 @@ func (cm *BasicConnMgr) Unprotect(id peer.ID, tag string) (protected bool) { return true } +func (cm *BasicConnMgr) IsProtected(id peer.ID, tag string) (protected bool) { + cm.plk.Lock() + defer cm.plk.Unlock() + + tags, ok := cm.protected[id] + if !ok { + return false + } + + if tag == "" { + return true + } + + _, protected = tags[tag] + return protected +} + // peerInfo stores metadata for a given peer. 
type peerInfo struct { id peer.ID From d7f3ee5786d2c2d12f5674aa1628e3ba67164e84 Mon Sep 17 00:00:00 2001 From: Marten Seemann Date: Fri, 23 Apr 2021 10:55:40 +0700 Subject: [PATCH 42/58] fix staticcheck --- p2p/net/connmgr/bench_test.go | 2 +- p2p/net/connmgr/connmgr.go | 2 -- p2p/net/connmgr/connmgr_test.go | 2 -- p2p/net/connmgr/decay.go | 5 ++--- 4 files changed, 3 insertions(+), 8 deletions(-) diff --git a/p2p/net/connmgr/bench_test.go b/p2p/net/connmgr/bench_test.go index 3749e63fc4..9789d16649 100644 --- a/p2p/net/connmgr/bench_test.go +++ b/p2p/net/connmgr/bench_test.go @@ -9,7 +9,7 @@ import ( ) func randomConns(tb testing.TB) (c [5000]network.Conn) { - for i, _ := range c { + for i := range c { c[i] = randConn(tb, nil) } return c diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index aa424576e5..70680c42a4 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -286,10 +286,8 @@ func (cm *BasicConnMgr) trim() { } // do the actual trim. - defer log.EventBegin(cm.ctx, "connCleanup").Done() for _, c := range cm.getConnsToClose() { log.Info("closing conn: ", c.RemotePeer()) - log.Event(cm.ctx, "closeConn", c.RemotePeer()) c.Close() } diff --git a/p2p/net/connmgr/connmgr_test.go b/p2p/net/connmgr/connmgr_test.go index 8e1b4b15d7..554ad18bf9 100644 --- a/p2p/net/connmgr/connmgr_test.go +++ b/p2p/net/connmgr/connmgr_test.go @@ -611,7 +611,6 @@ func TestPeerProtectionMultipleTags(t *testing.T) { // add 2 more connections, sending the connection manager overboard again. for i := 0; i < 2; i++ { rc := randConn(t, not.Disconnected) - conns = append(conns, rc) not.Connected(nil, rc) cm.TagPeer(rc.RemotePeer(), "test", 20) } @@ -631,7 +630,6 @@ func TestPeerProtectionMultipleTags(t *testing.T) { // add 2 more connections, sending the connection manager overboard again. 
for i := 0; i < 2; i++ { rc := randConn(t, not.Disconnected) - conns = append(conns, rc) not.Connected(nil, rc) cm.TagPeer(rc.RemotePeer(), "test", 20) } diff --git a/p2p/net/connmgr/decay.go b/p2p/net/connmgr/decay.go index 62029a539b..189759a180 100644 --- a/p2p/net/connmgr/decay.go +++ b/p2p/net/connmgr/decay.go @@ -101,8 +101,7 @@ func (d *decayer) RegisterDecayingTag(name string, interval time.Duration, decay d.tagsMu.Lock() defer d.tagsMu.Unlock() - tag, ok := d.knownTags[name] - if ok { + if _, ok := d.knownTags[name]; ok { return nil, fmt.Errorf("decaying tag with name %s already exists", name) } @@ -118,7 +117,7 @@ func (d *decayer) RegisterDecayingTag(name string, interval time.Duration, decay } lastTick := d.lastTick.Load().(time.Time) - tag = &decayingTag{ + tag := &decayingTag{ trkr: d, name: name, interval: interval, From 16dce8a67690eda457b5d983f78feb4602c5c393 Mon Sep 17 00:00:00 2001 From: Marten Seemann Date: Thu, 2 Sep 2021 20:51:57 +0100 Subject: [PATCH 43/58] fix build --- p2p/net/connmgr/connmgr.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index d8f44314ab..51b6592a7d 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -238,9 +238,9 @@ func (cm *BasicConnMgr) TrimOpenConns(ctx context.Context) { } func (cm *BasicConnMgr) background() { - interval := cm.gracePeriod / 2 - if interval < cm.silencePeriod { - interval = cm.silencePeriod + interval := cm.cfg.gracePeriod / 2 + if interval < cm.cfg.silencePeriod { + interval = cm.cfg.silencePeriod } if interval < 10*time.Second { interval = 10 * time.Second From 44a21dc590a3b95c7018e75b8b740c6d30870e2c Mon Sep 17 00:00:00 2001 From: Marten Seemann Date: Fri, 26 Nov 2021 10:50:30 +0400 Subject: [PATCH 44/58] update go-log to v2 --- p2p/net/connmgr/connmgr.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 51b6592a7d..1aed4edc24 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -11,7 +11,7 @@ import ( "github.com/libp2p/go-libp2p-core/network" "github.com/libp2p/go-libp2p-core/peer" - logging "github.com/ipfs/go-log" + logging "github.com/ipfs/go-log/v2" ma "github.com/multiformats/go-multiaddr" ) From 628e658092083d118dd05fe5364d27d506b69fdc Mon Sep 17 00:00:00 2001 From: Marten Seemann Date: Fri, 26 Nov 2021 10:54:09 +0400 Subject: [PATCH 45/58] make sure the background go routine has stopped when closing --- p2p/net/connmgr/connmgr.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 51b6592a7d..7830c3f8da 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -43,8 +43,9 @@ type BasicConnMgr struct { lastTrimMu sync.RWMutex lastTrim time.Time - ctx context.Context - cancel func() + refCount sync.WaitGroup + ctx context.Context + cancel func() } var ( @@ -137,15 +138,17 @@ func NewConnManager(low, hi int, grace time.Duration, opts ...Option) *BasicConn decay, _ := NewDecayer(cfg.decayer, cm) cm.decayer = decay + cm.refCount.Add(1) go cm.background() return cm } func (cm *BasicConnMgr) Close() error { + cm.cancel() if err := cm.decayer.Close(); err != nil { return err } - cm.cancel() + cm.refCount.Wait() return nil } @@ -238,6 +241,8 @@ func (cm *BasicConnMgr) TrimOpenConns(ctx context.Context) { } func (cm *BasicConnMgr) background() { + defer cm.refCount.Done() + interval := cm.cfg.gracePeriod / 2 if 
interval < cm.cfg.silencePeriod { interval = cm.cfg.silencePeriod From 71224097679bae1109ef5806fde24c7be3d70999 Mon Sep 17 00:00:00 2001 From: Marten Seemann Date: Fri, 26 Nov 2021 10:57:23 +0400 Subject: [PATCH 46/58] unexport the config --- p2p/net/connmgr/connmgr.go | 4 ++-- p2p/net/connmgr/options.go | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 51b6592a7d..a6cf5d163f 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -29,7 +29,7 @@ var log = logging.Logger("connmgr") type BasicConnMgr struct { *decayer - cfg *BasicConnManagerConfig + cfg *config segments segments plk sync.RWMutex @@ -99,7 +99,7 @@ func (s *segment) tagInfoFor(p peer.ID) *peerInfo { func NewConnManager(low, hi int, grace time.Duration, opts ...Option) *BasicConnMgr { ctx, cancel := context.WithCancel(context.Background()) - cfg := &BasicConnManagerConfig{ + cfg := &config{ highWater: hi, lowWater: low, gracePeriod: grace, diff --git a/p2p/net/connmgr/options.go b/p2p/net/connmgr/options.go index cfe919e13b..a4135d9a86 100644 --- a/p2p/net/connmgr/options.go +++ b/p2p/net/connmgr/options.go @@ -2,9 +2,8 @@ package connmgr import "time" -// BasicConnManagerConfig is the configuration struct for the basic connection -// manager. -type BasicConnManagerConfig struct { +// config is the configuration struct for the basic connection manager. +type config struct { highWater int lowWater int gracePeriod time.Duration @@ -13,11 +12,11 @@ type BasicConnManagerConfig struct { } // Option represents an option for the basic connection manager. -type Option func(*BasicConnManagerConfig) error +type Option func(*config) error // DecayerConfig applies a configuration for the decayer. func DecayerConfig(opts *DecayerCfg) Option { - return func(cfg *BasicConnManagerConfig) error { + return func(cfg *config) error { cfg.decayer = opts return nil } From 4aeb0b901d50bd9a8ff372393983cd5ddddaee94 Mon Sep 17 00:00:00 2001 From: Marten Seemann Date: Thu, 2 Sep 2021 22:37:37 +0100 Subject: [PATCH 47/58] fix race condition in getConnsToClose --- p2p/net/connmgr/connmgr.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 4a382fd65a..b9e31d9379 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -318,15 +318,13 @@ func (cm *BasicConnMgr) getConnsToClose() []network.Conn { return nil } - nconns := int(atomic.LoadInt32(&cm.connCount)) - if nconns <= cm.cfg.lowWater { + if int(atomic.LoadInt32(&cm.connCount)) <= cm.cfg.lowWater { log.Info("open connection count below limit") return nil } - npeers := cm.segments.countPeers() - candidates := make([]*peerInfo, 0, npeers) - ncandidates := 0 + candidates := make([]peerInfo, 0, cm.segments.countPeers()) + var ncandidates int gracePeriodStart := time.Now().Add(-cm.cfg.gracePeriod) cm.plk.RLock() @@ -341,7 +339,9 @@ func (cm *BasicConnMgr) getConnsToClose() []network.Conn { // skip peers in the grace period. 
continue } - candidates = append(candidates, inf) + // note that we're copying the entry here, + // but since inf.conns is a map, it will still point to the original object + candidates = append(candidates, *inf) ncandidates += len(inf.conns) } s.Unlock() @@ -381,7 +381,6 @@ func (cm *BasicConnMgr) getConnsToClose() []network.Conn { // lock this to protect from concurrent modifications from connect/disconnect events s := cm.segments.get(inf.id) s.Lock() - if len(inf.conns) == 0 && inf.temp { // handle temporary entries for early tags -- this entry has gone past the grace period // and still holds no connections, so prune it. @@ -390,8 +389,8 @@ func (cm *BasicConnMgr) getConnsToClose() []network.Conn { for c := range inf.conns { selected = append(selected, c) } + target -= len(inf.conns) } - target -= len(inf.conns) s.Unlock() } From 4533d5ef67bb836694a2935089bf9d935401dafc Mon Sep 17 00:00:00 2001 From: Marten Seemann Date: Sun, 28 Nov 2021 19:05:04 +0400 Subject: [PATCH 48/58] add a test case --- p2p/net/connmgr/connmgr.go | 6 ++- p2p/net/connmgr/connmgr_test.go | 96 ++++++++++++++++++++++++--------- p2p/net/connmgr/decay_test.go | 4 ++ 3 files changed, 78 insertions(+), 28 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index b9e31d9379..f434958e57 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -17,6 +17,8 @@ import ( var SilencePeriod = 10 * time.Second +var minCleanupInterval = 10 * time.Second + var log = logging.Logger("connmgr") // BasicConnMgr is a ConnManager that trims connections whenever the count exceeds the @@ -247,8 +249,8 @@ func (cm *BasicConnMgr) background() { if interval < cm.cfg.silencePeriod { interval = cm.cfg.silencePeriod } - if interval < 10*time.Second { - interval = 10 * time.Second + if interval < minCleanupInterval { + interval = minCleanupInterval } ticker := time.NewTicker(interval) diff --git a/p2p/net/connmgr/connmgr_test.go b/p2p/net/connmgr/connmgr_test.go index 554ad18bf9..63d8103518 100644 --- a/p2p/net/connmgr/connmgr_test.go +++ b/p2p/net/connmgr/connmgr_test.go @@ -6,6 +6,8 @@ import ( "testing" "time" + "github.com/stretchr/testify/require" + detectrace "github.com/ipfs/go-detect-race" "github.com/libp2p/go-libp2p-core/network" @@ -51,6 +53,7 @@ func randConn(t testing.TB, discNotify func(network.Network, network.Conn)) netw // Make sure multiple trim calls block. func TestTrimBlocks(t *testing.T) { cm := NewConnManager(200, 300, 0) + defer cm.Close() cm.lastTrimMu.RLock() @@ -79,6 +82,7 @@ func TestTrimBlocks(t *testing.T) { func TestTrimCancels(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) cm := NewConnManager(200, 300, 0) + defer cm.Close() cm.lastTrimMu.RLock() defer cm.lastTrimMu.RUnlock() @@ -103,6 +107,7 @@ func TestTrimClosed(t *testing.T) { // Make sure joining an existing trim works. 
func TestTrimJoin(t *testing.T) { cm := NewConnManager(200, 300, 0) + defer cm.Close() cm.lastTrimMu.RLock() var wg sync.WaitGroup wg.Add(3) @@ -126,6 +131,7 @@ func TestTrimJoin(t *testing.T) { func TestConnTrimming(t *testing.T) { cm := NewConnManager(200, 300, 0) + defer cm.Close() not := cm.Notifee() var conns []network.Conn @@ -162,39 +168,47 @@ func TestConnTrimming(t *testing.T) { } func TestConnsToClose(t *testing.T) { - cm := NewConnManager(0, 10, 0) - conns := cm.getConnsToClose() - if conns != nil { - t.Fatal("expected no connections") - } - - cm = NewConnManager(10, 0, 0) - conns = cm.getConnsToClose() - if conns != nil { - t.Fatal("expected no connections") - } - - cm = NewConnManager(1, 1, 0) - conns = cm.getConnsToClose() - if conns != nil { - t.Fatal("expected no connections") + addConns := func(cm *BasicConnMgr, n int) { + not := cm.Notifee() + for i := 0; i < n; i++ { + conn := randConn(t, nil) + not.Connected(nil, conn) + } } - cm = NewConnManager(1, 1, time.Duration(10*time.Minute)) - not := cm.Notifee() - for i := 0; i < 5; i++ { - conn := randConn(t, nil) - not.Connected(nil, conn) - } - conns = cm.getConnsToClose() - if len(conns) != 0 { - t.Fatal("expected no connections") - } + t.Run("below hi limit", func(t *testing.T) { + cm := NewConnManager(0, 10, 0) + defer cm.Close() + addConns(cm, 5) + require.Empty(t, cm.getConnsToClose()) + }) + + t.Run("below low limit", func(t *testing.T) { + cm := NewConnManager(10, 0, 0) + defer cm.Close() + addConns(cm, 5) + require.Empty(t, cm.getConnsToClose()) + }) + + t.Run("below low and hi limit", func(t *testing.T) { + cm := NewConnManager(1, 1, 0) + defer cm.Close() + addConns(cm, 1) + require.Empty(t, cm.getConnsToClose()) + }) + + t.Run("within silence period", func(t *testing.T) { + cm := NewConnManager(1, 1, time.Duration(10*time.Minute)) + defer cm.Close() + addConns(cm, 1) + require.Empty(t, cm.getConnsToClose()) + }) } func TestGetTagInfo(t *testing.T) { start := time.Now() cm := NewConnManager(1, 1, time.Duration(10*time.Minute)) + defer cm.Close() not := cm.Notifee() conn := randConn(t, nil) not.Connected(nil, conn) @@ -265,6 +279,7 @@ func TestGetTagInfo(t *testing.T) { func TestTagPeerNonExistant(t *testing.T) { cm := NewConnManager(1, 1, time.Duration(10*time.Minute)) + defer cm.Close() id := tu.RandPeerIDFatal(t) cm.TagPeer(id, "test", 1) @@ -276,6 +291,7 @@ func TestTagPeerNonExistant(t *testing.T) { func TestUntagPeer(t *testing.T) { cm := NewConnManager(1, 1, time.Duration(10*time.Minute)) + defer cm.Close() not := cm.Notifee() conn := randConn(t, nil) not.Connected(nil, conn) @@ -307,6 +323,7 @@ func TestGetInfo(t *testing.T) { start := time.Now() gp := time.Duration(10 * time.Minute) cm := NewConnManager(1, 5, gp) + defer cm.Close() not := cm.Notifee() conn := randConn(t, nil) not.Connected(nil, conn) @@ -334,6 +351,7 @@ func TestGetInfo(t *testing.T) { func TestDoubleConnection(t *testing.T) { gp := time.Duration(10 * time.Minute) cm := NewConnManager(1, 5, gp) + defer cm.Close() not := cm.Notifee() conn := randConn(t, nil) not.Connected(nil, conn) @@ -350,6 +368,7 @@ func TestDoubleConnection(t *testing.T) { func TestDisconnected(t *testing.T) { gp := time.Duration(10 * time.Minute) cm := NewConnManager(1, 5, gp) + defer cm.Close() not := cm.Notifee() conn := randConn(t, nil) not.Connected(nil, conn) @@ -387,6 +406,7 @@ func TestGracePeriod(t *testing.T) { SilencePeriod = 0 cm := NewConnManager(10, 20, 100*time.Millisecond) + defer cm.Close() SilencePeriod = 10 * time.Second not := cm.Notifee() @@ -444,6 
+464,7 @@ func TestQuickBurstRespectsSilencePeriod(t *testing.T) { } cm := NewConnManager(10, 20, 0) + defer cm.Close() not := cm.Notifee() var conns []network.Conn @@ -480,6 +501,7 @@ func TestPeerProtectionSingleTag(t *testing.T) { SilencePeriod = 0 cm := NewConnManager(19, 20, 0) + defer cm.Close() SilencePeriod = 10 * time.Second not := cm.Notifee() @@ -567,6 +589,7 @@ func TestPeerProtectionMultipleTags(t *testing.T) { SilencePeriod = 0 cm := NewConnManager(19, 20, 0) + defer cm.Close() SilencePeriod = 10 * time.Second not := cm.Notifee() @@ -650,6 +673,7 @@ func TestPeerProtectionMultipleTags(t *testing.T) { func TestPeerProtectionIdempotent(t *testing.T) { SilencePeriod = 0 cm := NewConnManager(10, 20, 0) + defer cm.Close() SilencePeriod = 10 * time.Second id, _ := tu.RandPeerID() @@ -681,6 +705,8 @@ func TestPeerProtectionIdempotent(t *testing.T) { func TestUpsertTag(t *testing.T) { cm := NewConnManager(1, 1, time.Duration(10*time.Minute)) + defer cm.Close() + not := cm.Notifee() conn := randConn(t, nil) rp := conn.RemotePeer() @@ -737,6 +763,7 @@ func TestTemporaryEntriesClearedFirst(t *testing.T) { func TestTemporaryEntryConvertedOnConnection(t *testing.T) { cm := NewConnManager(1, 1, 0) + defer cm.Close() conn := randConn(t, nil) cm.TagPeer(conn.RemotePeer(), "test", 20) @@ -754,3 +781,20 @@ func TestTemporaryEntryConvertedOnConnection(t *testing.T) { t.Fatal("expected a non-temporary tag with value 20") } } + +// see https://github.com/libp2p/go-libp2p-connmgr/issues/82 +func TestConcurrentCleanupAndTagging(t *testing.T) { + origMinCleanupInterval := minCleanupInterval + t.Cleanup(func() { minCleanupInterval = origMinCleanupInterval }) + minCleanupInterval = time.Millisecond + + SilencePeriod = 0 + cm := NewConnManager(1, 1, 0) + defer cm.Close() + SilencePeriod = 10 * time.Second + + for i := 0; i < 1000; i++ { + conn := randConn(t, nil) + cm.TagPeer(conn.RemotePeer(), "test", 20) + } +} diff --git a/p2p/net/connmgr/decay_test.go b/p2p/net/connmgr/decay_test.go index c75efce1eb..d1fb9985ca 100644 --- a/p2p/net/connmgr/decay_test.go +++ b/p2p/net/connmgr/decay_test.go @@ -411,6 +411,10 @@ func testDecayTracker(tb testing.TB) (*BasicConnMgr, connmgr.Decayer, *clock.Moc if !ok { tb.Fatalf("connmgr does not support decay") } + tb.Cleanup(func() { + mgr.Close() + decay.Close() + }) return mgr, decay, mockClock } From e2489641e90bfb9511fa5fcf52033d73f1c6beef Mon Sep 17 00:00:00 2001 From: Marten Seemann Date: Fri, 26 Nov 2021 10:45:05 +0400 Subject: [PATCH 49/58] add an error return value to the constructor --- p2p/net/connmgr/bench_test.go | 5 +- p2p/net/connmgr/connmgr.go | 16 +++--- p2p/net/connmgr/connmgr_test.go | 93 +++++++++++++++++++++------------ p2p/net/connmgr/decay_test.go | 3 +- 4 files changed, 71 insertions(+), 46 deletions(-) diff --git a/p2p/net/connmgr/bench_test.go b/p2p/net/connmgr/bench_test.go index 9789d16649..cd42e51cb6 100644 --- a/p2p/net/connmgr/bench_test.go +++ b/p2p/net/connmgr/bench_test.go @@ -6,6 +6,8 @@ import ( "testing" "github.com/libp2p/go-libp2p-core/network" + + "github.com/stretchr/testify/require" ) func randomConns(tb testing.TB) (c [5000]network.Conn) { @@ -17,7 +19,8 @@ func randomConns(tb testing.TB) (c [5000]network.Conn) { func BenchmarkLockContention(b *testing.B) { conns := randomConns(b) - cm := NewConnManager(1000, 1000, 0) + cm, err := NewConnManager(1000, 1000, 0) + require.NoError(b, err) not := cm.Notifee() kill := make(chan struct{}) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 
f434958e57..b64aebdd86 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -99,20 +99,17 @@ func (s *segment) tagInfoFor(p peer.ID) *peerInfo { // their connections terminated) until 'low watermark' peers remain. // * grace is the amount of time a newly opened connection is given before it becomes // subject to pruning. -func NewConnManager(low, hi int, grace time.Duration, opts ...Option) *BasicConnMgr { - ctx, cancel := context.WithCancel(context.Background()) - +func NewConnManager(low, hi int, grace time.Duration, opts ...Option) (*BasicConnMgr, error) { cfg := &config{ highWater: hi, lowWater: low, gracePeriod: grace, silencePeriod: SilencePeriod, } - for _, o := range opts { - // TODO we're ignoring errors from options because we have no way to - // return them, or otherwise act on them. - _ = o(cfg) + if err := o(cfg); err != nil { + return nil, err + } } if cfg.decayer == nil { @@ -125,8 +122,6 @@ func NewConnManager(low, hi int, grace time.Duration, opts ...Option) *BasicConn trimRunningCh: make(chan struct{}, 1), trimTrigger: make(chan chan<- struct{}), protected: make(map[peer.ID]map[string]struct{}, 16), - ctx: ctx, - cancel: cancel, segments: func() (ret segments) { for i := range ret { ret[i] = &segment{ @@ -136,13 +131,14 @@ func NewConnManager(low, hi int, grace time.Duration, opts ...Option) *BasicConn return ret }(), } + cm.ctx, cm.cancel = context.WithCancel(context.Background()) decay, _ := NewDecayer(cfg.decayer, cm) cm.decayer = decay cm.refCount.Add(1) go cm.background() - return cm + return cm, nil } func (cm *BasicConnMgr) Close() error { diff --git a/p2p/net/connmgr/connmgr_test.go b/p2p/net/connmgr/connmgr_test.go index 63d8103518..4799e73a6e 100644 --- a/p2p/net/connmgr/connmgr_test.go +++ b/p2p/net/connmgr/connmgr_test.go @@ -6,15 +6,14 @@ import ( "testing" "time" - "github.com/stretchr/testify/require" - - detectrace "github.com/ipfs/go-detect-race" - "github.com/libp2p/go-libp2p-core/network" "github.com/libp2p/go-libp2p-core/peer" tu "github.com/libp2p/go-libp2p-core/test" ma "github.com/multiformats/go-multiaddr" + + detectrace "github.com/ipfs/go-detect-race" + "github.com/stretchr/testify/require" ) type tconn struct { @@ -52,7 +51,8 @@ func randConn(t testing.TB, discNotify func(network.Network, network.Conn)) netw // Make sure multiple trim calls block. func TestTrimBlocks(t *testing.T) { - cm := NewConnManager(200, 300, 0) + cm, err := NewConnManager(200, 300, 0) + require.NoError(t, err) defer cm.Close() cm.lastTrimMu.RLock() @@ -80,9 +80,10 @@ func TestTrimBlocks(t *testing.T) { // Make sure we return from trim when the context is canceled. func TestTrimCancels(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - cm := NewConnManager(200, 300, 0) + cm, err := NewConnManager(200, 300, 0) + require.NoError(t, err) defer cm.Close() + ctx, cancel := context.WithCancel(context.Background()) cm.lastTrimMu.RLock() defer cm.lastTrimMu.RUnlock() @@ -99,15 +100,18 @@ func TestTrimCancels(t *testing.T) { // Make sure trim returns when closed. func TestTrimClosed(t *testing.T) { - cm := NewConnManager(200, 300, 0) - cm.Close() + cm, err := NewConnManager(200, 300, 0) + require.NoError(t, err) + require.NoError(t, cm.Close()) cm.TrimOpenConns(context.Background()) } // Make sure joining an existing trim works. 
func TestTrimJoin(t *testing.T) { - cm := NewConnManager(200, 300, 0) + cm, err := NewConnManager(200, 300, 0) + require.NoError(t, err) defer cm.Close() + cm.lastTrimMu.RLock() var wg sync.WaitGroup wg.Add(3) @@ -130,7 +134,8 @@ func TestTrimJoin(t *testing.T) { } func TestConnTrimming(t *testing.T) { - cm := NewConnManager(200, 300, 0) + cm, err := NewConnManager(200, 300, 0) + require.NoError(t, err) defer cm.Close() not := cm.Notifee() @@ -177,28 +182,32 @@ func TestConnsToClose(t *testing.T) { } t.Run("below hi limit", func(t *testing.T) { - cm := NewConnManager(0, 10, 0) + cm, err := NewConnManager(0, 10, 0) + require.NoError(t, err) defer cm.Close() addConns(cm, 5) require.Empty(t, cm.getConnsToClose()) }) t.Run("below low limit", func(t *testing.T) { - cm := NewConnManager(10, 0, 0) + cm, err := NewConnManager(10, 0, 0) + require.NoError(t, err) defer cm.Close() addConns(cm, 5) require.Empty(t, cm.getConnsToClose()) }) t.Run("below low and hi limit", func(t *testing.T) { - cm := NewConnManager(1, 1, 0) + cm, err := NewConnManager(1, 1, 0) + require.NoError(t, err) defer cm.Close() addConns(cm, 1) require.Empty(t, cm.getConnsToClose()) }) t.Run("within silence period", func(t *testing.T) { - cm := NewConnManager(1, 1, time.Duration(10*time.Minute)) + cm, err := NewConnManager(1, 1, time.Duration(10*time.Minute)) + require.NoError(t, err) defer cm.Close() addConns(cm, 1) require.Empty(t, cm.getConnsToClose()) @@ -207,8 +216,10 @@ func TestConnsToClose(t *testing.T) { func TestGetTagInfo(t *testing.T) { start := time.Now() - cm := NewConnManager(1, 1, time.Duration(10*time.Minute)) + cm, err := NewConnManager(1, 1, time.Duration(10*time.Minute)) + require.NoError(t, err) defer cm.Close() + not := cm.Notifee() conn := randConn(t, nil) not.Connected(nil, conn) @@ -278,7 +289,8 @@ func TestGetTagInfo(t *testing.T) { } func TestTagPeerNonExistant(t *testing.T) { - cm := NewConnManager(1, 1, time.Duration(10*time.Minute)) + cm, err := NewConnManager(1, 1, time.Duration(10*time.Minute)) + require.NoError(t, err) defer cm.Close() id := tu.RandPeerIDFatal(t) @@ -290,9 +302,11 @@ func TestTagPeerNonExistant(t *testing.T) { } func TestUntagPeer(t *testing.T) { - cm := NewConnManager(1, 1, time.Duration(10*time.Minute)) + cm, err := NewConnManager(1, 1, time.Duration(10*time.Minute)) + require.NoError(t, err) defer cm.Close() not := cm.Notifee() + conn := randConn(t, nil) not.Connected(nil, conn) rp := conn.RemotePeer() @@ -321,8 +335,9 @@ func TestUntagPeer(t *testing.T) { func TestGetInfo(t *testing.T) { start := time.Now() - gp := time.Duration(10 * time.Minute) - cm := NewConnManager(1, 5, gp) + const gp = 10 * time.Minute + cm, err := NewConnManager(1, 5, gp) + require.NoError(t, err) defer cm.Close() not := cm.Notifee() conn := randConn(t, nil) @@ -349,8 +364,9 @@ func TestGetInfo(t *testing.T) { } func TestDoubleConnection(t *testing.T) { - gp := time.Duration(10 * time.Minute) - cm := NewConnManager(1, 5, gp) + const gp = 10 * time.Minute + cm, err := NewConnManager(1, 5, gp) + require.NoError(t, err) defer cm.Close() not := cm.Notifee() conn := randConn(t, nil) @@ -366,8 +382,9 @@ func TestDoubleConnection(t *testing.T) { } func TestDisconnected(t *testing.T) { - gp := time.Duration(10 * time.Minute) - cm := NewConnManager(1, 5, gp) + const gp = 10 * time.Minute + cm, err := NewConnManager(1, 5, gp) + require.NoError(t, err) defer cm.Close() not := cm.Notifee() conn := randConn(t, nil) @@ -405,7 +422,8 @@ func TestGracePeriod(t *testing.T) { } SilencePeriod = 0 - cm := 
NewConnManager(10, 20, 100*time.Millisecond) + cm, err := NewConnManager(10, 20, 100*time.Millisecond) + require.NoError(t, err) defer cm.Close() SilencePeriod = 10 * time.Second @@ -463,7 +481,8 @@ func TestQuickBurstRespectsSilencePeriod(t *testing.T) { t.Skip("race detector is unhappy with this test") } - cm := NewConnManager(10, 20, 0) + cm, err := NewConnManager(10, 20, 0) + require.NoError(t, err) defer cm.Close() not := cm.Notifee() @@ -500,7 +519,8 @@ func TestPeerProtectionSingleTag(t *testing.T) { } SilencePeriod = 0 - cm := NewConnManager(19, 20, 0) + cm, err := NewConnManager(19, 20, 0) + require.NoError(t, err) defer cm.Close() SilencePeriod = 10 * time.Second @@ -588,7 +608,8 @@ func TestPeerProtectionMultipleTags(t *testing.T) { } SilencePeriod = 0 - cm := NewConnManager(19, 20, 0) + cm, err := NewConnManager(19, 20, 0) + require.NoError(t, err) defer cm.Close() SilencePeriod = 10 * time.Second @@ -672,7 +693,8 @@ func TestPeerProtectionMultipleTags(t *testing.T) { func TestPeerProtectionIdempotent(t *testing.T) { SilencePeriod = 0 - cm := NewConnManager(10, 20, 0) + cm, err := NewConnManager(10, 20, 0) + require.NoError(t, err) defer cm.Close() SilencePeriod = 10 * time.Second @@ -704,9 +726,9 @@ func TestPeerProtectionIdempotent(t *testing.T) { } func TestUpsertTag(t *testing.T) { - cm := NewConnManager(1, 1, time.Duration(10*time.Minute)) + cm, err := NewConnManager(1, 1, time.Duration(10*time.Minute)) + require.NoError(t, err) defer cm.Close() - not := cm.Notifee() conn := randConn(t, nil) rp := conn.RemotePeer() @@ -741,7 +763,8 @@ func TestUpsertTag(t *testing.T) { } func TestTemporaryEntriesClearedFirst(t *testing.T) { - cm := NewConnManager(1, 1, 0) + cm, err := NewConnManager(1, 1, 0) + require.NoError(t, err) id := tu.RandPeerIDFatal(t) cm.TagPeer(id, "test", 20) @@ -762,7 +785,8 @@ func TestTemporaryEntriesClearedFirst(t *testing.T) { } func TestTemporaryEntryConvertedOnConnection(t *testing.T) { - cm := NewConnManager(1, 1, 0) + cm, err := NewConnManager(1, 1, 0) + require.NoError(t, err) defer cm.Close() conn := randConn(t, nil) @@ -789,7 +813,8 @@ func TestConcurrentCleanupAndTagging(t *testing.T) { minCleanupInterval = time.Millisecond SilencePeriod = 0 - cm := NewConnManager(1, 1, 0) + cm, err := NewConnManager(1, 1, 0) + require.NoError(t, err) defer cm.Close() SilencePeriod = 10 * time.Second diff --git a/p2p/net/connmgr/decay_test.go b/p2p/net/connmgr/decay_test.go index d1fb9985ca..05b0aef387 100644 --- a/p2p/net/connmgr/decay_test.go +++ b/p2p/net/connmgr/decay_test.go @@ -406,7 +406,8 @@ func testDecayTracker(tb testing.TB) (*BasicConnMgr, connmgr.Decayer, *clock.Moc Clock: mockClock, } - mgr := NewConnManager(10, 10, 1*time.Second, DecayerConfig(cfg)) + mgr, err := NewConnManager(10, 10, 1*time.Second, DecayerConfig(cfg)) + require.NoError(tb, err) decay, ok := connmgr.SupportsDecay(mgr) if !ok { tb.Fatalf("connmgr does not support decay") From cb6c4108034612bbd6be9d4acc5c1f1d0c23e2a1 Mon Sep 17 00:00:00 2001 From: Marten Seemann Date: Mon, 29 Nov 2021 10:38:20 +0400 Subject: [PATCH 50/58] remove check for the last trim time when trimming As trims are triggered by a time.Ticker, this would lead to roughly every second trim being skipped. It also makes calling TrimOpenConns pointless. 
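To illustrate the skipping behaviour this message describes: the background loop is woken by a time.Ticker whose interval ends up equal to the silence period in the default configuration, and lastTrim is only recorded after a trim finishes, so on the next tick time.Since(lastTrim) comes up just short of the silence period and the guard drops that tick. A minimal, self-contained sketch of that interaction (the variable names are illustrative, not the exact fields touched by this patch):

package main

import (
	"fmt"
	"time"
)

func main() {
	const silencePeriod = 10 * time.Millisecond

	lastTrim := time.Now()
	ticker := time.NewTicker(silencePeriod)
	defer ticker.Stop()

	for i := 0; i < 6; i++ {
		<-ticker.C
		// The guard this patch removes: skip if the last trim was "too recent".
		if time.Since(lastTrim) < silencePeriod {
			fmt.Println("tick skipped")
			continue
		}
		time.Sleep(time.Millisecond) // stand-in for the trim work itself
		lastTrim = time.Now()        // recorded after the work, slightly later than the tick
		fmt.Println("trimmed")
	}
}

Running this prints alternating "trimmed" and "tick skipped" lines, which is the "roughly every second trim being skipped" behaviour the patch removes by dropping the guard.
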
--- p2p/net/connmgr/connmgr.go | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index b64aebdd86..05ac3a9643 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -286,16 +286,6 @@ func (cm *BasicConnMgr) background() { } func (cm *BasicConnMgr) trim() { - cm.lastTrimMu.RLock() - // read the last trim time under the lock - lastTrim := cm.lastTrim - cm.lastTrimMu.RUnlock() - - // skip this attempt to trim if the last one just took place. - if time.Since(lastTrim) < cm.cfg.silencePeriod { - return - } - // do the actual trim. for _, c := range cm.getConnsToClose() { log.Info("closing conn: ", c.RemotePeer()) From 414856bb9517082216dd2a68411b57d036cac410 Mon Sep 17 00:00:00 2001 From: Marten Seemann Date: Mon, 29 Nov 2021 10:55:57 +0400 Subject: [PATCH 51/58] introduce WithGracePeriod and WithSilencePeriod configuration options --- p2p/net/connmgr/bench_test.go | 2 +- p2p/net/connmgr/connmgr.go | 23 ++---- p2p/net/connmgr/connmgr_test.go | 126 +++++++++++++------------------- p2p/net/connmgr/decay_test.go | 2 +- p2p/net/connmgr/options.go | 31 +++++++- 5 files changed, 89 insertions(+), 95 deletions(-) diff --git a/p2p/net/connmgr/bench_test.go b/p2p/net/connmgr/bench_test.go index cd42e51cb6..73e25e97c5 100644 --- a/p2p/net/connmgr/bench_test.go +++ b/p2p/net/connmgr/bench_test.go @@ -19,7 +19,7 @@ func randomConns(tb testing.TB) (c [5000]network.Conn) { func BenchmarkLockContention(b *testing.B) { conns := randomConns(b) - cm, err := NewConnManager(1000, 1000, 0) + cm, err := NewConnManager(1000, 1000, WithGracePeriod(0)) require.NoError(b, err) not := cm.Notifee() diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 05ac3a9643..caff969f9e 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -15,10 +15,6 @@ import ( ma "github.com/multiformats/go-multiaddr" ) -var SilencePeriod = 10 * time.Second - -var minCleanupInterval = 10 * time.Second - var log = logging.Logger("connmgr") // BasicConnMgr is a ConnManager that trims connections whenever the count exceeds the @@ -94,17 +90,15 @@ func (s *segment) tagInfoFor(p peer.ID) *peerInfo { } // NewConnManager creates a new BasicConnMgr with the provided params: -// * lo and hi are watermarks governing the number of connections that'll be maintained. -// When the peer count exceeds the 'high watermark', as many peers will be pruned (and -// their connections terminated) until 'low watermark' peers remain. -// * grace is the amount of time a newly opened connection is given before it becomes -// subject to pruning. -func NewConnManager(low, hi int, grace time.Duration, opts ...Option) (*BasicConnMgr, error) { +// lo and hi are watermarks governing the number of connections that'll be maintained. +// When the peer count exceeds the 'high watermark', as many peers will be pruned (and +// their connections terminated) until 'low watermark' peers remain. 
+func NewConnManager(low, hi int, opts ...Option) (*BasicConnMgr, error) { cfg := &config{ highWater: hi, lowWater: low, - gracePeriod: grace, - silencePeriod: SilencePeriod, + gracePeriod: time.Minute, + silencePeriod: 10 * time.Second, } for _, o := range opts { if err := o(cfg); err != nil { @@ -242,12 +236,9 @@ func (cm *BasicConnMgr) background() { defer cm.refCount.Done() interval := cm.cfg.gracePeriod / 2 - if interval < cm.cfg.silencePeriod { + if cm.cfg.silencePeriod != 0 { interval = cm.cfg.silencePeriod } - if interval < minCleanupInterval { - interval = minCleanupInterval - } ticker := time.NewTicker(interval) defer ticker.Stop() diff --git a/p2p/net/connmgr/connmgr_test.go b/p2p/net/connmgr/connmgr_test.go index 4799e73a6e..a4f9db68d0 100644 --- a/p2p/net/connmgr/connmgr_test.go +++ b/p2p/net/connmgr/connmgr_test.go @@ -3,6 +3,7 @@ package connmgr import ( "context" "sync" + "sync/atomic" "testing" "time" @@ -12,7 +13,6 @@ import ( tu "github.com/libp2p/go-libp2p-core/test" ma "github.com/multiformats/go-multiaddr" - detectrace "github.com/ipfs/go-detect-race" "github.com/stretchr/testify/require" ) @@ -20,18 +20,22 @@ type tconn struct { network.Conn peer peer.ID - closed bool + closed uint32 // to be used atomically. Closed if 1 disconnectNotify func(net network.Network, conn network.Conn) } func (c *tconn) Close() error { - c.closed = true + atomic.StoreUint32(&c.closed, 1) if c.disconnectNotify != nil { c.disconnectNotify(nil, c) } return nil } +func (c *tconn) isClosed() bool { + return atomic.LoadUint32(&c.closed) == 1 +} + func (c *tconn) RemotePeer() peer.ID { return c.peer } @@ -51,7 +55,7 @@ func randConn(t testing.TB, discNotify func(network.Network, network.Conn)) netw // Make sure multiple trim calls block. func TestTrimBlocks(t *testing.T) { - cm, err := NewConnManager(200, 300, 0) + cm, err := NewConnManager(200, 300, WithGracePeriod(0)) require.NoError(t, err) defer cm.Close() @@ -80,7 +84,7 @@ func TestTrimBlocks(t *testing.T) { // Make sure we return from trim when the context is canceled. func TestTrimCancels(t *testing.T) { - cm, err := NewConnManager(200, 300, 0) + cm, err := NewConnManager(200, 300, WithGracePeriod(0)) require.NoError(t, err) defer cm.Close() ctx, cancel := context.WithCancel(context.Background()) @@ -100,7 +104,7 @@ func TestTrimCancels(t *testing.T) { // Make sure trim returns when closed. func TestTrimClosed(t *testing.T) { - cm, err := NewConnManager(200, 300, 0) + cm, err := NewConnManager(200, 300, WithGracePeriod(0)) require.NoError(t, err) require.NoError(t, cm.Close()) cm.TrimOpenConns(context.Background()) @@ -108,7 +112,7 @@ func TestTrimClosed(t *testing.T) { // Make sure joining an existing trim works. 
func TestTrimJoin(t *testing.T) { - cm, err := NewConnManager(200, 300, 0) + cm, err := NewConnManager(200, 300, WithGracePeriod(0)) require.NoError(t, err) defer cm.Close() @@ -134,7 +138,7 @@ func TestTrimJoin(t *testing.T) { } func TestConnTrimming(t *testing.T) { - cm, err := NewConnManager(200, 300, 0) + cm, err := NewConnManager(200, 300, WithGracePeriod(0)) require.NoError(t, err) defer cm.Close() not := cm.Notifee() @@ -147,7 +151,7 @@ func TestConnTrimming(t *testing.T) { } for _, c := range conns { - if c.(*tconn).closed { + if c.(*tconn).isClosed() { t.Fatal("nothing should be closed yet") } } @@ -162,12 +166,12 @@ func TestConnTrimming(t *testing.T) { for i := 0; i < 100; i++ { c := conns[i] - if c.(*tconn).closed { + if c.(*tconn).isClosed() { t.Fatal("these shouldnt be closed") } } - if !conns[299].(*tconn).closed { + if !conns[299].(*tconn).isClosed() { t.Fatal("conn with bad tag should have gotten closed") } } @@ -182,7 +186,7 @@ func TestConnsToClose(t *testing.T) { } t.Run("below hi limit", func(t *testing.T) { - cm, err := NewConnManager(0, 10, 0) + cm, err := NewConnManager(0, 10, WithGracePeriod(0)) require.NoError(t, err) defer cm.Close() addConns(cm, 5) @@ -190,7 +194,7 @@ func TestConnsToClose(t *testing.T) { }) t.Run("below low limit", func(t *testing.T) { - cm, err := NewConnManager(10, 0, 0) + cm, err := NewConnManager(10, 0, WithGracePeriod(0)) require.NoError(t, err) defer cm.Close() addConns(cm, 5) @@ -198,7 +202,7 @@ func TestConnsToClose(t *testing.T) { }) t.Run("below low and hi limit", func(t *testing.T) { - cm, err := NewConnManager(1, 1, 0) + cm, err := NewConnManager(1, 1, WithGracePeriod(0)) require.NoError(t, err) defer cm.Close() addConns(cm, 1) @@ -206,7 +210,7 @@ func TestConnsToClose(t *testing.T) { }) t.Run("within silence period", func(t *testing.T) { - cm, err := NewConnManager(1, 1, time.Duration(10*time.Minute)) + cm, err := NewConnManager(1, 1, WithGracePeriod(10*time.Minute)) require.NoError(t, err) defer cm.Close() addConns(cm, 1) @@ -216,7 +220,7 @@ func TestConnsToClose(t *testing.T) { func TestGetTagInfo(t *testing.T) { start := time.Now() - cm, err := NewConnManager(1, 1, time.Duration(10*time.Minute)) + cm, err := NewConnManager(1, 1, WithGracePeriod(10*time.Minute)) require.NoError(t, err) defer cm.Close() @@ -289,7 +293,7 @@ func TestGetTagInfo(t *testing.T) { } func TestTagPeerNonExistant(t *testing.T) { - cm, err := NewConnManager(1, 1, time.Duration(10*time.Minute)) + cm, err := NewConnManager(1, 1, WithGracePeriod(10*time.Minute)) require.NoError(t, err) defer cm.Close() @@ -302,7 +306,7 @@ func TestTagPeerNonExistant(t *testing.T) { } func TestUntagPeer(t *testing.T) { - cm, err := NewConnManager(1, 1, time.Duration(10*time.Minute)) + cm, err := NewConnManager(1, 1, WithGracePeriod(10*time.Minute)) require.NoError(t, err) defer cm.Close() not := cm.Notifee() @@ -336,7 +340,7 @@ func TestUntagPeer(t *testing.T) { func TestGetInfo(t *testing.T) { start := time.Now() const gp = 10 * time.Minute - cm, err := NewConnManager(1, 5, gp) + cm, err := NewConnManager(1, 5, WithGracePeriod(gp)) require.NoError(t, err) defer cm.Close() not := cm.Notifee() @@ -365,7 +369,7 @@ func TestGetInfo(t *testing.T) { func TestDoubleConnection(t *testing.T) { const gp = 10 * time.Minute - cm, err := NewConnManager(1, 5, gp) + cm, err := NewConnManager(1, 5, WithGracePeriod(gp)) require.NoError(t, err) defer cm.Close() not := cm.Notifee() @@ -383,7 +387,7 @@ func TestDoubleConnection(t *testing.T) { func TestDisconnected(t *testing.T) { const gp = 10 * 
time.Minute - cm, err := NewConnManager(1, 5, gp) + cm, err := NewConnManager(1, 5, WithGracePeriod(gp)) require.NoError(t, err) defer cm.Close() not := cm.Notifee() @@ -417,15 +421,10 @@ func TestDisconnected(t *testing.T) { } func TestGracePeriod(t *testing.T) { - if detectrace.WithRace() { - t.Skip("race detector is unhappy with this test") - } - - SilencePeriod = 0 - cm, err := NewConnManager(10, 20, 100*time.Millisecond) + const gp = 100 * time.Millisecond + cm, err := NewConnManager(10, 20, WithGracePeriod(gp), WithSilencePeriod(time.Millisecond)) require.NoError(t, err) defer cm.Close() - SilencePeriod = 10 * time.Second not := cm.Notifee() @@ -437,9 +436,9 @@ func TestGracePeriod(t *testing.T) { conns = append(conns, rc) not.Connected(nil, rc) - time.Sleep(200 * time.Millisecond) + time.Sleep(2 * gp) - if rc.(*tconn).closed { + if rc.(*tconn).isClosed() { t.Fatal("expected conn to remain open") } } @@ -454,7 +453,7 @@ func TestGracePeriod(t *testing.T) { cm.TrimOpenConns(context.Background()) for _, c := range conns { - if c.(*tconn).closed { + if c.(*tconn).isClosed() { t.Fatal("expected no conns to be closed") } } @@ -465,7 +464,7 @@ func TestGracePeriod(t *testing.T) { closed := 0 for _, c := range conns { - if c.(*tconn).closed { + if c.(*tconn).isClosed() { closed++ } } @@ -477,11 +476,7 @@ func TestGracePeriod(t *testing.T) { // see https://github.com/libp2p/go-libp2p-connmgr/issues/23 func TestQuickBurstRespectsSilencePeriod(t *testing.T) { - if detectrace.WithRace() { - t.Skip("race detector is unhappy with this test") - } - - cm, err := NewConnManager(10, 20, 0) + cm, err := NewConnManager(10, 20, WithGracePeriod(0)) require.NoError(t, err) defer cm.Close() not := cm.Notifee() @@ -501,7 +496,7 @@ func TestQuickBurstRespectsSilencePeriod(t *testing.T) { // only the first trim is allowed in; make sure we close at most 20 connections, not all of them. 
var closed int for _, c := range conns { - if c.(*tconn).closed { + if c.(*tconn).isClosed() { closed++ } } @@ -514,16 +509,9 @@ func TestQuickBurstRespectsSilencePeriod(t *testing.T) { } func TestPeerProtectionSingleTag(t *testing.T) { - if detectrace.WithRace() { - t.Skip("race detector is unhappy with this test") - } - - SilencePeriod = 0 - cm, err := NewConnManager(19, 20, 0) + cm, err := NewConnManager(19, 20, WithGracePeriod(0), WithSilencePeriod(time.Millisecond)) require.NoError(t, err) defer cm.Close() - SilencePeriod = 10 * time.Second - not := cm.Notifee() var conns []network.Conn @@ -551,10 +539,11 @@ func TestPeerProtectionSingleTag(t *testing.T) { // add 1 more conn, this shouldn't send us over the limit as protected conns don't count addConn(20) + time.Sleep(100 * time.Millisecond) cm.TrimOpenConns(context.Background()) for _, c := range conns { - if c.(*tconn).closed { + if c.(*tconn).isClosed() { t.Error("connection was closed by connection manager") } } @@ -567,14 +556,14 @@ func TestPeerProtectionSingleTag(t *testing.T) { cm.TrimOpenConns(context.Background()) for _, c := range protected { - if c.(*tconn).closed { + if c.(*tconn).isClosed() { t.Error("protected connection was closed by connection manager") } } closed := 0 for _, c := range conns { - if c.(*tconn).closed { + if c.(*tconn).isClosed() { closed++ } } @@ -592,27 +581,20 @@ func TestPeerProtectionSingleTag(t *testing.T) { cm.TrimOpenConns(context.Background()) - if !protected[0].(*tconn).closed { + if !protected[0].(*tconn).isClosed() { t.Error("unprotected connection was kept open by connection manager") } for _, c := range protected[1:] { - if c.(*tconn).closed { + if c.(*tconn).isClosed() { t.Error("protected connection was closed by connection manager") } } } func TestPeerProtectionMultipleTags(t *testing.T) { - if detectrace.WithRace() { - t.Skip("race detector is unhappy with this test") - } - - SilencePeriod = 0 - cm, err := NewConnManager(19, 20, 0) + cm, err := NewConnManager(19, 20, WithGracePeriod(0), WithSilencePeriod(time.Millisecond)) require.NoError(t, err) defer cm.Close() - SilencePeriod = 10 * time.Second - not := cm.Notifee() // produce 20 connections with unique peers. @@ -640,7 +622,7 @@ func TestPeerProtectionMultipleTags(t *testing.T) { cm.TrimOpenConns(context.Background()) for _, c := range protected { - if c.(*tconn).closed { + if c.(*tconn).isClosed() { t.Error("protected connection was closed by connection manager") } } @@ -663,7 +645,7 @@ func TestPeerProtectionMultipleTags(t *testing.T) { // connections should still remain open, as they were protected. 
for _, c := range protected[0:] { - if c.(*tconn).closed { + if c.(*tconn).isClosed() { t.Error("protected connection was closed by connection manager") } } @@ -680,11 +662,11 @@ func TestPeerProtectionMultipleTags(t *testing.T) { cm.TrimOpenConns(context.Background()) - if !protected[0].(*tconn).closed { + if !protected[0].(*tconn).isClosed() { t.Error("unprotected connection was kept open by connection manager") } for _, c := range protected[1:] { - if c.(*tconn).closed { + if c.(*tconn).isClosed() { t.Error("protected connection was closed by connection manager") } } @@ -692,11 +674,9 @@ func TestPeerProtectionMultipleTags(t *testing.T) { } func TestPeerProtectionIdempotent(t *testing.T) { - SilencePeriod = 0 - cm, err := NewConnManager(10, 20, 0) + cm, err := NewConnManager(10, 20, WithGracePeriod(0), WithSilencePeriod(time.Millisecond)) require.NoError(t, err) defer cm.Close() - SilencePeriod = 10 * time.Second id, _ := tu.RandPeerID() cm.Protect(id, "global") @@ -726,7 +706,7 @@ func TestPeerProtectionIdempotent(t *testing.T) { } func TestUpsertTag(t *testing.T) { - cm, err := NewConnManager(1, 1, time.Duration(10*time.Minute)) + cm, err := NewConnManager(1, 1, WithGracePeriod(10*time.Minute)) require.NoError(t, err) defer cm.Close() not := cm.Notifee() @@ -763,7 +743,7 @@ func TestUpsertTag(t *testing.T) { } func TestTemporaryEntriesClearedFirst(t *testing.T) { - cm, err := NewConnManager(1, 1, 0) + cm, err := NewConnManager(1, 1, WithGracePeriod(0)) require.NoError(t, err) id := tu.RandPeerIDFatal(t) @@ -785,7 +765,7 @@ func TestTemporaryEntriesClearedFirst(t *testing.T) { } func TestTemporaryEntryConvertedOnConnection(t *testing.T) { - cm, err := NewConnManager(1, 1, 0) + cm, err := NewConnManager(1, 1, WithGracePeriod(0)) require.NoError(t, err) defer cm.Close() @@ -808,15 +788,9 @@ func TestTemporaryEntryConvertedOnConnection(t *testing.T) { // see https://github.com/libp2p/go-libp2p-connmgr/issues/82 func TestConcurrentCleanupAndTagging(t *testing.T) { - origMinCleanupInterval := minCleanupInterval - t.Cleanup(func() { minCleanupInterval = origMinCleanupInterval }) - minCleanupInterval = time.Millisecond - - SilencePeriod = 0 - cm, err := NewConnManager(1, 1, 0) + cm, err := NewConnManager(1, 1, WithGracePeriod(0), WithSilencePeriod(time.Millisecond)) require.NoError(t, err) defer cm.Close() - SilencePeriod = 10 * time.Second for i := 0; i < 1000; i++ { conn := randConn(t, nil) diff --git a/p2p/net/connmgr/decay_test.go b/p2p/net/connmgr/decay_test.go index 05b0aef387..6c064dc2f1 100644 --- a/p2p/net/connmgr/decay_test.go +++ b/p2p/net/connmgr/decay_test.go @@ -406,7 +406,7 @@ func testDecayTracker(tb testing.TB) (*BasicConnMgr, connmgr.Decayer, *clock.Moc Clock: mockClock, } - mgr, err := NewConnManager(10, 10, 1*time.Second, DecayerConfig(cfg)) + mgr, err := NewConnManager(10, 10, WithGracePeriod(time.Second), DecayerConfig(cfg)) require.NoError(tb, err) decay, ok := connmgr.SupportsDecay(mgr) if !ok { diff --git a/p2p/net/connmgr/options.go b/p2p/net/connmgr/options.go index a4135d9a86..6856b4bd40 100644 --- a/p2p/net/connmgr/options.go +++ b/p2p/net/connmgr/options.go @@ -1,6 +1,9 @@ package connmgr -import "time" +import ( + "errors" + "time" +) // config is the configuration struct for the basic connection manager. type config struct { @@ -21,3 +24,29 @@ func DecayerConfig(opts *DecayerCfg) Option { return nil } } + +// WithGracePeriod sets the grace period. +// The grace period is the time a newly opened connection is given before it becomes +// subject to pruning. 
+func WithGracePeriod(p time.Duration) Option { + return func(cfg *config) error { + if p < 0 { + return errors.New("grace period must be non-negative") + } + cfg.gracePeriod = p + return nil + } +} + +// WithSilencePeriod sets the silence period. +// The connection manager will perform a cleanup once per silence period +// if the number of connections surpasses the high watermark. +func WithSilencePeriod(p time.Duration) Option { + return func(cfg *config) error { + if p <= 0 { + return errors.New("silence period must be non-zero") + } + cfg.silencePeriod = p + return nil + } +} From b1842dfdc006103fc978347f3ba7af99d4f67940 Mon Sep 17 00:00:00 2001 From: Marten Seemann Date: Mon, 13 Dec 2021 17:12:24 +0400 Subject: [PATCH 52/58] fix flaky tests caused by super short silence periods --- p2p/net/connmgr/connmgr_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/p2p/net/connmgr/connmgr_test.go b/p2p/net/connmgr/connmgr_test.go index a4f9db68d0..46f84d06be 100644 --- a/p2p/net/connmgr/connmgr_test.go +++ b/p2p/net/connmgr/connmgr_test.go @@ -422,7 +422,7 @@ func TestDisconnected(t *testing.T) { func TestGracePeriod(t *testing.T) { const gp = 100 * time.Millisecond - cm, err := NewConnManager(10, 20, WithGracePeriod(gp), WithSilencePeriod(time.Millisecond)) + cm, err := NewConnManager(10, 20, WithGracePeriod(gp), WithSilencePeriod(time.Hour)) require.NoError(t, err) defer cm.Close() @@ -509,7 +509,7 @@ func TestQuickBurstRespectsSilencePeriod(t *testing.T) { } func TestPeerProtectionSingleTag(t *testing.T) { - cm, err := NewConnManager(19, 20, WithGracePeriod(0), WithSilencePeriod(time.Millisecond)) + cm, err := NewConnManager(19, 20, WithGracePeriod(0), WithSilencePeriod(time.Hour)) require.NoError(t, err) defer cm.Close() not := cm.Notifee() @@ -592,7 +592,7 @@ func TestPeerProtectionSingleTag(t *testing.T) { } func TestPeerProtectionMultipleTags(t *testing.T) { - cm, err := NewConnManager(19, 20, WithGracePeriod(0), WithSilencePeriod(time.Millisecond)) + cm, err := NewConnManager(19, 20, WithGracePeriod(0), WithSilencePeriod(time.Hour)) require.NoError(t, err) defer cm.Close() not := cm.Notifee() @@ -674,7 +674,7 @@ func TestPeerProtectionMultipleTags(t *testing.T) { } func TestPeerProtectionIdempotent(t *testing.T) { - cm, err := NewConnManager(10, 20, WithGracePeriod(0), WithSilencePeriod(time.Millisecond)) + cm, err := NewConnManager(10, 20, WithGracePeriod(0), WithSilencePeriod(time.Hour)) require.NoError(t, err) defer cm.Close() From 03958f6ee950251aa335a080c8242668cb37332c Mon Sep 17 00:00:00 2001 From: Marten Seemann Date: Fri, 26 Nov 2021 10:20:25 +0400 Subject: [PATCH 53/58] aggressively trim connections when we're running out of memory --- p2p/net/connmgr/connmgr.go | 134 +++++++++++++++++++++++++++++++++---- 1 file changed, 120 insertions(+), 14 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index caff969f9e..a82f72c5ba 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -13,6 +13,7 @@ import ( logging "github.com/ipfs/go-log/v2" ma "github.com/multiformats/go-multiaddr" + "github.com/raulk/go-watchdog" ) var log = logging.Logger("connmgr") @@ -41,9 +42,10 @@ type BasicConnMgr struct { lastTrimMu sync.RWMutex lastTrim time.Time - refCount sync.WaitGroup - ctx context.Context - cancel func() + refCount sync.WaitGroup + ctx context.Context + cancel func() + unregisterWatchdog func() } var ( @@ -127,6 +129,9 @@ func NewConnManager(low, hi int, opts ...Option) (*BasicConnMgr, error) { } cm.ctx, 
cm.cancel = context.WithCancel(context.Background()) + // When we're running low on memory, immediately trigger a trim. + cm.unregisterWatchdog = watchdog.RegisterPostGCNotifee(cm.memoryEmergency) + decay, _ := NewDecayer(cfg.decayer, cm) cm.decayer = decay @@ -135,8 +140,40 @@ func NewConnManager(low, hi int, opts ...Option) (*BasicConnMgr, error) { return cm, nil } +// memoryEmergency is run when we run low on memory. +// Close half of the connections, as quickly as possible. +// We don't pay attention to the silence period or the grace period. +// We try to not kill protected connections, but if that turns out to be necessary, not connection is safe! +func (cm *BasicConnMgr) memoryEmergency() { + log.Info("Low on memory. Closing half of our connections.") + target := int(atomic.LoadInt32(&cm.connCount) / 2) + ch := make(chan struct{}) + select { + case cm.trimTrigger <- ch: + case <-cm.ctx.Done(): + } + + // Wait for the trim. + select { + case <-ch: + case <-cm.ctx.Done(): + } + + // Trim connections without paying attention to the silence period. + for _, c := range cm.getConnsToCloseEmergency(target) { + log.Infow("low on memory. closing conn", "peer", c.RemotePeer()) + c.Close() + } + + // finally, update the last trim time. + cm.lastTrimMu.Lock() + cm.lastTrim = time.Now() + cm.lastTrimMu.Unlock() +} + func (cm *BasicConnMgr) Close() error { cm.cancel() + cm.unregisterWatchdog() if err := cm.decayer.Close(); err != nil { return err } @@ -202,6 +239,20 @@ type peerInfo struct { firstSeen time.Time // timestamp when we began tracking this peer. } +type peerInfos []peerInfo + +func (p peerInfos) SortByValue() { + sort.Slice(p, func(i, j int) bool { + left, right := p[i], p[j] + // temporary peers are preferred for pruning. + if left.temp != right.temp { + return left.temp + } + // otherwise, compare by value. + return left.value < right.value + }) +} + // TrimOpenConns closes the connections of as many peers as needed to make the peer count // equal the low watermark. Peers are sorted in ascending order based on their total value, // pruning those peers with the lowest scores first, as long as they are not within their @@ -276,10 +327,11 @@ func (cm *BasicConnMgr) background() { } } +// trim starts the trim, if the last trim happened before the configured silence period. func (cm *BasicConnMgr) trim() { // do the actual trim. for _, c := range cm.getConnsToClose() { - log.Info("closing conn: ", c.RemotePeer()) + log.Infow("closing conn", "peer", c.RemotePeer()) c.Close() } @@ -289,6 +341,68 @@ func (cm *BasicConnMgr) trim() { cm.lastTrimMu.Unlock() } +func (cm *BasicConnMgr) getConnsToCloseEmergency(target int) []network.Conn { + npeers := cm.segments.countPeers() + candidates := make(peerInfos, 0, npeers) + + cm.plk.RLock() + for _, s := range cm.segments { + s.Lock() + for id, inf := range s.peers { + if _, ok := cm.protected[id]; ok { + // skip over protected peer. + continue + } + candidates = append(candidates, *inf) + } + s.Unlock() + } + cm.plk.RUnlock() + + // Sort peers according to their value. + candidates.SortByValue() + + selected := make([]network.Conn, 0, target+10) + for _, inf := range candidates { + if target <= 0 { + break + } + for c := range inf.conns { + selected = append(selected, c) + } + target -= len(inf.conns) + } + if len(selected) >= target { + // We found enough connections that were not protected. + return selected + } + + // We didn't find enough unprotected connections. + // We have no choice but to kill some protected connections. 
+ candidates = candidates[:0] + cm.plk.RLock() + for _, s := range cm.segments { + s.Lock() + for _, inf := range s.peers { + candidates = append(candidates, *inf) + } + s.Unlock() + } + cm.plk.RUnlock() + + candidates.SortByValue() + for _, inf := range candidates { + if target <= 0 { + break + } + for c := range inf.conns { + selected = append(selected, c) + } + target -= len(inf.conns) + } + return selected +} + // getConnsToClose runs the heuristics described in TrimOpenConns and returns the // connections to close. func (cm *BasicConnMgr) getConnsToClose() []network.Conn { @@ -302,7 +416,7 @@ func (cm *BasicConnMgr) getConnsToClose() []network.Conn { return nil } - candidates := make([]peerInfo, 0, cm.segments.countPeers()) + candidates := make(peerInfos, 0, cm.segments.countPeers()) var ncandidates int gracePeriodStart := time.Now().Add(-cm.cfg.gracePeriod) @@ -337,15 +451,7 @@ func (cm *BasicConnMgr) getConnsToClose() []network.Conn { } // Sort peers according to their value. - sort.Slice(candidates, func(i, j int) bool { - left, right := candidates[i], candidates[j] - // temporary peers are preferred for pruning. - if left.temp != right.temp { - return left.temp - } - // otherwise, compare by value. - return left.value < right.value - }) + candidates.SortByValue() target := ncandidates - cm.cfg.lowWater From a8a2eaa0dec061a6f7dba03d6f4df9c42feff860 Mon Sep 17 00:00:00 2001 From: Marten Seemann Date: Fri, 3 Dec 2021 10:13:51 +0400 Subject: [PATCH 54/58] rework concurrency logic for trims --- p2p/net/connmgr/connmgr.go | 85 ++++++++++----------------------- p2p/net/connmgr/connmgr_test.go | 20 -------- 2 files changed, 26 insertions(+), 79 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index a82f72c5ba..7cd3d95cf5 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -35,9 +35,11 @@ type BasicConnMgr struct { protected map[peer.ID]map[string]struct{} // channel-based semaphore that enforces only a single trim is in progress - trimRunningCh chan struct{} - trimTrigger chan chan<- struct{} - connCount int32 + trimMutex sync.Mutex + connCount int32 + // to be accessed atomically. This is mimicking the implementation of a sync.Once. + // Take care of correct alignment when modifying this struct. + trimCount uint64 lastTrimMu sync.RWMutex lastTrim time.Time @@ -114,10 +116,8 @@ func NewConnManager(low, hi int, opts ...Option) (*BasicConnMgr, error) { } cm := &BasicConnMgr{ - cfg: cfg, - trimRunningCh: make(chan struct{}, 1), - trimTrigger: make(chan chan<- struct{}), - protected: make(map[peer.ID]map[string]struct{}, 16), + cfg: cfg, + protected: make(map[peer.ID]map[string]struct{}, 16), segments: func() (ret segments) { for i := range ret { ret[i] = &segment{ @@ -147,17 +147,10 @@ func NewConnManager(low, hi int, opts ...Option) (*BasicConnMgr, error) { func (cm *BasicConnMgr) memoryEmergency() { log.Info("Low on memory. Closing half of our connections.") target := int(atomic.LoadInt32(&cm.connCount) / 2) - ch := make(chan struct{}) - select { - case cm.trimTrigger <- ch: - case <-cm.ctx.Done(): - } - // Wait for the trim. - select { - case <-ch: - case <-cm.ctx.Done(): - } + cm.trimMutex.Lock() + defer atomic.AddUint64(&cm.trimCount, 1) + defer cm.trimMutex.Unlock() // Trim connections without paying attention to the silence period. for _, c := range cm.getConnsToCloseEmergency(target) { @@ -261,26 +254,11 @@ func (p peerInfos) SortByValue() { // This function blocks until a trim is completed. 
If a trim is underway, a new // one won't be started, and instead it'll wait until that one is completed before // returning. -func (cm *BasicConnMgr) TrimOpenConns(ctx context.Context) { +func (cm *BasicConnMgr) TrimOpenConns(_ context.Context) { // TODO: error return value so we can cleanly signal we are aborting because: // (a) there's another trim in progress, or (b) the silence period is in effect. - // Trigger a trim. - ch := make(chan struct{}) - select { - case cm.trimTrigger <- ch: - case <-cm.ctx.Done(): - case <-ctx.Done(): - // TODO: return an error? - } - - // Wait for the trim. - select { - case <-ch: - case <-cm.ctx.Done(): - case <-ctx.Done(): - // TODO: return an error? - } + cm.doTrim() } func (cm *BasicConnMgr) background() { @@ -295,35 +273,30 @@ func (cm *BasicConnMgr) background() { defer ticker.Stop() for { - var waiting chan<- struct{} select { case <-ticker.C: if atomic.LoadInt32(&cm.connCount) < int32(cm.cfg.highWater) { // Below high water, skip. continue } - case waiting = <-cm.trimTrigger: case <-cm.ctx.Done(): return } cm.trim() + } +} - // Notify anyone waiting on this trim. - if waiting != nil { - close(waiting) - } - - for { - select { - case waiting = <-cm.trimTrigger: - if waiting != nil { - close(waiting) - } - continue - default: - } - break - } +func (cm *BasicConnMgr) doTrim() { + // This logic is mimicking the implementation of sync.Once in the standard library. + count := atomic.LoadUint64(&cm.trimCount) + cm.trimMutex.Lock() + defer cm.trimMutex.Unlock() + if count == atomic.LoadUint64(&cm.trimCount) { + cm.trim() + cm.lastTrimMu.Lock() + cm.lastTrim = time.Now() + cm.lastTrimMu.Unlock() + atomic.AddUint64(&cm.trimCount, 1) } } @@ -334,16 +307,10 @@ func (cm *BasicConnMgr) trim() { log.Infow("closing conn", "peer", c.RemotePeer()) c.Close() } - - // finally, update the last trim time. - cm.lastTrimMu.Lock() - cm.lastTrim = time.Now() - cm.lastTrimMu.Unlock() } func (cm *BasicConnMgr) getConnsToCloseEmergency(target int) []network.Conn { - npeers := cm.segments.countPeers() - candidates := make(peerInfos, 0, npeers) + candidates := make(peerInfos, 0, cm.segments.countPeers()) cm.plk.RLock() for _, s := range cm.segments { diff --git a/p2p/net/connmgr/connmgr_test.go b/p2p/net/connmgr/connmgr_test.go index 46f84d06be..c8941f6a89 100644 --- a/p2p/net/connmgr/connmgr_test.go +++ b/p2p/net/connmgr/connmgr_test.go @@ -82,26 +82,6 @@ func TestTrimBlocks(t *testing.T) { <-doneCh } -// Make sure we return from trim when the context is canceled. -func TestTrimCancels(t *testing.T) { - cm, err := NewConnManager(200, 300, WithGracePeriod(0)) - require.NoError(t, err) - defer cm.Close() - ctx, cancel := context.WithCancel(context.Background()) - - cm.lastTrimMu.RLock() - defer cm.lastTrimMu.RUnlock() - - doneCh := make(chan struct{}) - go func() { - defer close(doneCh) - cm.TrimOpenConns(ctx) - }() - time.Sleep(time.Millisecond) - cancel() - <-doneCh -} - // Make sure trim returns when closed. 
func TestTrimClosed(t *testing.T) { cm, err := NewConnManager(200, 300, WithGracePeriod(0)) From 3412150b9c350d83be935ea5fdf4bda178ef43f6 Mon Sep 17 00:00:00 2001 From: Marten Seemann Date: Sun, 12 Dec 2021 10:35:27 +0400 Subject: [PATCH 55/58] sort connections by direction and number of streams in a memory emergency --- p2p/net/connmgr/connmgr.go | 36 ++++++++++++++- p2p/net/connmgr/connmgr_test.go | 78 +++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+), 2 deletions(-) diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go index 7cd3d95cf5..b762ac826b 100644 --- a/p2p/net/connmgr/connmgr.go +++ b/p2p/net/connmgr/connmgr.go @@ -246,6 +246,38 @@ func (p peerInfos) SortByValue() { }) } +func (p peerInfos) SortByValueAndStreams() { + sort.Slice(p, func(i, j int) bool { + left, right := p[i], p[j] + // temporary peers are preferred for pruning. + if left.temp != right.temp { + return left.temp + } + // otherwise, compare by value. + if left.value != right.value { + return left.value < right.value + } + incomingAndStreams := func(m map[network.Conn]time.Time) (incoming bool, numStreams int) { + for c := range m { + stat := c.Stat() + if stat.Direction == network.DirInbound { + incoming = true + } + numStreams += stat.NumStreams + } + return + } + leftIncoming, leftStreams := incomingAndStreams(left.conns) + rightIncoming, rightStreams := incomingAndStreams(right.conns) + // incoming connections are preferred for pruning + if leftIncoming != rightIncoming { + return leftIncoming + } + // prune connections with a higher number of streams first + return rightStreams < leftStreams + }) +} + // TrimOpenConns closes the connections of as many peers as needed to make the peer count // equal the low watermark. Peers are sorted in ascending order based on their total value, // pruning those peers with the lowest scores first, as long as they are not within their @@ -327,7 +359,7 @@ func (cm *BasicConnMgr) getConnsToCloseEmergency(target int) []network.Conn { cm.plk.RUnlock() // Sort peers according to their value. 
-	candidates.SortByValue()
+	candidates.SortByValueAndStreams()
 
 	selected := make([]network.Conn, 0, target+10)
 	for _, inf := range candidates {
@@ -357,7 +389,7 @@ func (cm *BasicConnMgr) getConnsToCloseEmergency(target int) []network.Conn {
 	}
 	cm.plk.RUnlock()
 
-	candidates.SortByValue()
+	candidates.SortByValueAndStreams()
 	for _, inf := range candidates {
 		if target <= 0 {
 			break
diff --git a/p2p/net/connmgr/connmgr_test.go b/p2p/net/connmgr/connmgr_test.go
index c8941f6a89..390c8599d5 100644
--- a/p2p/net/connmgr/connmgr_test.go
+++ b/p2p/net/connmgr/connmgr_test.go
@@ -7,6 +7,8 @@ import (
 	"testing"
 	"time"
 
+	"github.com/libp2p/go-libp2p-core/crypto"
+
 	"github.com/libp2p/go-libp2p-core/network"
 	"github.com/libp2p/go-libp2p-core/peer"
 
@@ -777,3 +779,79 @@ func TestConcurrentCleanupAndTagging(t *testing.T) {
 		cm.TagPeer(conn.RemotePeer(), "test", 20)
 	}
 }
+
+type mockConn struct {
+	stats network.ConnStats
+}
+
+func (m mockConn) Close() error                                          { panic("implement me") }
+func (m mockConn) LocalPeer() peer.ID                                    { panic("implement me") }
+func (m mockConn) LocalPrivateKey() crypto.PrivKey                       { panic("implement me") }
+func (m mockConn) RemotePeer() peer.ID                                   { panic("implement me") }
+func (m mockConn) RemotePublicKey() crypto.PubKey                        { panic("implement me") }
+func (m mockConn) LocalMultiaddr() ma.Multiaddr                          { panic("implement me") }
+func (m mockConn) RemoteMultiaddr() ma.Multiaddr                         { panic("implement me") }
+func (m mockConn) Stat() network.ConnStats                               { return m.stats }
+func (m mockConn) ID() string                                            { panic("implement me") }
+func (m mockConn) NewStream(ctx context.Context) (network.Stream, error) { panic("implement me") }
+func (m mockConn) GetStreams() []network.Stream                          { panic("implement me") }
+
+func TestPeerInfoSorting(t *testing.T) {
+	t.Run("starts with temporary connections", func(t *testing.T) {
+		p1 := peerInfo{id: peer.ID("peer1")}
+		p2 := peerInfo{id: peer.ID("peer2"), temp: true}
+		pis := peerInfos{p1, p2}
+		pis.SortByValue()
+		require.Equal(t, pis, peerInfos{p2, p1})
+	})
+
+	t.Run("starts with low-value connections", func(t *testing.T) {
+		p1 := peerInfo{id: peer.ID("peer1"), value: 40}
+		p2 := peerInfo{id: peer.ID("peer2"), value: 20}
+		pis := peerInfos{p1, p2}
+		pis.SortByValue()
+		require.Equal(t, pis, peerInfos{p2, p1})
+	})
+
+	t.Run("in a memory emergency, starts with incoming connections", func(t *testing.T) {
+		incoming := network.ConnStats{}
+		incoming.Direction = network.DirInbound
+		outgoing := network.ConnStats{}
+		outgoing.Direction = network.DirOutbound
+		p1 := peerInfo{
+			id: peer.ID("peer1"),
+			conns: map[network.Conn]time.Time{
+				&mockConn{stats: outgoing}: time.Now(),
+			},
+		}
+		p2 := peerInfo{
+			id: peer.ID("peer2"),
+			conns: map[network.Conn]time.Time{
+				&mockConn{stats: outgoing}: time.Now(),
+				&mockConn{stats: incoming}: time.Now(),
+			},
+		}
+		pis := peerInfos{p1, p2}
+		pis.SortByValueAndStreams()
+		require.Equal(t, pis, peerInfos{p2, p1})
+	})
+
+	t.Run("in a memory emergency, starts with connections that have many streams", func(t *testing.T) {
+		p1 := peerInfo{
+			id: peer.ID("peer1"),
+			conns: map[network.Conn]time.Time{
+				&mockConn{stats: network.ConnStats{NumStreams: 100}}: time.Now(),
+			},
+		}
+		p2 := peerInfo{
+			id: peer.ID("peer2"),
+			conns: map[network.Conn]time.Time{
+				&mockConn{stats: network.ConnStats{NumStreams: 80}}: time.Now(),
+				&mockConn{stats: network.ConnStats{NumStreams: 40}}: time.Now(),
+			},
+		}
+		pis := peerInfos{p1, p2}
+		pis.SortByValueAndStreams()
+		require.Equal(t, pis, peerInfos{p2, p1})
+	})
+}

From ff0577ee4c68a8d81c8a55cc37ad07415b428759 Mon Sep 17 00:00:00 2001
From: Marten Seemann
Date: Sun, 12 Dec 2021 14:24:32 +0400
Subject: [PATCH 56/58] always trim to the low watermark in a memory emergency

---
 p2p/net/connmgr/connmgr.go | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go
index b762ac826b..93d9899b41 100644
--- a/p2p/net/connmgr/connmgr.go
+++ b/p2p/net/connmgr/connmgr.go
@@ -141,12 +141,18 @@ func NewConnManager(low, hi int, opts ...Option) (*BasicConnMgr, error) {
 }
 
 // memoryEmergency is run when we run low on memory.
-// Close half of the connections, as quickly as possible.
+// Close connections until we reach the low watermark.
 // We don't pay attention to the silence period or the grace period.
 // We try to not kill protected connections, but if that turns out to be necessary, not connection is safe!
 func (cm *BasicConnMgr) memoryEmergency() {
-	log.Info("Low on memory. Closing half of our connections.")
-	target := int(atomic.LoadInt32(&cm.connCount) / 2)
+	connCount := int(atomic.LoadInt32(&cm.connCount))
+	target := connCount - cm.cfg.lowWater
+	if target < 0 {
+		log.Warnw("Low on memory, but we only have a few connections", "num", connCount, "low watermark", cm.cfg.lowWater)
+		return
+	} else {
+		log.Warnf("Low on memory. Closing %d connections.", target)
+	}
 
 	cm.trimMutex.Lock()
 	defer atomic.AddUint64(&cm.trimCount, 1)

From ee3785b5c014bae1300e5262b6fa156983ca541a Mon Sep 17 00:00:00 2001
From: vyzo
Date: Sat, 15 Jan 2022 00:15:13 +0200
Subject: [PATCH 57/58] make emergency trimming optional, disabled by default

---
 p2p/net/connmgr/connmgr.go | 8 ++++++--
 p2p/net/connmgr/options.go | 9 +++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go
index 93d9899b41..a42c443fdb 100644
--- a/p2p/net/connmgr/connmgr.go
+++ b/p2p/net/connmgr/connmgr.go
@@ -129,8 +129,12 @@ func NewConnManager(low, hi int, opts ...Option) (*BasicConnMgr, error) {
 	}
 	cm.ctx, cm.cancel = context.WithCancel(context.Background())
 
-	// When we're running low on memory, immediately trigger a trim.
-	cm.unregisterWatchdog = watchdog.RegisterPostGCNotifee(cm.memoryEmergency)
+	if cfg.emergencyTrim {
+		// When we're running low on memory, immediately trigger a trim.
+		cm.unregisterWatchdog = watchdog.RegisterPostGCNotifee(cm.memoryEmergency)
+	} else {
+		cm.unregisterWatchdog = func() {}
+	}
 
 	decay, _ := NewDecayer(cfg.decayer, cm)
 	cm.decayer = decay
diff --git a/p2p/net/connmgr/options.go b/p2p/net/connmgr/options.go
index 6856b4bd40..d66884c779 100644
--- a/p2p/net/connmgr/options.go
+++ b/p2p/net/connmgr/options.go
@@ -12,6 +12,7 @@ type config struct {
 	gracePeriod   time.Duration
 	silencePeriod time.Duration
 	decayer       *DecayerCfg
+	emergencyTrim bool
 }
 
 // Option represents an option for the basic connection manager.
@@ -50,3 +51,11 @@ func WithSilencePeriod(p time.Duration) Option {
 		return nil
 	}
 }
+
+// WithEmergencyTrim is an option to enable trimming connections on memory emergency.
+func WithEmergencyTrim(enable bool) Option {
+	return func(cfg *config) error {
+		cfg.emergencyTrim = enable
+		return nil
+	}
+}

From e32c45849c28388a74392d509715d138c7691607 Mon Sep 17 00:00:00 2001
From: Marten Seemann
Date: Mon, 17 Jan 2022 06:41:43 -0800
Subject: [PATCH 58/58] hide watchdog behind cgo flag (#103)

---
 p2p/net/connmgr/connmgr.go         | 17 ++++++++---------
 p2p/net/connmgr/options.go         |  8 --------
 p2p/net/connmgr/watchdog_cgo.go    | 18 ++++++++++++++++++
 p2p/net/connmgr/watchdog_no_cgo.go | 16 ++++++++++++++++
 4 files changed, 42 insertions(+), 17 deletions(-)
 create mode 100644 p2p/net/connmgr/watchdog_cgo.go
 create mode 100644 p2p/net/connmgr/watchdog_no_cgo.go

diff --git a/p2p/net/connmgr/connmgr.go b/p2p/net/connmgr/connmgr.go
index a42c443fdb..9ee587015c 100644
--- a/p2p/net/connmgr/connmgr.go
+++ b/p2p/net/connmgr/connmgr.go
@@ -13,7 +13,6 @@ import (
 
 	logging "github.com/ipfs/go-log/v2"
 	ma "github.com/multiformats/go-multiaddr"
-	"github.com/raulk/go-watchdog"
 )
 
 var log = logging.Logger("connmgr")
@@ -44,10 +43,10 @@ type BasicConnMgr struct {
 	lastTrimMu sync.RWMutex
 	lastTrim   time.Time
 
-	refCount           sync.WaitGroup
-	ctx                context.Context
-	cancel             func()
-	unregisterWatchdog func()
+	refCount                sync.WaitGroup
+	ctx                     context.Context
+	cancel                  func()
+	unregisterMemoryWatcher func()
 }
 
 var (
@@ -131,9 +130,7 @@ func NewConnManager(low, hi int, opts ...Option) (*BasicConnMgr, error) {
 	if cfg.emergencyTrim {
 		// When we're running low on memory, immediately trigger a trim.
-		cm.unregisterWatchdog = watchdog.RegisterPostGCNotifee(cm.memoryEmergency)
-	} else {
-		cm.unregisterWatchdog = func() {}
+		cm.unregisterMemoryWatcher = registerWatchdog(cm.memoryEmergency)
 	}
 
 	decay, _ := NewDecayer(cfg.decayer, cm)
 	cm.decayer = decay
@@ -176,7 +173,9 @@ func (cm *BasicConnMgr) memoryEmergency() {
 
 func (cm *BasicConnMgr) Close() error {
 	cm.cancel()
-	cm.unregisterWatchdog()
+	if cm.unregisterMemoryWatcher != nil {
+		cm.unregisterMemoryWatcher()
+	}
 	if err := cm.decayer.Close(); err != nil {
 		return err
 	}
diff --git a/p2p/net/connmgr/options.go b/p2p/net/connmgr/options.go
index d66884c779..76b4ef386d 100644
--- a/p2p/net/connmgr/options.go
+++ b/p2p/net/connmgr/options.go
@@ -51,11 +51,3 @@ func WithSilencePeriod(p time.Duration) Option {
 		return nil
 	}
 }
-
-// WithEmergencyTrim is an option to enable trimming connections on memory emergency.
-func WithEmergencyTrim(enable bool) Option {
-	return func(cfg *config) error {
-		cfg.emergencyTrim = enable
-		return nil
-	}
-}
diff --git a/p2p/net/connmgr/watchdog_cgo.go b/p2p/net/connmgr/watchdog_cgo.go
new file mode 100644
index 0000000000..b048111f36
--- /dev/null
+++ b/p2p/net/connmgr/watchdog_cgo.go
@@ -0,0 +1,18 @@
+//go:build cgo
+// +build cgo
+
+package connmgr
+
+import "github.com/raulk/go-watchdog"
+
+func registerWatchdog(cb func()) (unregister func()) {
+	return watchdog.RegisterPostGCNotifee(cb)
+}
+
+// WithEmergencyTrim is an option to enable trimming connections on memory emergency.
+func WithEmergencyTrim(enable bool) Option {
+	return func(cfg *config) error {
+		cfg.emergencyTrim = enable
+		return nil
+	}
+}
diff --git a/p2p/net/connmgr/watchdog_no_cgo.go b/p2p/net/connmgr/watchdog_no_cgo.go
new file mode 100644
index 0000000000..fb83c286de
--- /dev/null
+++ b/p2p/net/connmgr/watchdog_no_cgo.go
@@ -0,0 +1,16 @@
+//go:build !cgo
+// +build !cgo
+
+package connmgr
+
+func registerWatchdog(func()) (unregister func()) {
+	return nil
+}
+
+// WithEmergencyTrim is an option to enable trimming connections on memory emergency.
+func WithEmergencyTrim(enable bool) Option {
+	return func(cfg *config) error {
+		log.Warn("platform doesn't support go-watchdog")
+		return nil
+	}
+}
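
Usage sketch (illustrative only, not part of any patch above): a minimal example of how a caller might opt in to the emergency trim once PATCH 58 is applied. The import path is an assumption based on the p2p/net/connmgr paths in these diffs, the watermark values and grace period are arbitrary, and WithGracePeriod is assumed to be available from the same options.go as WithSilencePeriod. On builds without cgo, WithEmergencyTrim only logs the warning shown in watchdog_no_cgo.go.

package main

import (
	"log"
	"time"

	"github.com/libp2p/go-libp2p/p2p/net/connmgr" // assumed import path
)

func main() {
	// Keep roughly between 100 and 400 connections; once the high watermark
	// is crossed, the manager trims back down towards the low watermark.
	cm, err := connmgr.NewConnManager(
		100, // low watermark
		400, // high watermark
		connmgr.WithGracePeriod(time.Minute), // assumed option: newly opened conns are exempt from trimming
		// Opt in to the watchdog-driven trim from PATCH 57/58; without cgo
		// this only logs "platform doesn't support go-watchdog".
		connmgr.WithEmergencyTrim(true),
	)
	if err != nil {
		log.Fatal(err)
	}
	defer cm.Close()

	// In a real node, cm would be handed to the libp2p host constructor so
	// that connection notifications and peer tagging drive the trimming.
}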