Skip to content

Commit

Permalink
[FAB-4535] Use deadlines/timeouts in gossip Probing
Browse files Browse the repository at this point in the history
** Post V1.0 item **

In gossip we have an RPC call that is used to probe
whether a remote peer is responsive or not:

rpc Ping (Empty) returns (Empty) {}

The implementation of the method doesn't do anything and immediately
returns.
It is used to probe remote peers in 2 situations:

    Handshake method (when probing anchor / bootstrap peers )
    When connecting to a remote peer for long term connection
    Used by the discovery layer to probe dead peers to see if
    they have come back alive

A malicious peer can implement his own implementation of the server-side
Ping() method that blocks indefinitely and this would slow down the peer
 and waste resources for it.

Therefore I suggest that we use timeouts when invoking the Ping RPC call
in order to defend against this.

Change-Id: Iec5c632019eea3310e789c11b793ab7d8ff00832
Signed-off-by: Yacov Manevich <yacovm@il.ibm.com>
  • Loading branch information
yacovm committed Jul 16, 2017
1 parent d9c3202 commit 0969b38
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 3 deletions.
12 changes: 9 additions & 3 deletions gossip/comm/comm_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,9 @@ func (c *commImpl) createConnection(endpoint string, expectedPKIID common.PKIidT

cl := proto.NewGossipClient(cc)

if _, err = cl.Ping(context.Background(), &proto.Empty{}); err != nil {
ctx, cancel := context.WithTimeout(context.Background(), defConnTimeout)
defer cancel()
if _, err = cl.Ping(ctx, &proto.Empty{}); err != nil {
cc.Close()
return nil, err
}
Expand Down Expand Up @@ -276,7 +278,9 @@ func (c *commImpl) Probe(remotePeer *RemotePeer) error {
}
defer cc.Close()
cl := proto.NewGossipClient(cc)
_, err = cl.Ping(context.Background(), &proto.Empty{})
ctx, cancel := context.WithTimeout(context.Background(), defConnTimeout)
defer cancel()
_, err = cl.Ping(ctx, &proto.Empty{})
c.logger.Debug("Returning", err)
return err
}
Expand All @@ -294,7 +298,9 @@ func (c *commImpl) Handshake(remotePeer *RemotePeer) (api.PeerIdentityType, erro
defer cc.Close()

cl := proto.NewGossipClient(cc)
if _, err = cl.Ping(context.Background(), &proto.Empty{}); err != nil {
ctx, cancel := context.WithTimeout(context.Background(), defConnTimeout)
defer cancel()
if _, err = cl.Ping(ctx, &proto.Empty{}); err != nil {
return nil, err
}

Expand Down
54 changes: 54 additions & 0 deletions gossip/comm/comm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,60 @@ func TestParallelSend(t *testing.T) {
assert.Equal(t, messages2Send, c)
}

type nonResponsivePeer struct {
net.Listener
*grpc.Server
port int
}

func newNonResponsivePeer() *nonResponsivePeer {
rand.Seed(time.Now().UnixNano())
port := 50000 + rand.Intn(1000)
s, l, _, _ := createGRPCLayer(port)
nrp := &nonResponsivePeer{
Listener: l,
Server: s,
port: port,
}
proto.RegisterGossipServer(s, nrp)
go s.Serve(l)
return nrp
}

func (bp *nonResponsivePeer) Ping(context.Context, *proto.Empty) (*proto.Empty, error) {
time.Sleep(time.Second * 15)
return &proto.Empty{}, nil
}

func (bp *nonResponsivePeer) GossipStream(stream proto.Gossip_GossipStreamServer) error {
return nil
}

func (bp *nonResponsivePeer) stop() {
bp.Server.Stop()
bp.Listener.Close()
}

func TestNonResponsivePing(t *testing.T) {
t.Parallel()
port := 50000 - rand.Intn(1000)
c, _ := newCommInstance(port, naiveSec)
defer c.Stop()
nonRespPeer := newNonResponsivePeer()
defer nonRespPeer.stop()
s := make(chan struct{})
go func() {
c.Probe(remotePeer(nonRespPeer.port))
s <- struct{}{}
}()
select {
case <-time.After(time.Second * 10):
assert.Fail(t, "Request wasn't cancelled on time")
case <-s:
}

}

func TestResponses(t *testing.T) {
t.Parallel()
comm1, _ := newCommInstance(8611, naiveSec)
Expand Down

0 comments on commit 0969b38

Please sign in to comment.