diff --git a/network/peer/metrics.go b/network/peer/metrics.go index 6569d4a682a3..1372806b73fa 100644 --- a/network/peer/metrics.go +++ b/network/peer/metrics.go @@ -31,6 +31,9 @@ type Metrics struct { ClockSkewCount prometheus.Counter ClockSkewSum prometheus.Gauge + RTTCount prometheus.Counter + RTTSum prometheus.Gauge + NumFailedToParse prometheus.Counter NumSendFailed *prometheus.CounterVec // op @@ -41,6 +44,14 @@ type Metrics struct { func NewMetrics(registerer prometheus.Registerer) (*Metrics, error) { m := &Metrics{ + RTTCount: prometheus.NewCounter(prometheus.CounterOpts{ + Name: "round_trip_count", + Help: "number of RTT samples taken (n)", + }), + RTTSum: prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "round_trip_sum", + Help: "sum of RTT samples taken (ms)", + }), ClockSkewCount: prometheus.NewCounter(prometheus.CounterOpts{ Name: "clock_skew_count", Help: "number of handshake timestamps inspected (n)", @@ -83,6 +94,8 @@ func NewMetrics(registerer prometheus.Registerer) (*Metrics, error) { ), } return m, errors.Join( + registerer.Register(m.RTTCount), + registerer.Register(m.RTTSum), registerer.Register(m.ClockSkewCount), registerer.Register(m.ClockSkewSum), registerer.Register(m.NumFailedToParse), diff --git a/network/peer/peer.go b/network/peer/peer.go index 616da073390b..fbf9760ab5f2 100644 --- a/network/peer/peer.go +++ b/network/peer/peer.go @@ -191,6 +191,9 @@ type peer struct { // Must only be accessed atomically lastSent, lastReceived int64 + // lastPingSent is the milliseconds since 1970-01-01 UTC when the last ping was sent + lastPingSent int64 + // getPeerListChan signals that we should attempt to send a GetPeerList to // this peer getPeerListChan chan struct{} @@ -613,6 +616,10 @@ func (p *peer) writeMessage(writer io.Writer, msg message.OutboundMessage) { return } + if msg.Op() == message.PingOp { + atomic.StoreInt64(&p.lastPingSent, p.Clock.Time().UnixMilli()) + } + // Write the message var buf net.Buffers = [][]byte{msgLenBytes[:], msgBytes} if _, err := io.CopyN(writer, &buf, int64(wrappers.IntLen+msgLen)); err != nil { @@ -819,7 +826,23 @@ func (p *peer) getUptime() uint32 { return primaryUptimePercent } -func (*peer) handlePong(*p2p.Pong) {} +func (p *peer) handlePong(*p2p.Pong) { + pingSent := atomic.SwapInt64(&p.lastPingSent, 0) + if pingSent == 0 { + p.Log.Debug(malformedMessageLog, + zap.Stringer("nodeID", p.id), + zap.Stringer("messageOp", message.PongOp), + zap.String("reason", "received unexpected pong"), + ) + p.StartClose() + return + } + + elapsed := p.Clock.Time().UnixMilli() - pingSent + + p.Metrics.RTTCount.Inc() + p.Metrics.RTTSum.Add(float64(elapsed)) +} func (p *peer) handleHandshake(msg *p2p.Handshake) { if p.gotHandshake.Get() {