Skip to content

Commit 0ecd4c2

Browse files
authored
Implement RTT measurement (#4454)
Signed-off-by: Yacov Manevich <yacov.manevich@avalabs.org>
1 parent b89cdaf commit 0ecd4c2

File tree

2 files changed

+37
-1
lines changed

2 files changed

+37
-1
lines changed

network/peer/metrics.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ type Metrics struct {
3131
ClockSkewCount prometheus.Counter
3232
ClockSkewSum prometheus.Gauge
3333

34+
RTTCount prometheus.Counter
35+
RTTSum prometheus.Gauge
36+
3437
NumFailedToParse prometheus.Counter
3538
NumSendFailed *prometheus.CounterVec // op
3639

@@ -41,6 +44,14 @@ type Metrics struct {
4144

4245
func NewMetrics(registerer prometheus.Registerer) (*Metrics, error) {
4346
m := &Metrics{
47+
RTTCount: prometheus.NewCounter(prometheus.CounterOpts{
48+
Name: "round_trip_count",
49+
Help: "number of RTT samples taken (n)",
50+
}),
51+
RTTSum: prometheus.NewGauge(prometheus.GaugeOpts{
52+
Name: "round_trip_sum",
53+
Help: "sum of RTT samples taken (ms)",
54+
}),
4455
ClockSkewCount: prometheus.NewCounter(prometheus.CounterOpts{
4556
Name: "clock_skew_count",
4657
Help: "number of handshake timestamps inspected (n)",
@@ -83,6 +94,8 @@ func NewMetrics(registerer prometheus.Registerer) (*Metrics, error) {
8394
),
8495
}
8596
return m, errors.Join(
97+
registerer.Register(m.RTTCount),
98+
registerer.Register(m.RTTSum),
8699
registerer.Register(m.ClockSkewCount),
87100
registerer.Register(m.ClockSkewSum),
88101
registerer.Register(m.NumFailedToParse),

network/peer/peer.go

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,9 @@ type peer struct {
191191
// Must only be accessed atomically
192192
lastSent, lastReceived int64
193193

194+
// lastPingSent is the milliseconds since 1970-01-01 UTC when the last ping was sent
195+
lastPingSent int64
196+
194197
// getPeerListChan signals that we should attempt to send a GetPeerList to
195198
// this peer
196199
getPeerListChan chan struct{}
@@ -613,6 +616,10 @@ func (p *peer) writeMessage(writer io.Writer, msg message.OutboundMessage) {
613616
return
614617
}
615618

619+
if msg.Op() == message.PingOp {
620+
atomic.StoreInt64(&p.lastPingSent, p.Clock.Time().UnixMilli())
621+
}
622+
616623
// Write the message
617624
var buf net.Buffers = [][]byte{msgLenBytes[:], msgBytes}
618625
if _, err := io.CopyN(writer, &buf, int64(wrappers.IntLen+msgLen)); err != nil {
@@ -819,7 +826,23 @@ func (p *peer) getUptime() uint32 {
819826
return primaryUptimePercent
820827
}
821828

822-
func (*peer) handlePong(*p2p.Pong) {}
829+
func (p *peer) handlePong(*p2p.Pong) {
830+
pingSent := atomic.SwapInt64(&p.lastPingSent, 0)
831+
if pingSent == 0 {
832+
p.Log.Debug(malformedMessageLog,
833+
zap.Stringer("nodeID", p.id),
834+
zap.Stringer("messageOp", message.PongOp),
835+
zap.String("reason", "received unexpected pong"),
836+
)
837+
p.StartClose()
838+
return
839+
}
840+
841+
elapsed := p.Clock.Time().UnixMilli() - pingSent
842+
843+
p.Metrics.RTTCount.Inc()
844+
p.Metrics.RTTSum.Add(float64(elapsed))
845+
}
823846

824847
func (p *peer) handleHandshake(msg *p2p.Handshake) {
825848
if p.gotHandshake.Get() {

0 commit comments

Comments
 (0)