From a5bbc0680f4956cdc219c50d4f7ae5653ffe0a89 Mon Sep 17 00:00:00 2001 From: SkalaNetworks Date: Thu, 10 Oct 2024 09:52:31 +0000 Subject: [PATCH] feat(metrics): add various new metrics This commit adds new metrics to GoBGP, inspired by https://github.com/greenpau/gobgp_exporter It also uses Prometheus namespaces for cleaner refactors and the possibility of overriding the metric prefix to accommodate possible conflict with a generic one like "bgp" Signed-off-by: SkalaNetworks --- internal/pkg/metrics/metrics.go | 239 ++++++++++++++++++++++++++++---- 1 file changed, 209 insertions(+), 30 deletions(-) diff --git a/internal/pkg/metrics/metrics.go b/internal/pkg/metrics/metrics.go index efcfbb07f..8679c7af8 100644 --- a/internal/pkg/metrics/metrics.go +++ b/internal/pkg/metrics/metrics.go @@ -14,45 +14,173 @@ type bgpCollector struct { server *server.BgpServer } +const ( + // Global namespace of the metrics + namespace = "bgp" +) + var ( - peerLabels = []string{"peer"} - peerStateLabels = []string{"peer", "session_state", "admin_state"} - rfLabels = []string{"peer", "route_family"} - - bgpReceivedUpdateTotalDesc = prometheus.NewDesc("bgp_received_update_total", "Number of received BGP UPDATE messages from peer", peerLabels, nil) - bgpReceivedNotificationTotalDesc = prometheus.NewDesc("bgp_received_notification_total", "Number of received BGP NOTIFICATION messages from peer", peerLabels, nil) - bgpReceivedOpenTotalDesc = prometheus.NewDesc("bgp_received_open_total", "Number of received BGP OPEN messages from peer", peerLabels, nil) - bgpReceivedRefreshTotalDesc = prometheus.NewDesc("bgp_received_refresh_total", "Number of received BGP REFRESH messages from peer", peerLabels, nil) - bgpReceivedKeepaliveTotalDesc = prometheus.NewDesc("bgp_received_keepalive_total", "Number of received BGP KEEPALIVE messages from peer", peerLabels, nil) - bgpReceivedWithdrawUpdateTotalDesc = prometheus.NewDesc("bgp_received_withdraw_update_total", "Number of received BGP WITHDRAW-UPDATE 
messages from peer", peerLabels, nil) - bgpReceivedWithdrawPrefixTotalDesc = prometheus.NewDesc("bgp_received_withdraw_prefix_total", "Number of received BGP WITHDRAW-PREFIX messages from peer", peerLabels, nil) - bgpReceivedDiscardedTotalDesc = prometheus.NewDesc("bgp_received_discarded_total", "Number of discarded BGP messages from peer", peerLabels, nil) - bgpReceivedMessageTotalDesc = prometheus.NewDesc("bgp_received_message_total", "Number of received BGP messages from peer", peerLabels, nil) - - bgpSentUpdateTotalDesc = prometheus.NewDesc("bgp_sent_update_total", "Number of sent BGP UPDATE messages from peer", peerLabels, nil) - bgpSentNotificationTotalDesc = prometheus.NewDesc("bgp_sent_notification_total", "Number of sent BGP NOTIFICATION messages from peer", peerLabels, nil) - bgpSentOpenTotalDesc = prometheus.NewDesc("bgp_sent_open_total", "Number of sent BGP OPEN messages from peer", peerLabels, nil) - bgpSentRefreshTotalDesc = prometheus.NewDesc("bgp_sent_refresh_total", "Number of sent BGP REFRESH messages from peer", peerLabels, nil) - bgpSentKeepaliveTotalDesc = prometheus.NewDesc("bgp_sent_keepalive_total", "Number of sent BGP KEEPALIVE messages from peer", peerLabels, nil) - bgpSentWithdrawUpdateTotalDesc = prometheus.NewDesc("bgp_sent_withdraw_update_total", "Number of sent BGP WITHDRAW-UPDATE messages from peer", peerLabels, nil) - bgpSentWithdrawPrefixTotalDesc = prometheus.NewDesc("bgp_sent_withdraw_prefix_total", "Number of sent BGP WITHDRAW-PREFIX messages from peer", peerLabels, nil) - bgpSentDiscardedTotalDesc = prometheus.NewDesc("bgp_sent_discarded_total", "Number of discarded BGP messages from peer", peerLabels, nil) - bgpSentMessageTotalDesc = prometheus.NewDesc("bgp_sent_message_total", "Number of sent BGP messages from peer", peerLabels, nil) - - bgpPeerStateDesc = prometheus.NewDesc("bgp_peer_state", "State of the BGP session with peer", peerStateLabels, nil) + // Labels appended to the metrics + peerLabels = []string{"peer"} + 
peerRouterIdLabels = []string{"peer", "routerId"} + peerStateLabels = []string{"peer", "session_state", "admin_state"} + rfLabels = []string{"peer", "route_family"} + + bgpReceivedUpdateTotalDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "received", "update_total"), + "Number of received BGP UPDATE messages from peer", + peerLabels, nil, + ) + bgpReceivedNotificationTotalDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "received", "notification_total"), + "Number of received BGP NOTIFICATION messages from peer", + peerLabels, nil, + ) + bgpReceivedOpenTotalDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "received", "open_total"), + "Number of received BGP OPEN messages from peer", + peerLabels, nil, + ) + bgpReceivedRefreshTotalDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "received", "refresh_total"), + "Number of received BGP REFRESH messages from peer", + peerLabels, nil, + ) + bgpReceivedKeepaliveTotalDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "received", "keepalive_total"), + "Number of received BGP KEEPALIVE messages from peer", + peerLabels, nil, + ) + bgpReceivedWithdrawUpdateTotalDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "received", "withdraw_update_total"), + "Number of received BGP WITHDRAW-UPDATE messages from peer", + peerLabels, nil, + ) + bgpReceivedWithdrawPrefixTotalDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "received", "withdraw_prefix_total"), + "Number of received BGP WITHDRAW-PREFIX messages from peer", + peerLabels, nil, + ) + bgpReceivedDiscardedTotalDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "received", "discarded_total"), + "Number of discarded BGP messages from peer", + peerLabels, nil, + ) + bgpReceivedMessageTotalDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "received", "message_total"), + "Number of received BGP messages from peer", + peerLabels, nil, + ) + + 
bgpSentUpdateTotalDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "sent", "update_total"), + "Number of sent BGP UPDATE messages from peer", + peerLabels, nil, + ) + bgpSentNotificationTotalDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "sent", "notification_total"), + "Number of sent BGP NOTIFICATION messages from peer", + peerLabels, nil, + ) + bgpSentOpenTotalDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "sent", "open_total"), + "Number of sent BGP OPEN messages from peer", + peerLabels, nil, + ) + bgpSentRefreshTotalDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "sent", "refresh_total"), + "Number of sent BGP REFRESH messages from peer", + peerLabels, nil, + ) + bgpSentKeepaliveTotalDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "sent", "keepalive_total"), + "Number of sent BGP KEEPALIVE messages from peer", + peerLabels, nil, + ) + bgpSentWithdrawUpdateTotalDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "sent", "withdraw_update_total"), + "Number of sent BGP WITHDRAW-UPDATE messages from peer", + peerLabels, nil, + ) + bgpSentWithdrawPrefixTotalDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "sent", "withdraw_prefix_total"), + "Number of sent BGP WITHDRAW-PREFIX messages from peer", + peerLabels, nil, + ) + bgpSentDiscardedTotalDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "sent", "discarded_total"), + "Number of discarded BGP messages to peer", peerLabels, + nil, + ) + bgpSentMessageTotalDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "sent", "message_total"), + "Number of sent BGP messages from peer", peerLabels, + nil, + ) + + bgpPeerOutQueueDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "peer", "out_queue_count"), + "Length of the outgoing message queue", + peerLabels, nil, + ) + bgpPeerFlopsDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "peer", "flop_count"), + "Number of flops with 
the peer", + peerLabels, nil, + ) + bgpPeerUptimeDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "peer", "uptime"), + "For how long the peer has been in its current state", + peerLabels, nil, + ) + bgpPeerSendCommunityFlagDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "peer", "send_community"), + "BGP community with the peer", + peerLabels, nil, + ) + bgpPeerRemovePrivateAsFlagDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "peer", "remove_private_as"), + "Do we remove private ASNs from the paths sent to the peer", + peerLabels, nil, + ) + bgpPeerPasswordSetFlagDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "peer", "password_set"), + "Whether the GoBGP peer has been configured (1) for authentication or not (0)", + peerLabels, nil, + ) + bgpPeerTypeDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "peer", "type"), + "Type of the BGP peer, internal (0) or external (1)", + peerLabels, nil, + ) + bgpPeerAsnDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "peer", "asn"), + "What is the AS number of the peer", + peerRouterIdLabels, nil, + ) + bgpPeerLocalAsnDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "peer", "local_asn"), + "What is the AS number presented to the peer by this router", + peerRouterIdLabels, nil, + ) + bgpPeerStateDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "peer", "state"), + "State of the BGP session with peer and its administrative state", + peerStateLabels, nil, + ) bgpRoutesReceivedDesc = prometheus.NewDesc( - "bgp_routes_received", + prometheus.BuildFQName(namespace, "routes", "received"), "Number of routes received from peer", rfLabels, nil, ) bgpRoutesAcceptedDesc = prometheus.NewDesc( - "bgp_routes_accepted", + prometheus.BuildFQName(namespace, "routes", "accepted"), "Number of routes accepted from peer", rfLabels, nil, ) bgpRoutesAdvertisedDesc = prometheus.NewDesc( - "bgp_routes_advertised", + 
prometheus.BuildFQName(namespace, "routes", "advertised"), "Number of routes advertised to peer", rfLabels, nil, ) @@ -83,6 +211,15 @@ func (c *bgpCollector) Describe(out chan<- *prometheus.Desc) { out <- bgpSentDiscardedTotalDesc out <- bgpSentMessageTotalDesc + out <- bgpPeerOutQueueDesc + out <- bgpPeerFlopsDesc + out <- bgpPeerUptimeDesc + out <- bgpPeerSendCommunityFlagDesc + out <- bgpPeerRemovePrivateAsFlagDesc + out <- bgpPeerPasswordSetFlagDesc + out <- bgpPeerTypeDesc + out <- bgpPeerAsnDesc + out <- bgpPeerLocalAsnDesc out <- bgpPeerStateDesc out <- bgpRoutesReceivedDesc @@ -95,12 +232,14 @@ func (c *bgpCollector) Collect(out chan<- prometheus.Metric) { err := c.server.ListPeer(context.Background(), req, func(p *api.Peer) { peerState := p.GetState() peerAddr := peerState.GetNeighborAddress() + peerTimers := p.GetTimers() msg := peerState.GetMessages() send := func(desc *prometheus.Desc, cnt uint64) { out <- prometheus.MustNewConstMetric(desc, prometheus.CounterValue, float64(cnt), peerAddr) } + // Statistics about BGP announcements we've received from our peers send(bgpReceivedUpdateTotalDesc, msg.Received.Update) send(bgpReceivedNotificationTotalDesc, msg.Received.Notification) send(bgpReceivedOpenTotalDesc, msg.Received.Open) @@ -111,16 +250,56 @@ func (c *bgpCollector) Collect(out chan<- prometheus.Metric) { send(bgpReceivedDiscardedTotalDesc, msg.Received.Discarded) send(bgpReceivedMessageTotalDesc, msg.Received.Total) + // Statistics about BGP announcements we've sent to our peers send(bgpSentUpdateTotalDesc, msg.Sent.Update) send(bgpSentNotificationTotalDesc, msg.Sent.Notification) send(bgpSentOpenTotalDesc, msg.Sent.Open) send(bgpSentRefreshTotalDesc, msg.Sent.Refresh) send(bgpSentKeepaliveTotalDesc, msg.Sent.Keepalive) - send(bgpSentWithdrawUpdateTotalDesc, uint64(msg.Sent.WithdrawUpdate)) - send(bgpSentWithdrawPrefixTotalDesc, uint64(msg.Sent.WithdrawPrefix)) + send(bgpSentWithdrawUpdateTotalDesc, msg.Sent.WithdrawUpdate) + 
send(bgpSentWithdrawPrefixTotalDesc, msg.Sent.WithdrawPrefix) send(bgpSentDiscardedTotalDesc, msg.Sent.Discarded) send(bgpSentMessageTotalDesc, msg.Sent.Total) + // The outbound queue message size + send(bgpPeerOutQueueDesc, uint64(peerState.GetOutQ())) + // The number of neighbor flops + send(bgpPeerFlopsDesc, uint64(peerState.GetFlops())) + // Uptime in seconds of the session + send(bgpPeerUptimeDesc, uint64(peerTimers.GetState().GetUptime().GetSeconds())) + // Whether BGP community is being sent + send(bgpPeerSendCommunityFlagDesc, uint64(peerState.GetSendCommunity())) + // Whether BGP Private AS is being removed (1) or not (0) + send(bgpPeerRemovePrivateAsFlagDesc, uint64(peerState.GetRemovePrivate())) + // Peer Type (0) for internal, (1) for external + send(bgpPeerTypeDesc, uint64(peerState.GetType())) + + // Whether authentication password is being set (1) or not (0) + passwordSetFlag := 0 + if peerState.GetAuthPassword() != "" { + passwordSetFlag = 1 + } + send(bgpPeerPasswordSetFlagDesc, uint64(passwordSetFlag)) + + // Remote peer router ID and ASN + out <- prometheus.MustNewConstMetric( + bgpPeerAsnDesc, + prometheus.GaugeValue, + float64(peerState.GetPeerAsn()), + peerAddr, + peerState.GetRouterId(), + ) + + // Local router ID and ASN advertised to peer + out <- prometheus.MustNewConstMetric( + bgpPeerLocalAsnDesc, + prometheus.GaugeValue, + float64(peerState.GetLocalAsn()), + peerAddr, + p.Transport.GetLocalAddress(), + ) + + // Session and administrative state of the peer out <- prometheus.MustNewConstMetric( bgpPeerStateDesc, prometheus.GaugeValue,