Skip to content

Commit

Permalink
Merge pull request #392 from alphagov/route-reload-duration-metrics
Browse files Browse the repository at this point in the history
Add a summary metric for route reload durations
  • Loading branch information
richardTowers authored Aug 29, 2023
2 parents 511e043 + 624060b commit da612d8
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 18 deletions.
25 changes: 12 additions & 13 deletions lib/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,18 @@ var (
[]string{"host"},
)

// routeReloadCountMetric is a Prometheus counter exported as
// "router_route_reload_total"; per its help text it counts every attempt to
// reload the routing table.
routeReloadCountMetric = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "router_route_reload_total",
Help: "Total number of attempts to reload the routing table",
},
)

routeReloadErrorCountMetric = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "router_route_reload_error_total",
Help: "Number of failed attempts to reload the routing table",
// routeReloadDurationMetric is a Prometheus summary of route reload durations
// in seconds, partitioned by the "success" label.
//
// Objectives maps each tracked quantile (median, p90, p95, p99) to the
// absolute error tolerated when estimating it.
routeReloadDurationMetric = prometheus.NewSummaryVec(
	prometheus.SummaryOpts{
		Name: "router_route_reload_duration_seconds",
		// This collector is a summary (client-side quantiles), not a
		// histogram, so the help text must describe it as such.
		Help: "Summary of route reload durations in seconds",
		Objectives: map[float64]float64{
			0.5:  0.01,
			0.9:  0.01,
			0.95: 0.01,
			0.99: 0.005,
		},
	},
	[]string{"success"},
)

routesCountMetric = prometheus.NewGauge(
Expand All @@ -40,8 +40,7 @@ var (
func registerMetrics(r prometheus.Registerer) {
r.MustRegister(
internalServerErrorCountMetric,
routeReloadCountMetric,
routeReloadErrorCountMetric,
routeReloadDurationMetric,
routesCountMetric,
)
handlers.RegisterMetrics(r)
Expand Down
14 changes: 9 additions & 5 deletions lib/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"net/http"
"net/url"
"os"
"strconv"
"sync"
"time"

Expand Down Expand Up @@ -213,21 +214,24 @@ type mongoDatabase interface {
// create a new proxy mux, load applications (backends) and routes into it, and
// then flip the "mux" pointer in the Router.
func (rt *Router) reloadRoutes(db *mgo.Database, currentOptime bson.MongoTimestamp) {
var success bool
timer := prometheus.NewTimer(prometheus.ObserverFunc(func(v float64) {
labels := prometheus.Labels{"success": strconv.FormatBool(success)}
routeReloadDurationMetric.With(labels).Observe(v)
}))
defer func() {
// increment this metric regardless of whether the route reload succeeded
routeReloadCountMetric.Inc()

success = true
if r := recover(); r != nil {
success = false
logWarn("router: recovered from panic in reloadRoutes:", r)
logInfo("router: original routes have not been modified")
errorMessage := fmt.Sprintf("panic: %v", r)
err := logger.RecoveredError{ErrorMessage: errorMessage}
logger.NotifySentry(logger.ReportableError{Error: err})

routeReloadErrorCountMetric.Inc()
} else {
rt.mongoReadToOptime = currentOptime
}
timer.ObserveDuration()
}()

logInfo("router: reloading routes")
Expand Down

0 comments on commit da612d8

Please sign in to comment.