Skip to content

Commit

Permalink
Revert "Removed new metrics that seem to causing Prometheus to use a …
Browse files Browse the repository at this point in the history
…very lar…"
  • Loading branch information
turetske committed Nov 22, 2024
1 parent 62b413d commit e677197
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 0 deletions.
38 changes: 38 additions & 0 deletions director/director.go
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,17 @@ func checkRedirectQuery(query url.Values) error {

func redirectToCache(ginCtx *gin.Context) {
reqVer, service, _ := extractVersionAndService(ginCtx)
// Flag to indicate if the request was redirected to a cache
// For metric collection purposes
// see collectDirectorRedirectionMetric
redirectedToCache := true
defer func() {
if !redirectedToCache {
collectDirectorRedirectionMetric(ginCtx, "origin")
} else {
collectDirectorRedirectionMetric(ginCtx, "cache")
}
}()
err := versionCompatCheck(reqVer, service)
if err != nil {
log.Warningf("A version incompatibility was encountered while redirecting to a cache and no response was served: %v", err)
Expand Down Expand Up @@ -486,6 +497,11 @@ func redirectToCache(ginCtx *gin.Context) {
})
return
}
// At this point, the cacheAds is full of originAds
// We need to indicate that we are redirecting to an origin and not a cache
// This is for the purpose of metrics
// See collectDirectorRedirectionMetric
redirectedToCache = false
}

ctx := context.Background()
Expand Down Expand Up @@ -552,6 +568,7 @@ func redirectToCache(ginCtx *gin.Context) {

func redirectToOrigin(ginCtx *gin.Context) {
reqVer, service, _ := extractVersionAndService(ginCtx)
defer collectDirectorRedirectionMetric(ginCtx, "origin")
err := versionCompatCheck(reqVer, service)
if err != nil {
log.Warningf("A version incompatibility was encountered while redirecting to an origin and no response was served: %v", err)
Expand Down Expand Up @@ -1352,6 +1369,27 @@ func collectClientVersionMetric(reqVer *version.Version, service string) {
metrics.PelicanDirectorClientVersionTotal.With(prometheus.Labels{"version": shortendVersion, "service": service}).Inc()
}

// collectDirectorRedirectionMetric increments PelicanDirectorRedirectionsTotal
// for a request handled by the director.
//
// destination is the value of the "destination" label; the callers in this file
// pass "cache" or "origin". The "status_code" label is read from the gin response
// writer at the moment of the call, so this is meant to run (e.g. via defer)
// after the handler has written its response status.
//
// The "version" label is the client version reported by extractVersionAndService,
// or "unknown" when no version is available. A request whose version cannot be
// parsed is logged and still counted under "unknown": the redirect handlers
// ignore that same parse error and serve the request anyway, so dropping the
// sample here would make the total undercount what the director actually did.
func collectDirectorRedirectionMetric(ctx *gin.Context, destination string) {
	// Named reqVer (not version) so the imported version package is not shadowed.
	reqVer, _, err := extractVersionAndService(ctx)
	if err != nil {
		log.Warningf("Failed to extract version and service from request: %v", err)
		// Do not trust a partially populated result on error.
		reqVer = nil
	}

	clientVersion := "unknown"
	if reqVer != nil {
		clientVersion = reqVer.String()
	}

	metrics.PelicanDirectorRedirectionsTotal.With(prometheus.Labels{
		"destination": destination,
		"status_code": strconv.Itoa(ctx.Writer.Status()),
		"version":     clientVersion,
	}).Inc()
}

func RegisterDirectorAPI(ctx context.Context, router *gin.RouterGroup) {
directorAPIV1 := router.Group("/api/v1.0/director")
{
Expand Down
9 changes: 9 additions & 0 deletions director/sort.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"github.com/prometheus/client_golang/prometheus"
log "github.com/sirupsen/logrus"

"github.com/pelicanplatform/pelican/metrics"
"github.com/pelicanplatform/pelican/param"
"github.com/pelicanplatform/pelican/server_structs"
)
Expand Down Expand Up @@ -171,10 +172,14 @@ func getLatLong(ctx context.Context, addr netip.Addr) (lat float64, long float64
reader := maxMindReader.Load()
if reader == nil {
err = errors.New("No GeoIP database is available")
labels["source"] = "server"
metrics.PelicanDirectorGeoIPErrors.With(labels).Inc()
return
}
record, err := reader.City(ip)
if err != nil {
labels["source"] = "server"
metrics.PelicanDirectorGeoIPErrors.With(labels).Inc()
return
}
lat = record.Location.Latitude
Expand All @@ -185,6 +190,8 @@ func getLatLong(ctx context.Context, addr netip.Addr) (lat float64, long float64
// comes from a private range.
if lat == 0 && long == 0 {
log.Warningf("GeoIP Resolution of the address %s resulted in the null lat/long. This will result in random server sorting.", ip.String())
labels["source"] = "client"
metrics.PelicanDirectorGeoIPErrors.With(labels).Inc()
}

// MaxMind provides an accuracy radius in kilometers. When it actually has no clue how to resolve a valid, public
Expand All @@ -196,6 +203,8 @@ func getLatLong(ctx context.Context, addr netip.Addr) (lat float64, long float64
"This will be treated as GeoIP resolution failure and result in random server sorting. Setting lat/long to null.", ip.String(), record.Location.AccuracyRadius)
lat = 0
long = 0
labels["source"] = "client"
metrics.PelicanDirectorGeoIPErrors.With(labels).Inc()
}

return
Expand Down
10 changes: 10 additions & 0 deletions metrics/director.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,4 +91,14 @@ var (
Name: "pelican_director_client_version_total",
Help: "The total number of requests from client versions.",
}, []string{"version", "service"})

// PelicanDirectorRedirectionsTotal counts the redirections issued by the director.
// Each sample is labelled with:
//   - destination: where the request was redirected (the director passes "cache" or "origin")
//   - status_code: the HTTP status code of the director's response, as a decimal string
//   - version: the requesting client's version string, or "unknown" when none is available
//
// NOTE(review): status_code and version are derived per request, so the number of
// distinct series grows with the variety of client versions seen — confirm this
// cardinality is acceptable for the Prometheus deployment.
PelicanDirectorRedirectionsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "pelican_director_redirections_total",
Help: "The total number of redirections the director issued.",
}, []string{"destination", "status_code", "version"})

// PelicanDirectorGeoIPErrors counts failures to resolve coordinates for an
// address using the GeoIP MaxMind database. Each sample is labelled with
// "source" and "proj".
//
// NOTE(review): this is a counter but its name lacks the conventional "_total"
// suffix used by the other counters in this block; renaming would change the
// exported metric name, so confirm with dashboard/alert owners before doing so.
PelicanDirectorGeoIPErrors = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "pelican_director_geoip_errors",
Help: "The total number of errors encountered trying to resolve coordinates using the GeoIP MaxMind database",
}, []string{"source", "proj"})
)

0 comments on commit e677197

Please sign in to comment.