diff --git a/director/director.go b/director/director.go
index 21bf7c348..37b9a37f2 100644
--- a/director/director.go
+++ b/director/director.go
@@ -346,6 +346,17 @@ func checkRedirectQuery(query url.Values) error {
 
 func redirectToCache(ginCtx *gin.Context) {
 	reqVer, service, _ := extractVersionAndService(ginCtx)
+	// Tracks whether this request ends up being sent to a cache. It is set to
+	// false further down when cacheAds is filled with originAds, so that the
+	// deferred collectDirectorRedirectionMetric call reports the right destination.
+	redirectedToCache := true
+	defer func() {
+		destination := "cache"
+		if !redirectedToCache {
+			destination = "origin"
+		}
+		collectDirectorRedirectionMetric(ginCtx, destination)
+	}()
 	err := versionCompatCheck(reqVer, service)
 	if err != nil {
 		log.Warningf("A version incompatibility was encountered while redirecting to a cache and no response was served: %v", err)
@@ -486,6 +497,11 @@ func redirectToCache(ginCtx *gin.Context) {
 			})
 			return
 		}
+		// At this point cacheAds has been filled with originAds, so the
+		// client is being redirected to an origin rather than a cache.
+		// Record that for the redirection metric; see
+		// collectDirectorRedirectionMetric.
+		redirectedToCache = false
 	}
 
 	ctx := context.Background()
@@ -552,6 +568,7 @@ func redirectToCache(ginCtx *gin.Context) {
 
 func redirectToOrigin(ginCtx *gin.Context) {
 	reqVer, service, _ := extractVersionAndService(ginCtx)
+	defer collectDirectorRedirectionMetric(ginCtx, "origin")
 	err := versionCompatCheck(reqVer, service)
 	if err != nil {
 		log.Warningf("A version incompatibility was encountered while redirecting to an origin and no response was served: %v", err)
@@ -1352,6 +1369,28 @@ func collectClientVersionMetric(reqVer *version.Version, service string) {
 	metrics.PelicanDirectorClientVersionTotal.With(prometheus.Labels{"version": shortendVersion, "service": service}).Inc()
 }
 
+// collectDirectorRedirectionMetric increments PelicanDirectorRedirectionsTotal,
+// labeled with the given destination, the status code currently recorded on the
+// gin response writer, and the client version extracted from the request.
+func collectDirectorRedirectionMetric(ctx *gin.Context, destination string) {
+	// Fall back to "unknown" so the redirection is still counted when the
+	// client version is missing or cannot be extracted.
+	reqVersion := "unknown"
+	// Named reqVer rather than version to avoid shadowing the version package.
+	reqVer, _, err := extractVersionAndService(ctx)
+	if err != nil {
+		log.Warningf("Failed to extract version and service from request: %v", err)
+	} else if reqVer != nil {
+		reqVersion = reqVer.String()
+	}
+
+	metrics.PelicanDirectorRedirectionsTotal.With(prometheus.Labels{
+		"destination": destination,
+		"status_code": strconv.Itoa(ctx.Writer.Status()),
+		"version":     reqVersion,
+	}).Inc()
+}
+
 func RegisterDirectorAPI(ctx context.Context, router *gin.RouterGroup) {
 	directorAPIV1 := router.Group("/api/v1.0/director")
 	{
diff --git a/director/sort.go b/director/sort.go
index 37dab4d69..b261f6d08 100644
--- a/director/sort.go
+++ b/director/sort.go
@@ -34,6 +34,7 @@ import (
 	"github.com/prometheus/client_golang/prometheus"
 	log "github.com/sirupsen/logrus"
 
+	"github.com/pelicanplatform/pelican/metrics"
 	"github.com/pelicanplatform/pelican/param"
 	"github.com/pelicanplatform/pelican/server_structs"
 )
@@ -171,10 +172,14 @@ func getLatLong(ctx context.Context, addr netip.Addr) (lat float64, long float64
 	reader := maxMindReader.Load()
 	if reader == nil {
 		err = errors.New("No GeoIP database is available")
+		labels["source"] = "server"
+		metrics.PelicanDirectorGeoIPErrors.With(labels).Inc()
 		return
 	}
 	record, err := reader.City(ip)
 	if err != nil {
+		labels["source"] = "server"
+		metrics.PelicanDirectorGeoIPErrors.With(labels).Inc()
 		return
 	}
 	lat = record.Location.Latitude
@@ -185,6 +190,8 @@ func getLatLong(ctx context.Context, addr netip.Addr) (lat float64, long float64
 	// comes from a private range.
 	if lat == 0 && long == 0 {
 		log.Warningf("GeoIP Resolution of the address %s resulted in the null lat/long. This will result in random server sorting.", ip.String())
+		labels["source"] = "client"
+		metrics.PelicanDirectorGeoIPErrors.With(labels).Inc()
 	}
 
 	// MaxMind provides an accuracy radius in kilometers. When it actually has no clue how to resolve a valid, public
@@ -196,6 +203,8 @@ func getLatLong(ctx context.Context, addr netip.Addr) (lat float64, long float64
 			"This will be treated as GeoIP resolution failure and result in random server sorting. Setting lat/long to null.", ip.String(), record.Location.AccuracyRadius)
 		lat = 0
 		long = 0
+		labels["source"] = "client"
+		metrics.PelicanDirectorGeoIPErrors.With(labels).Inc()
 	}
 
 	return
diff --git a/metrics/director.go b/metrics/director.go
index 5675498c6..b5a91a4cd 100644
--- a/metrics/director.go
+++ b/metrics/director.go
@@ -91,4 +91,16 @@ var (
 		Name: "pelican_director_client_version_total",
 		Help: "The total number of requests from client versions.",
 	}, []string{"version", "service"})
+
+	// PelicanDirectorRedirectionsTotal counts director redirections, labeled by destination, status code, and client version.
+	PelicanDirectorRedirectionsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
+		Name: "pelican_director_redirections_total",
+		Help: "The total number of redirections the director issued.",
+	}, []string{"destination", "status_code", "version"})
+
+	// PelicanDirectorGeoIPErrors counts GeoIP coordinate-resolution errors, labeled by source and proj.
+	PelicanDirectorGeoIPErrors = promauto.NewCounterVec(prometheus.CounterOpts{
+		Name: "pelican_director_geoip_errors",
+		Help: "The total number of errors encountered trying to resolve coordinates using the GeoIP MaxMind database",
+	}, []string{"source", "proj"})
 )