From 3684eea191f4cf18a9e2a04e61eea151e812a560 Mon Sep 17 00:00:00 2001 From: Craig Peterson <192540+captncraig@users.noreply.github.com> Date: Tue, 3 Oct 2023 15:49:57 -0400 Subject: [PATCH] `prometheus.operator.*` add debug endpoint to view generated scrape configs. (#5311) * add endpoint to debug generated scrape configs * changelog --- CHANGELOG.md | 2 + .../prometheus/operator/common/component.go | 38 +++++++++++++++++++ .../prometheus/operator/common/crdmanager.go | 33 ++++++++++++---- .../operator/podmonitors/operator.go | 3 +- .../prometheus/operator/probes/probes.go | 3 +- .../servicemonitors/servicemonitors.go | 3 +- component/prometheus/operator/types.go | 9 +++-- 7 files changed, 76 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce5c398936f2..c1c5ae9c6938 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -132,6 +132,8 @@ Main (unreleased) - Add new `agent_component_dependencies_wait_seconds` histogram metric and a dashboard panel that measures how long components wait to be evaluated after their dependency is updated (@thampiotr) +- Add additional endpoint to debug scrape configs generated inside `prometheus.operator.*` components (@captncraig) + - Components evaluation is now performed in parallel, reducing the impact of slow components potentially blocking the entire telemetry pipeline. The `agent_component_evaluation_seconds` metric now measures evaluation time diff --git a/component/prometheus/operator/common/component.go b/component/prometheus/operator/common/component.go index feff191557b2..cc3e44768c57 100644 --- a/component/prometheus/operator/common/component.go +++ b/component/prometheus/operator/common/component.go @@ -3,6 +3,8 @@ package common import ( "context" "fmt" + "net/http" + "strings" "sync" "time" @@ -10,6 +12,7 @@ import ( "github.com/grafana/agent/component" "github.com/grafana/agent/component/prometheus/operator" "github.com/grafana/agent/service/cluster" + "gopkg.in/yaml.v3" ) type Component struct { @@ -143,3 +146,38 @@ func (c *Component) reportHealth(err error) { } } } + +func (c *Component) Handler() http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // very simple path handling + // only responds to `/scrapeConfig/$NS/$NAME` + c.mut.RLock() + man := c.manager + c.mut.RUnlock() + path := strings.Trim(r.URL.Path, "/") + parts := strings.Split(path, "/") + if man == nil || len(parts) != 3 || parts[0] != "scrapeConfig" { + w.WriteHeader(404) + return + } + ns := parts[1] + name := parts[2] + scs := man.getScrapeConfig(ns, name) + if len(scs) == 0 { + w.WriteHeader(404) + return + } + dat, err := yaml.Marshal(scs) + if err != nil { + if _, err = w.Write([]byte(err.Error())); err != nil { + return + } + w.WriteHeader(500) + return + } + _, err = w.Write(dat) + if err != nil { + w.WriteHeader(500) + } + }) +} diff --git a/component/prometheus/operator/common/crdmanager.go b/component/prometheus/operator/common/crdmanager.go index 9c8926906a1d..7ce1aa368827 100644 --- a/component/prometheus/operator/common/crdmanager.go +++ b/component/prometheus/operator/common/crdmanager.go @@ -14,6 +14,7 @@ import ( "github.com/grafana/agent/component" "github.com/grafana/agent/component/prometheus" "github.com/grafana/agent/service/cluster" + "github.com/grafana/agent/service/http" "github.com/grafana/ckit/shard" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/config" @@ -215,6 +216,17 @@ func (c *crdManager) DebugInfo() interface{} { return info } +func (c *crdManager) getScrapeConfig(ns, name string) []*config.ScrapeConfig { + prefix := fmt.Sprintf("%s/%s/%s", c.kind, ns, name) + matches := []*config.ScrapeConfig{} + for k, v := range c.scrapeConfigs { + if strings.HasPrefix(k, prefix) { + matches = append(matches, v) + } + } + return matches +} + // runInformers starts all the informers that are required to discover CRDs. func (c *crdManager) runInformers(restConfig *rest.Config, ctx context.Context) error { scheme := runtime.NewScheme() @@ -358,6 +370,11 @@ func (c *crdManager) addDebugInfo(ns string, name string, err error) { } else { debug.ReconcileError = "" } + if data, err := c.opts.GetServiceData(http.ServiceName); err == nil { + if hdata, ok := data.(http.Data); ok { + debug.ScrapeConfigsURL = fmt.Sprintf("%s%s/scrapeConfig/%s/%s", hdata.HTTPListenAddr, hdata.HTTPPathForComponent(c.opts.ID), ns, name) + } + } prefix := fmt.Sprintf("%s/%s/%s", c.kind, ns, name) c.debugInfo[prefix] = debug } @@ -400,13 +417,13 @@ func (c *crdManager) onAddPodMonitor(obj interface{}) { } func (c *crdManager) onUpdatePodMonitor(oldObj, newObj interface{}) { pm := oldObj.(*promopv1.PodMonitor) - c.clearConfigs("podMonitor", pm.Namespace, pm.Name) + c.clearConfigs(pm.Namespace, pm.Name) c.addPodMonitor(newObj.(*promopv1.PodMonitor)) } func (c *crdManager) onDeletePodMonitor(obj interface{}) { pm := obj.(*promopv1.PodMonitor) - c.clearConfigs("podMonitor", pm.Namespace, pm.Name) + c.clearConfigs(pm.Namespace, pm.Name) if err := c.apply(); err != nil { level.Error(c.logger).Log("name", pm.Name, "err", err, "msg", "error applying scrape configs after deleting "+c.kind) } @@ -450,13 +467,13 @@ func (c *crdManager) onAddServiceMonitor(obj interface{}) { } func (c *crdManager) onUpdateServiceMonitor(oldObj, newObj interface{}) { pm := oldObj.(*promopv1.ServiceMonitor) - c.clearConfigs("serviceMonitor", pm.Namespace, pm.Name) + c.clearConfigs(pm.Namespace, pm.Name) c.addServiceMonitor(newObj.(*promopv1.ServiceMonitor)) } func (c *crdManager) onDeleteServiceMonitor(obj interface{}) { pm := obj.(*promopv1.ServiceMonitor) - c.clearConfigs("serviceMonitor", pm.Namespace, pm.Name) + c.clearConfigs(pm.Namespace, pm.Name) if err := c.apply(); err != nil { level.Error(c.logger).Log("name", pm.Name, "err", err, "msg", "error applying scrape configs after deleting "+c.kind) } @@ -498,22 +515,22 @@ func (c *crdManager) onAddProbe(obj interface{}) { } func (c *crdManager) onUpdateProbe(oldObj, newObj interface{}) { pm := oldObj.(*promopv1.Probe) - c.clearConfigs("probe", pm.Namespace, pm.Name) + c.clearConfigs(pm.Namespace, pm.Name) c.addProbe(newObj.(*promopv1.Probe)) } func (c *crdManager) onDeleteProbe(obj interface{}) { pm := obj.(*promopv1.Probe) - c.clearConfigs("probe", pm.Namespace, pm.Name) + c.clearConfigs(pm.Namespace, pm.Name) if err := c.apply(); err != nil { level.Error(c.logger).Log("name", pm.Name, "err", err, "msg", "error applying scrape configs after deleting "+c.kind) } } -func (c *crdManager) clearConfigs(kind, ns, name string) { +func (c *crdManager) clearConfigs(ns, name string) { c.mut.Lock() defer c.mut.Unlock() - prefix := fmt.Sprintf("%s/%s/%s", kind, ns, name) + prefix := fmt.Sprintf("%s/%s/%s", c.kind, ns, name) for k := range c.discoveryConfigs { if strings.HasPrefix(k, prefix) { delete(c.discoveryConfigs, k) diff --git a/component/prometheus/operator/podmonitors/operator.go b/component/prometheus/operator/podmonitors/operator.go index a9895511ece4..c35c277acee0 100644 --- a/component/prometheus/operator/podmonitors/operator.go +++ b/component/prometheus/operator/podmonitors/operator.go @@ -5,13 +5,14 @@ import ( "github.com/grafana/agent/component/prometheus/operator" "github.com/grafana/agent/component/prometheus/operator/common" "github.com/grafana/agent/service/cluster" + "github.com/grafana/agent/service/http" ) func init() { component.Register(component.Registration{ Name: "prometheus.operator.podmonitors", Args: operator.Arguments{}, - NeedsServices: []string{cluster.ServiceName}, + NeedsServices: []string{cluster.ServiceName, http.ServiceName}, Build: func(opts component.Options, args component.Arguments) (component.Component, error) { return common.New(opts, args, common.KindPodMonitor) diff --git a/component/prometheus/operator/probes/probes.go b/component/prometheus/operator/probes/probes.go index 219b167119ce..e8d73ef10bf6 100644 --- a/component/prometheus/operator/probes/probes.go +++ b/component/prometheus/operator/probes/probes.go @@ -5,13 +5,14 @@ import ( "github.com/grafana/agent/component/prometheus/operator" "github.com/grafana/agent/component/prometheus/operator/common" "github.com/grafana/agent/service/cluster" + "github.com/grafana/agent/service/http" ) func init() { component.Register(component.Registration{ Name: "prometheus.operator.probes", Args: operator.Arguments{}, - NeedsServices: []string{cluster.ServiceName}, + NeedsServices: []string{cluster.ServiceName, http.ServiceName}, Build: func(opts component.Options, args component.Arguments) (component.Component, error) { return common.New(opts, args, common.KindProbe) diff --git a/component/prometheus/operator/servicemonitors/servicemonitors.go b/component/prometheus/operator/servicemonitors/servicemonitors.go index beb063352b58..9abc214b969d 100644 --- a/component/prometheus/operator/servicemonitors/servicemonitors.go +++ b/component/prometheus/operator/servicemonitors/servicemonitors.go @@ -5,13 +5,14 @@ import ( "github.com/grafana/agent/component/prometheus/operator" "github.com/grafana/agent/component/prometheus/operator/common" "github.com/grafana/agent/service/cluster" + "github.com/grafana/agent/service/http" ) func init() { component.Register(component.Registration{ Name: "prometheus.operator.servicemonitors", Args: operator.Arguments{}, - NeedsServices: []string{cluster.ServiceName}, + NeedsServices: []string{cluster.ServiceName, http.ServiceName}, Build: func(opts component.Options, args component.Arguments) (component.Component, error) { return common.New(opts, args, common.KindServiceMonitor) diff --git a/component/prometheus/operator/types.go b/component/prometheus/operator/types.go index 2696f0c4fce0..b40b0f2fe70c 100644 --- a/component/prometheus/operator/types.go +++ b/component/prometheus/operator/types.go @@ -71,8 +71,9 @@ type DebugInfo struct { } type DiscoveredResource struct { - Namespace string `river:"namespace,attr"` - Name string `river:"name,attr"` - LastReconcile time.Time `river:"last_reconcile,attr,optional"` - ReconcileError string `river:"reconcile_error,attr,optional"` + Namespace string `river:"namespace,attr"` + Name string `river:"name,attr"` + LastReconcile time.Time `river:"last_reconcile,attr,optional"` + ReconcileError string `river:"reconcile_error,attr,optional"` + ScrapeConfigsURL string `river:"scrape_configs_url,attr,optional"` }