From dea747c0af6a955ecdba6bf313350cc86706e7a7 Mon Sep 17 00:00:00 2001 From: Brian Candler Date: Wed, 16 Aug 2023 20:03:29 +0100 Subject: [PATCH] Permit comma-separated list of module names in "module" parameter Fixes #731 --- collector/collector.go | 90 ++++++++++++++++++++++++------------------ main.go | 36 +++++++++++------ 2 files changed, 75 insertions(+), 51 deletions(-) diff --git a/collector/collector.go b/collector/collector.go index 1b62288b..0c9bc106 100644 --- a/collector/collector.go +++ b/collector/collector.go @@ -359,7 +359,7 @@ type Collector struct { ctx context.Context target string auth *config.Auth - module *config.Module + modules []*config.Module logger log.Logger metrics internalMetrics } @@ -403,9 +403,9 @@ func newInternalMetrics(reg prometheus.Registerer) internalMetrics { } } -func New(ctx context.Context, target string, auth *config.Auth, module *config.Module, logger log.Logger, reg prometheus.Registerer) *Collector { +func New(ctx context.Context, target string, auth *config.Auth, modules []*config.Module, logger log.Logger, reg prometheus.Registerer) *Collector { internalMetrics := newInternalMetrics(reg) - return &Collector{ctx: ctx, target: target, auth: auth, module: module, logger: logger, metrics: internalMetrics} + return &Collector{ctx: ctx, target: target, auth: auth, modules: modules, logger: logger, metrics: internalMetrics} } // Describe implements Prometheus.Collector. @@ -415,56 +415,68 @@ func (c Collector) Describe(ch chan<- *prometheus.Desc) { // Collect implements Prometheus.Collector. func (c Collector) Collect(ch chan<- prometheus.Metric) { + var totalScrapeDuration float64 + var totalPackets, totalRetries, totalPdus uint64 start := time.Now() - results, err := ScrapeTarget(c.ctx, c.target, c.auth, c.module, c.logger, c.metrics) - if err != nil { - level.Info(c.logger).Log("msg", "Error scraping target", "err", err) - ch <- prometheus.NewInvalidMetric(prometheus.NewDesc("snmp_error", "Error scraping target", nil, nil), err) - return + for _, module := range c.modules { + startThisModule := time.Now() + results, err := ScrapeTarget(c.ctx, c.target, c.auth, module, c.logger, c.metrics) + if err != nil { + level.Info(c.logger).Log("msg", "Error scraping target", "err", err) + ch <- prometheus.NewInvalidMetric(prometheus.NewDesc("snmp_error", "Error scraping target", nil, nil), err) + return + } + totalScrapeDuration += time.Since(startThisModule).Seconds() + totalPackets += results.packets + totalRetries += results.retries + totalPdus += uint64(len(results.pdus)) + oidToPdu := make(map[string]gosnmp.SnmpPDU, len(results.pdus)) + for _, pdu := range results.pdus { + oidToPdu[pdu.Name[1:]] = pdu + } + + metricTree := buildMetricTree(module.Metrics) + // Look for metrics that match each pdu. + PduLoop: + for oid, pdu := range oidToPdu { + head := metricTree + oidList := oidToList(oid) + for i, o := range oidList { + var ok bool + head, ok = head.children[o] + if !ok { + continue PduLoop + } + if head.metric != nil { + // Found a match. + samples := pduToSamples(oidList[i+1:], &pdu, head.metric, oidToPdu, c.logger, c.metrics) + for _, sample := range samples { + // TODO: if multiple modules are invoked, and return duplicate metrics, + // should we skip them? Otherwise we get a 500 error: + // "collected metric "foo" {...} was collected before with the same name and label values" + ch <- sample + } + break + } + } + } } ch <- prometheus.MustNewConstMetric( prometheus.NewDesc("snmp_scrape_walk_duration_seconds", "Time SNMP walk/bulkwalk took.", nil, nil), prometheus.GaugeValue, - time.Since(start).Seconds()) + totalScrapeDuration) ch <- prometheus.MustNewConstMetric( prometheus.NewDesc("snmp_scrape_packets_sent", "Packets sent for get, bulkget, and walk; including retries.", nil, nil), prometheus.GaugeValue, - float64(results.packets)) + float64(totalPackets)) ch <- prometheus.MustNewConstMetric( prometheus.NewDesc("snmp_scrape_packets_retried", "Packets retried for get, bulkget, and walk.", nil, nil), prometheus.GaugeValue, - float64(results.retries)) + float64(totalRetries)) ch <- prometheus.MustNewConstMetric( prometheus.NewDesc("snmp_scrape_pdus_returned", "PDUs returned from get, bulkget, and walk.", nil, nil), prometheus.GaugeValue, - float64(len(results.pdus))) - oidToPdu := make(map[string]gosnmp.SnmpPDU, len(results.pdus)) - for _, pdu := range results.pdus { - oidToPdu[pdu.Name[1:]] = pdu - } - - metricTree := buildMetricTree(c.module.Metrics) - // Look for metrics that match each pdu. -PduLoop: - for oid, pdu := range oidToPdu { - head := metricTree - oidList := oidToList(oid) - for i, o := range oidList { - var ok bool - head, ok = head.children[o] - if !ok { - continue PduLoop - } - if head.metric != nil { - // Found a match. - samples := pduToSamples(oidList[i+1:], &pdu, head.metric, oidToPdu, c.logger, c.metrics) - for _, sample := range samples { - ch <- sample - } - break - } - } - } + float64(totalPdus)) ch <- prometheus.MustNewConstMetric( prometheus.NewDesc("snmp_scrape_duration_seconds", "Total SNMP time scrape took (walk and processing).", nil, nil), prometheus.GaugeValue, diff --git a/main.go b/main.go index c28316f4..c361dc22 100644 --- a/main.go +++ b/main.go @@ -19,6 +19,7 @@ import ( _ "net/http/pprof" "os" "os/signal" + "strings" "sync" "syscall" "time" @@ -94,43 +95,54 @@ func handler(w http.ResponseWriter, r *http.Request, logger log.Logger) { authName = "public_v2" } - moduleName := query.Get("module") + // Accept comma-separated list: module=foo,bar + // TODO: should be also accept module=foo&module=bar (and combinations)? + moduleParam := query.Get("module") if len(query["module"]) > 1 { http.Error(w, "'module' parameter must only be specified once", http.StatusBadRequest) snmpRequestErrors.Inc() return } - if moduleName == "" { - moduleName = "if_mib" + if moduleParam == "" { + moduleParam = "if_mib" } + // TODO: should we reject duplicates? + moduleNames := strings.Split(moduleParam, ",") + + modules := make([]*config.Module, 0, len(moduleNames)) sc.RLock() auth, authOk := sc.C.Auths[authName] - module, moduleOk := sc.C.Modules[moduleName] - sc.RUnlock() if !authOk { + sc.RUnlock() http.Error(w, fmt.Sprintf("Unknown auth '%s'", authName), http.StatusBadRequest) snmpRequestErrors.Inc() return } - if !moduleOk { - http.Error(w, fmt.Sprintf("Unknown module '%s'", moduleName), http.StatusBadRequest) - snmpRequestErrors.Inc() - return + for _, moduleName := range moduleNames { + module, moduleOk := sc.C.Modules[moduleName] + if !moduleOk { + sc.RUnlock() + http.Error(w, fmt.Sprintf("Unknown module '%s'", moduleName), http.StatusBadRequest) + snmpRequestErrors.Inc() + return + } + modules = append(modules, module) } + sc.RUnlock() - logger = log.With(logger, "auth", authName, "module", moduleName, "target", target) + logger = log.With(logger, "auth", authName, "module", moduleParam, "target", target) level.Debug(logger).Log("msg", "Starting scrape") start := time.Now() registry := prometheus.NewRegistry() - c := collector.New(r.Context(), target, auth, module, logger, registry) + c := collector.New(r.Context(), target, auth, modules, logger, registry) registry.MustRegister(c) // Delegate http serving to Prometheus client library, which will call collector.Collect. h := promhttp.HandlerFor(registry, promhttp.HandlerOpts{}) h.ServeHTTP(w, r) duration := time.Since(start).Seconds() - snmpDuration.WithLabelValues(authName, moduleName).Observe(duration) + snmpDuration.WithLabelValues(authName, moduleParam).Observe(duration) level.Debug(logger).Log("msg", "Finished scrape", "duration_seconds", duration) }