Skip to content

Commit

Permalink
Fix supervisord collector (prometheus#978)
Browse files Browse the repository at this point in the history
* Replace supervisord xmlrpc library
* Use `github.com/mattn/go-xmlrpc` that doesn't leak goroutines.
* Fix uptime metric

* Use Prometheus best practices for uptime metric.
  * Use "start time" rather than "uptime".
  * Don't emit a start time if the process is down.
* Add changelog entry.
* Add example compatibility rules.

Signed-off-by: Ben Kochie <superq@gmail.com>
  • Loading branch information
SuperQ authored and oblitorum committed Apr 9, 2024
1 parent 354db1d commit 586fa74
Show file tree
Hide file tree
Showing 9 changed files with 496 additions and 29 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

**Breaking changes**

supervisord collector reports "start_time_seconds" rather than "uptime"

* [CHANGE] Filter out non-installed units when collecting all systemd units #1011
* [FEATURE] Collect NRefused property for systemd socket units (available as of systemd v239)
* [FEATURE] Collect NRestarts property for systemd service units
Expand All @@ -10,6 +12,8 @@
* [ENHANCEMENT]
* [BUGFIX]

* [BUGFIX] Fix goroutine leak in supervisord collector

## 0.16.0 / 2018-05-15

**Breaking changes**
Expand Down
55 changes: 37 additions & 18 deletions collector/supervisord.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
package collector

import (
"github.com/kolo/xmlrpc"
"fmt"

"github.com/mattn/go-xmlrpc"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
"gopkg.in/alecthomas/kingpin.v2"
Expand All @@ -27,11 +29,10 @@ var (
)

type supervisordCollector struct {
client *xmlrpc.Client
upDesc *prometheus.Desc
stateDesc *prometheus.Desc
exitStatusDesc *prometheus.Desc
uptimeDesc *prometheus.Desc
startTimeDesc *prometheus.Desc
}

func init() {
Expand All @@ -40,17 +41,11 @@ func init() {

// NewSupervisordCollector returns a new Collector exposing supervisord statistics.
func NewSupervisordCollector() (Collector, error) {
client, err := xmlrpc.NewClient(*supervisordURL, nil)
if err != nil {
return nil, err
}

var (
subsystem = "supervisord"
labelNames = []string{"name", "group"}
)
return &supervisordCollector{
client: client,
upDesc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "up"),
"Process Up",
Expand All @@ -69,9 +64,9 @@ func NewSupervisordCollector() (Collector, error) {
labelNames,
nil,
),
uptimeDesc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "uptime"),
"Process Uptime",
startTimeDesc: prometheus.NewDesc(
prometheus.BuildFQName(namespace, subsystem, "start_time_seconds"),
"Process start time",
labelNames,
nil,
),
Expand All @@ -98,7 +93,7 @@ func (c *supervisordCollector) isRunning(state int) bool {
}

func (c *supervisordCollector) Update(ch chan<- prometheus.Metric) error {
var infos []struct {
var info struct {
Name string `xmlrpc:"name"`
Group string `xmlrpc:"group"`
Start int `xmlrpc:"start"`
Expand All @@ -112,21 +107,45 @@ func (c *supervisordCollector) Update(ch chan<- prometheus.Metric) error {
StderrLogfile string `xmlrcp:"stderr_logfile"`
PID int `xmlrpc:"pid"`
}
if err := c.client.Call("supervisor.getAllProcessInfo", nil, &infos); err != nil {
return err

res, err := xmlrpc.Call(*supervisordURL, "supervisor.getAllProcessInfo")
if err != nil {
return fmt.Errorf("unable to call supervisord: %s", err)
}
for _, info := range infos {

for _, p := range res.(xmlrpc.Array) {
for k, v := range p.(xmlrpc.Struct) {
switch k {
case "name":
info.Name = v.(string)
case "group":
info.Group = v.(string)
case "start":
info.Start = v.(int)
case "stop":
info.Stop = v.(int)
case "now":
info.Now = v.(int)
case "state":
info.State = v.(int)
case "statename":
info.StateName = v.(string)
case "exitstatus":
info.ExitStatus = v.(int)
case "pid":
info.PID = v.(int)
}
}
labels := []string{info.Name, info.Group}

ch <- prometheus.MustNewConstMetric(c.stateDesc, prometheus.GaugeValue, float64(info.State), labels...)
ch <- prometheus.MustNewConstMetric(c.exitStatusDesc, prometheus.GaugeValue, float64(info.ExitStatus), labels...)

if c.isRunning(info.State) {
ch <- prometheus.MustNewConstMetric(c.upDesc, prometheus.GaugeValue, 1, labels...)
ch <- prometheus.MustNewConstMetric(c.uptimeDesc, prometheus.CounterValue, float64(info.Now-info.Start), labels...)
ch <- prometheus.MustNewConstMetric(c.startTimeDesc, prometheus.CounterValue, float64(info.Start), labels...)
} else {
ch <- prometheus.MustNewConstMetric(c.upDesc, prometheus.GaugeValue, 0, labels...)
ch <- prometheus.MustNewConstMetric(c.uptimeDesc, prometheus.CounterValue, 0, labels...)
}
log.Debugf("%s:%s is %s on pid %d", info.Group, info.Name, info.StateName, info.PID)
}
Expand Down
5 changes: 5 additions & 0 deletions docs/example-17-compatibility-rules-new-to-old.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Compatibility rule: rebuilds node_supervisord_start_time_seconds from the
# node_supervisord_uptime metric.
groups:
  - name: node_exporter-17-supervisord
    rules:
      - record: node_supervisord_start_time_seconds
        # Start time is the current time minus the elapsed uptime; adding the
        # two would produce a timestamp in the future.
        expr: time() - node_supervisord_uptime
5 changes: 5 additions & 0 deletions docs/example-17-compatibility-rules.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Compatibility rule: records node_supervisord_uptime as the current time()
# minus node_supervisord_start_time_seconds.
groups:
- name: node_exporter-17-supervisord
rules:
- record: node_supervisord_uptime
expr: time() - node_supervisord_start_time_seconds
21 changes: 21 additions & 0 deletions vendor/github.com/mattn/go-xmlrpc/LICENSE

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

48 changes: 48 additions & 0 deletions vendor/github.com/mattn/go-xmlrpc/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 586fa74

Please sign in to comment.