Skip to content

Commit

Permalink
Add collector for systemd service status.
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason Harvey committed Oct 6, 2015
1 parent c8e3cd3 commit b65072f
Show file tree
Hide file tree
Showing 8 changed files with 188 additions and 17 deletions.
5 changes: 5 additions & 0 deletions cmd/scollector/collectors/collectors.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ var (
}
WatchProcessesDotNet = func() {}

AddSystemdServiceConfig = func(params conf.ServiceParams) error {
return fmt.Errorf("systemd service watching not implemented on this platform")
}
WatchSystemd = func() {}

KeepalivedCommunity = ""
)

Expand Down
20 changes: 14 additions & 6 deletions cmd/scollector/collectors/processes_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,15 +165,11 @@ func getLinuxProccesses() ([]*Process, error) {
}
var lps []*Process
for _, pid := range pids {
cmdline, err := ioutil.ReadFile("/proc/" + pid + "/cmdline")
if err != nil {
cl, err := getLinuxCmdline(pid)
if err != nil || cl == nil {
//Continue because the pid might not exist any more
continue
}
cl := strings.Split(string(cmdline), "\x00")
if len(cl) < 1 || len(cl[0]) == 0 {
continue
}
lp := &Process{
Pid: pid,
Command: cl[0],
Expand All @@ -186,6 +182,18 @@ func getLinuxProccesses() ([]*Process, error) {
return lps, nil
}

// getLinuxCmdline reads /proc/<pid>/cmdline and returns its NUL-separated
// fields.
//
// A failure to read the file is returned as the error. When the file yields
// no command (the first field is empty) both results are nil, so callers must
// check the slice as well as the error before indexing it.
func getLinuxCmdline(pid string) ([]string, error) {
	raw, readErr := ioutil.ReadFile("/proc/" + pid + "/cmdline")
	if readErr != nil {
		return nil, readErr
	}

	fields := strings.Split(string(raw), "\x00")
	if len(fields) == 0 || fields[0] == "" {
		// Nothing usable as a command name.
		return nil, nil
	}
	return fields, nil
}

func c_linux_processes(procs []*WatchedProc) (opentsdb.MultiDataPoint, error) {
var md opentsdb.MultiDataPoint
lps, err := getLinuxProccesses()
Expand Down
137 changes: 137 additions & 0 deletions cmd/scollector/collectors/systemd_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
package collectors

import (
"fmt"
"regexp"
"strings"

"bosun.org/_third_party/github.com/coreos/go-systemd/dbus"
"bosun.org/cmd/scollector/conf"
"bosun.org/metadata"
"bosun.org/opentsdb"
"bosun.org/util"
)

// systemdServiceConfig is one configured service matcher, built by
// AddSystemdServiceConfig from a conf.ServiceParams entry.
type systemdServiceConfig struct {
// regex is compiled from the configured Name; c_systemd matches it against
// unit names with the ".service" suffix removed.
regex *regexp.Regexp
// watch is copied from WatchProc; when true, c_systemd also calls
// watchSystemdServiceProc for matching units.
watch bool
}

// systemdServices holds every matcher registered via AddSystemdServiceConfig.
var systemdServices []*systemdServiceConfig

func init() {
// The following two func defs have no-op stubs in collectors.go, as they are
// called by main.go, but only functional on Linux environments.
WatchSystemd = func() {
collectors = append(collectors, &IntervalCollector{
F: func() (opentsdb.MultiDataPoint, error) {
return c_systemd()
},
name: "c_systemd",
})
}

AddSystemdServiceConfig = func(params conf.ServiceParams) error {
if params.Name == "" {
return fmt.Errorf("empty service Name")
}
reg, err := regexp.Compile(params.Name)
if err != nil {
return err
}
serviceConfig := systemdServiceConfig{regex: reg, watch: params.WatchProc}
systemdServices = append(systemdServices, &serviceConfig)
return nil
}
}

// c_systemd iterates through all units reported by systemd over D-Bus.
// For every ".service" unit whose short name (suffix removed) matches a
// configured [[SystemdService]] regex, it emits:
//
//   - linux.systemd.unit.activestate: the numeric code from activeState
//   - os.service.running: 0 when ActiveState is "active", 1 otherwise
//
// When the matching config has WatchProc set, process data for the unit's
// main PID is appended as well. Any error from D-Bus or from the process
// watcher aborts the collection and is returned with a nil datapoint set.
func c_systemd() (opentsdb.MultiDataPoint, error) {
	conn, err := dbus.New()
	if err != nil {
		return nil, err
	}
	// A fresh connection is opened on every collection run; close it so the
	// collector does not leak a D-Bus connection per interval.
	defer conn.Close()

	units, err := conn.ListUnits()
	if err != nil {
		return nil, err
	}

	var md opentsdb.MultiDataPoint
	for _, unit := range units {
		if !strings.HasSuffix(unit.Name, ".service") {
			continue
		}
		shortName := strings.TrimSuffix(unit.Name, ".service")
		for _, config := range systemdServices {
			if !config.regex.MatchString(shortName) {
				continue
			}
			if config.watch {
				if err := watchSystemdServiceProc(&md, conn, unit); err != nil {
					return nil, err
				}
			}
			systemdTags := opentsdb.TagSet{"name": unit.Name}
			osTags := opentsdb.TagSet{"name": shortName}
			// NOTE(review): a state missing from activeState yields the map's
			// zero value, which is the same code as "active".
			Add(&md, "linux.systemd.unit.activestate", activeState[unit.ActiveState], systemdTags, metadata.Gauge, metadata.StatusCode, descActiveState)
			Add(&md, "os.service.running", util.Btoi(unit.ActiveState != "active"), osTags, metadata.Gauge, metadata.Ok, "")
		}
	}

	return md, nil
}

// watchSystemdServiceProc attempts to determine the main PID of a service and
// builds a WatchedProc which is then passed to linuxProcMonitor, appending
// that process's datapoints to md.
//
// It returns nil without adding anything when the unit has no running main
// process (MainPID is 0) or when no command line can be read for the PID.
// Errors from the D-Bus property lookup, from reading the command line, or
// from linuxProcMonitor are returned to the caller.
func watchSystemdServiceProc(md *opentsdb.MultiDataPoint, conn *dbus.Conn, unit dbus.UnitStatus) error {
	// ExecMainPID can be non-running. MainPID is the pid of the running service.
	mainPIDProp, err := conn.GetUnitTypeProperty(unit.Name, "Service", "MainPID")
	if err != nil {
		return err
	}

	// The property arrives as a D-Bus variant; check the dynamic type instead
	// of asserting blindly so an unexpected value cannot panic the collector.
	rawPID := mainPIDProp.Value.Value()
	mainPID, ok := rawPID.(uint32)
	if !ok {
		return fmt.Errorf("systemd unit %s: unexpected MainPID type %T", unit.Name, rawPID)
	}
	// MainPID is 0 if there is no running service.
	if mainPID == 0 {
		return nil
	}

	pid := fmt.Sprint(mainPID)
	cmdline, err := getLinuxCmdline(pid)
	if err != nil {
		return err
	}
	if cmdline == nil {
		return nil
	}

	wp := WatchedProc{
		Command:   cmdline[0],
		Name:      strings.TrimSuffix(unit.Name, ".service"),
		Processes: make(map[string]int),
		ArgMatch:  regexp.MustCompile(""),
		idPool:    new(idPool)}

	// Only one PID is tracked per service (at the moment), so a single id is
	// drawn from the pool.
	wp.Processes[pid] = wp.get()

	return linuxProcMonitor(&wp, md)
}

// activeState maps a unit's ActiveState string to the numeric code sent as
// the linux.systemd.unit.activestate metric. A state not listed here looks up
// as 0, the same code as "active".
var activeState = map[string]int{
"active": 0,
"reloading": 1,
"inactive": 2,
"failed": 3,
"activating": 4,
"deactivating": 5,
}

// descActiveState is the metric description for
// linux.systemd.unit.activestate; keep it in sync with the activeState map.
const (
descActiveState = "0: active, 1: reloading, 2: inactive, 3: failed, 4: activating, 5: deactivating"
)
23 changes: 12 additions & 11 deletions cmd/scollector/conf/conf.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,18 @@ type Conf struct {
// the specified community.
KeepalivedCommunity string

HAProxy []HAProxy
SNMP []SNMP
MIBS map[string]MIB
ICMP []ICMP
Vsphere []Vsphere
AWS []AWS
Process []ProcessParams
ProcessDotNet []ProcessDotNet
HTTPUnit []HTTPUnit
Riak []Riak
Github []Github
HAProxy []HAProxy
SNMP []SNMP
MIBS map[string]MIB
ICMP []ICMP
Vsphere []Vsphere
AWS []AWS
Process []ProcessParams
SystemdService []ServiceParams
ProcessDotNet []ProcessDotNet
HTTPUnit []HTTPUnit
Riak []Riak
Github []Github
// ElasticIndexFilters takes regular expressions and excludes indicies that
// match those filters from being monitored for metrics in the elastic.indices
// namespace
Expand Down
5 changes: 5 additions & 0 deletions cmd/scollector/conf/conf_darwin.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
package conf

// ProcessParams is the element type of Conf.Process; it has no fields in
// this platform's build.
type ProcessParams struct{}

// ServiceParams is the element type of Conf.SystemdService and is passed to
// collectors.AddSystemdServiceConfig.
type ServiceParams struct {
// Name identifies the service(s) to watch.
Name string
// WatchProc requests process data for the service in addition to its status.
WatchProc bool
}
5 changes: 5 additions & 0 deletions cmd/scollector/conf/conf_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,8 @@ type ProcessParams struct {
Args string
IncludeCount bool
}

// ServiceParams is one [[SystemdService]] config entry; it is the element
// type of Conf.SystemdService and is passed to
// collectors.AddSystemdServiceConfig.
type ServiceParams struct {
// Name is compiled as a regular expression and matched against systemd
// unit names with the ".service" suffix removed. It must not be empty.
Name string
// WatchProc, when true, also sends process data for the service's main PID.
WatchProc bool
}
5 changes: 5 additions & 0 deletions cmd/scollector/conf/conf_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,8 @@ package conf
// ProcessParams is the element type of Conf.Process in this platform's build.
type ProcessParams struct {
Name string
}

// ServiceParams is the element type of Conf.SystemdService and is passed to
// collectors.AddSystemdServiceConfig.
type ServiceParams struct {
// Name identifies the service(s) to watch.
Name string
// WatchProc requests process data for the service in addition to its status.
WatchProc bool
}
5 changes: 5 additions & 0 deletions cmd/scollector/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ func main() {
for _, p := range conf.Process {
check(collectors.AddProcessConfig(p))
}
for _, p := range conf.SystemdService {
check(collectors.AddSystemdServiceConfig(p))
}
for _, p := range conf.ProcessDotNet {
check(collectors.AddProcessDotNetConfig(p))
}
Expand All @@ -142,6 +145,8 @@ func main() {
collectors.WatchProcesses()
collectors.WatchProcessesDotNet()

collectors.WatchSystemd()

if *flagFake > 0 {
collectors.InitFake(*flagFake)
}
Expand Down

0 comments on commit b65072f

Please sign in to comment.