Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix nagios parser to parse all perfdata and report info message. #5601

Merged
merged 6 commits into from
Mar 25, 2019
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 33 additions & 51 deletions plugins/inputs/exec/exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import (
"runtime"
"strings"
"sync"
"syscall"
"time"

"github.com/kballard/go-shellquote"
Expand Down Expand Up @@ -66,26 +65,6 @@ type Runner interface {

type CommandRunner struct{}

func AddNagiosState(exitCode error, acc telegraf.Accumulator) error {
nagiosState := 0
if exitCode != nil {
exiterr, ok := exitCode.(*exec.ExitError)
if ok {
status, ok := exiterr.Sys().(syscall.WaitStatus)
if ok {
nagiosState = status.ExitStatus()
} else {
return fmt.Errorf("exec: unable to get nagios plugin exit code")
}
} else {
return fmt.Errorf("exec: unable to get nagios plugin exit code")
}
}
fields := map[string]interface{}{"state": nagiosState}
acc.AddFields("nagios_state", fields, nil)
return nil
}

func (c CommandRunner) Run(
e *Exec,
command string,
Expand All @@ -105,43 +84,46 @@ func (c CommandRunner) Run(
cmd.Stdout = &out
cmd.Stderr = &stderr

if err := internal.RunTimeout(cmd, e.Timeout.Duration); err != nil {
switch e.parser.(type) {
case *nagios.NagiosParser:
AddNagiosState(err, acc)
default:
var errMessage = ""
if stderr.Len() > 0 {
stderr = removeCarriageReturns(stderr)
// Limit the number of bytes.
didTruncate := false
if stderr.Len() > MaxStderrBytes {
stderr.Truncate(MaxStderrBytes)
_, isNagiosParser := e.parser.(*nagios.NagiosParser)

err = internal.RunTimeout(cmd, e.Timeout.Duration)
nagiosErr := err
if err != nil && !isNagiosParser {
var errMessage = ""
if stderr.Len() > 0 {
stderr = removeCarriageReturns(stderr)
// Limit the number of bytes.
didTruncate := false
if stderr.Len() > MaxStderrBytes {
stderr.Truncate(MaxStderrBytes)
didTruncate = true
}
if i := bytes.IndexByte(stderr.Bytes(), '\n'); i > 0 {
// Only show truncation if the newline wasn't the last character.
if i < stderr.Len()-1 {
didTruncate = true
}
if i := bytes.IndexByte(stderr.Bytes(), '\n'); i > 0 {
// Only show truncation if the newline wasn't the last character.
if i < stderr.Len()-1 {
didTruncate = true
}
stderr.Truncate(i)
}
if didTruncate {
stderr.WriteString("...")
}

errMessage = fmt.Sprintf(": %s", stderr.String())
stderr.Truncate(i)
}
return nil, fmt.Errorf("exec: %s for command '%s'%s", err, command, errMessage)
}
} else {
switch e.parser.(type) {
case *nagios.NagiosParser:
AddNagiosState(nil, acc)
if didTruncate {
stderr.WriteString("...")
}

errMessage = fmt.Sprintf(": %s", stderr.String())
}
return nil, fmt.Errorf("exec: %s for command '%s'%s", err, command, errMessage)
}

out = removeCarriageReturns(out)

if isNagiosParser {
exitCode, err := nagios.GetExitCode(nagiosErr)
if err != nil {
return nil, fmt.Errorf("exec: get exit code: %s", err)
}
nagios.AppendExitCode(&out, exitCode)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I like injecting the return code into the parser data, is there a reason this needs to be changed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like the state, service_output, long_service_output semantically belong together and should be a part of the same measurement.

The current parser interface doesn't allow to pass any extra information to the parser, whilst the service_output and the long_service_output are known during the parsing time only.

The parser will properly parse bytes for which the nagios.AppendExitCode(...) wasn't called, it will assume the default state to be 0.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not my primary concern but wouldn't the state be set incorrectly depending on how the output ends? Perhaps we could add the state to the telegraf.Metric after it has been returned from the parser?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Because we encode the state in the exec plugin, no matter how the output ends, the parser will always properly decode the state and strip those additional bytes.

    There might be an issue, when the parser is used somewhere else. And it is my bad, because I did not properly document the behavior of the Parse method. I will add the docs, but first, could you please comment on the next point:

  • Appending the error code after we return from the parser is definitely possible. But will require me to do something like this:

// To return the exit code through the error handling mechanism.
//
type exitCode int

func (c exitCode) Error() string {}

// The callee changes
//
func (c CommandRunner) Run(
	e *Exec,
	command string,
	acc telegraf.Accumulator,
) ([]byte, error) {
	<...irrelevant...>

	out = <output from the check>

	if isNagiosParser {
		exitCode, err := nagios.GetExitCode(nagiosErr)
		if err != nil {
			return nil, fmt.Errorf("exec: get exit code: %s", err)
		}
		return out.Bytes(), exitCode(exitCode)
	}

	return out.Bytes(), nil
}

// The caller changes
//
func (e *Exec) ProcessCommand(command string, acc telegraf.Accumulator, wg *sync.WaitGroup) {
	defer wg.Done()

	out, err := e.runner.Run(e, command, acc)
        var exitCode int
	if err != nil {
		if v, ok := err.(exitCode); ok {
			exitCode = int(v)
		} else {
			acc.AddError(err)
			return
		}
	}
      

	metrics, err := e.parser.Parse(out)
	if err != nil {
		acc.AddError(err)
		return
	}
	
        if _, ok := e.parser.(*nagios.NagiosParser); ok {
                // Find the metric with name == "nagios_state" and append the "state" field to it.
                // Can be a convention that it is the last metric, or iterate through all and find it.
               m := <somehow find the metric>
               m.AddField("state", exitCode)
        }
        
        for _, metric := range metrics {
		acc.AddFields(metric.Name(), metric.Fields(), metric.Tags(), metric.Time())
	}
}
  • I am relying on your expertise here. Which approach seem to be better for you?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@danielnelson, what is your take on this ^^?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be sure, neither solution is great, and if I could do it over then I would probably make nagios it's own plugin separate from exec and scrap the standalone parser.

Putting that aside though, since I don't think it is worth a breaking change... I would parse, then if this is the NagiosParser, get the error code and iterate over the metrics looking for the nagios_state metric using metric.Name(). You will also want to watch for the case where the state metric is not found, and add the field as a new metric.

One more thing, could you do me a favor and use this form for adding the metrics?

        for _, metric := range metrics {
		acc.AddMetric(metric)
	}

}

return out.Bytes(), nil
}

Expand Down
146 changes: 135 additions & 11 deletions plugins/parsers/nagios/parser.go
Original file line number Diff line number Diff line change
@@ -1,17 +1,76 @@
package nagios

import (
"bufio"
"bytes"
"encoding/binary"
"errors"
"log"
"os/exec"
"regexp"
"strconv"
"strings"
"syscall"
"time"

"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
)

// GetExitCode get the exit code from an error value which is the result
// of running a command through exec package api.
func GetExitCode(err error) (int, error) {
if err == nil {
return 0, nil
}

ee, ok := err.(*exec.ExitError)
if !ok {
// If it is not an *exec.ExitError, then it must be
// an io error, but docs do not say anything about the
// exit code in this case.
return 0, errors.New("expected *exec.ExitError")
}

ws, ok := ee.Sys().(syscall.WaitStatus)
if !ok {
return 0, errors.New("expected syscall.WaitStatus")
}

return ws.ExitStatus(), nil
}

// AppendExitCode appends exit code to the given buffer.
// The exit code is encoded as uint64 in the little endian notation. An
// empty byte is inserted before the 8 bytes of the code.
//
// See ExtractExitCode.
func AppendExitCode(buf *bytes.Buffer, exitCode int) {
bytes := make([]byte, 8)
binary.LittleEndian.PutUint64(bytes, uint64(exitCode))

buf.WriteByte(0)
buf.Write(bytes)
}

const defaultExitCode int = 0

// ExtractExitCode extracts exit code from the given byte slice.
//
// See AppendExitCode.
func ExtractExitCode(buf []byte) ([]byte, int) {
n := len(buf)

if n < 9 {
return buf, defaultExitCode
}
if buf[n-9] == 0 {
return buf[:n-9], int(binary.LittleEndian.Uint64(buf[n-8:]))
}

return buf, defaultExitCode
}

type NagiosParser struct {
MetricName string
DefaultTags map[string]string
Expand All @@ -34,27 +93,92 @@ func (p *NagiosParser) SetDefaultTags(tags map[string]string) {
}

func (p *NagiosParser) Parse(buf []byte) ([]telegraf.Metric, error) {
ts := time.Now().UTC()

var state int
buf, state = ExtractExitCode(buf)

s := bufio.NewScanner(bytes.NewReader(buf))

var msg bytes.Buffer
var longmsg bytes.Buffer

metrics := make([]telegraf.Metric, 0)
lines := strings.Split(strings.TrimSpace(string(buf)), "\n")

for _, line := range lines {
data_splitted := strings.Split(line, "|")
// Scan the first line.
if !s.Scan() && s.Err() != nil {
return nil, s.Err()
}
parts := bytes.Split(s.Bytes(), []byte{'|'})
switch len(parts) {
case 2:
ms, err := parsePerfData(string(parts[1]), ts)
if err != nil {
log.Printf("E! [parser.nagios] failed to parse performance data: %s\n", err.Error())
}
metrics = append(metrics, ms...)
fallthrough
case 1:
msg.Write(bytes.TrimSpace(parts[0]))
default:
return nil, errors.New("illegal output format")
}

if len(data_splitted) != 2 {
// got human readable output only or bad line
continue
// Read long output.
for s.Scan() {
if bytes.Contains(s.Bytes(), []byte{'|'}) {
parts := bytes.Split(s.Bytes(), []byte{'|'})
if longmsg.Len() != 0 {
longmsg.WriteByte('\n')
}
longmsg.Write(bytes.TrimSpace(parts[0]))

ms, err := parsePerfData(string(parts[1]), ts)
if err != nil {
log.Printf("E! [parser.nagios] failed to parse performance data: %s\n", err.Error())
}
metrics = append(metrics, ms...)
break
}
if longmsg.Len() != 0 {
longmsg.WriteByte('\n')
}
m, err := parsePerfData(data_splitted[1])
longmsg.Write(bytes.TrimSpace((s.Bytes())))
}

// Parse extra performance data.
for s.Scan() {
ms, err := parsePerfData(s.Text(), ts)
if err != nil {
log.Printf("E! [parser.nagios] failed to parse performance data: %s\n", err.Error())
continue
}
metrics = append(metrics, m...)
metrics = append(metrics, ms...)
}

if s.Err() != nil {
log.Printf("D! [parser.nagios] unexpected io error: %s\n", s.Err())
}

// Create nagios state.
fields := map[string]interface{}{
"state": state,
"service_output": msg.String(),
}
if longmsg.Len() != 0 {
fields["long_service_output"] = longmsg.String()
}

m, err := metric.New("nagios_state", nil, fields, ts)
if err == nil {
metrics = append(metrics, m)
} else {
log.Printf("E! [parser.nagios] failed to add nagios_state: %s\n", err)
}

return metrics, nil
}

func parsePerfData(perfdatas string) ([]telegraf.Metric, error) {
func parsePerfData(perfdatas string, timestamp time.Time) ([]telegraf.Metric, error) {
metrics := make([]telegraf.Metric, 0)

for _, unParsedPerf := range perfSplitRegExp.FindAllString(perfdatas, -1) {
Expand Down Expand Up @@ -125,7 +249,7 @@ func parsePerfData(perfdatas string) ([]telegraf.Metric, error) {
}

// Create metric
metric, err := metric.New("nagios", tags, fields, time.Now().UTC())
metric, err := metric.New("nagios", tags, fields, timestamp)
if err != nil {
return nil, err
}
Expand Down
Loading