From 9ef3f83aca3342c71032abeb0329a82d0ae01777 Mon Sep 17 00:00:00 2001 From: Mariana Dima Date: Wed, 13 May 2020 15:19:31 +0200 Subject: [PATCH] perfmon - remove negative counter value errors from the event output (#18361) (#18449) * handle errors * update changelog * fix tests * address review (cherry picked from commit 89a4f3a44e0281386632cf825c1905b15e8e4d62) --- CHANGELOG.next.asciidoc | 1 + .../helper/windows/pdh/pdh_query_windows.go | 27 +++++++++++--- metricbeat/helper/windows/pdh/pdh_windows.go | 6 +-- .../helper/windows/pdh/pdh_windows_test.go | 3 +- metricbeat/module/windows/perfmon/data.go | 37 ++++++++++++++----- .../module/windows/perfmon/data_test.go | 2 +- .../module/windows/perfmon/perfmon_test.go | 5 ++- .../perfmon/reader_integration_test.go | 2 +- 8 files changed, 60 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 5eac615ff24..7347a683cc9 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -222,6 +222,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d - Fix storage metricset to allow config without region/zone. {issue}17623[17623] {pull}17624[17624] - Fix overflow on Prometheus rates when new buckets are added on the go. {pull}17753[17753] - Add a switch to the driver definition on SQL module to use pretty names {pull}17378[17378] +- Remove specific win32 api errors from events in perfmon. {issue}18292[18292] {pull}18361[18361] - Remove required for region/zone and make stackdriver a metricset in googlecloud. {issue}16785[16785] {pull}18398[18398] *Packetbeat* diff --git a/metricbeat/helper/windows/pdh/pdh_query_windows.go b/metricbeat/helper/windows/pdh/pdh_query_windows.go index 898d5573a60..65ad0372fcb 100644 --- a/metricbeat/helper/windows/pdh/pdh_query_windows.go +++ b/metricbeat/helper/windows/pdh/pdh_query_windows.go @@ -51,7 +51,13 @@ type Query struct { type CounterValue struct { Instance string Measurement interface{} - Err error + Err CounterValueError +} + +// CounterValueError contains the performance counter error. +type CounterValueError struct { + Error error + CStatus uint32 } // Open creates a new query. @@ -235,31 +241,40 @@ func MatchInstanceName(counterPath string) (string, error) { // getCounterValue will retrieve the counter value based on the format applied in the config options func getCounterValue(counter *Counter) CounterValue { - counterValue := CounterValue{Instance: counter.instanceName} + counterValue := CounterValue{Instance: counter.instanceName, Err: CounterValueError{CStatus: 0}} switch counter.format { case PdhFmtLong: _, value, err := PdhGetFormattedCounterValueLong(counter.handle) if err != nil { - counterValue.Err = err + counterValue.Err.Error = err + if value != nil { + counterValue.Err.CStatus = value.CStatus + } } else { counterValue.Measurement = value.Value } case PdhFmtLarge: _, value, err := PdhGetFormattedCounterValueLarge(counter.handle) if err != nil { - counterValue.Err = err + counterValue.Err.Error = err + if value != nil { + counterValue.Err.CStatus = value.CStatus + } } else { counterValue.Measurement = value.Value } case PdhFmtDouble: _, value, err := PdhGetFormattedCounterValueDouble(counter.handle) if err != nil { - counterValue.Err = err + counterValue.Err.Error = err + if value != nil { + counterValue.Err.CStatus = value.CStatus + } } else { counterValue.Measurement = value.Value } default: - counterValue.Err = errors.Errorf("initialization failed: format '%#v' "+ + counterValue.Err.Error = errors.Errorf("initialization failed: format '%#v' "+ "for instance '%s' is invalid (must be PdhFmtDouble, PdhFmtLarge or PdhFmtLong)", counter.format, counter.instanceName) } diff --git a/metricbeat/helper/windows/pdh/pdh_windows.go b/metricbeat/helper/windows/pdh/pdh_windows.go index ccfb2ea50f3..4d109549175 100644 --- a/metricbeat/helper/windows/pdh/pdh_windows.go +++ b/metricbeat/helper/windows/pdh/pdh_windows.go @@ -161,7 +161,7 @@ func PdhGetFormattedCounterValueDouble(counter PdhCounterHandle) (uint32, *PdhCo var counterType uint32 var value PdhCounterValueDouble if err := _PdhGetFormattedCounterValueDouble(counter, PdhFmtDouble|PdhFmtNoCap100, &counterType, &value); err != nil { - return 0, nil, PdhErrno(err.(syscall.Errno)) + return 0, &value, PdhErrno(err.(syscall.Errno)) } return counterType, &value, nil @@ -172,7 +172,7 @@ func PdhGetFormattedCounterValueLarge(counter PdhCounterHandle) (uint32, *PdhCou var counterType uint32 var value PdhCounterValueLarge if err := _PdhGetFormattedCounterValueLarge(counter, PdhFmtLarge|PdhFmtNoCap100, &counterType, &value); err != nil { - return 0, nil, PdhErrno(err.(syscall.Errno)) + return 0, &value, PdhErrno(err.(syscall.Errno)) } return counterType, &value, nil @@ -183,7 +183,7 @@ func PdhGetFormattedCounterValueLong(counter PdhCounterHandle) (uint32, *PdhCoun var counterType uint32 var value PdhCounterValueLong if err := _PdhGetFormattedCounterValueLong(counter, PdhFmtLong|PdhFmtNoCap100, &counterType, &value); err != nil { - return 0, nil, PdhErrno(err.(syscall.Errno)) + return 0, &value, PdhErrno(err.(syscall.Errno)) } return counterType, &value, nil diff --git a/metricbeat/helper/windows/pdh/pdh_windows_test.go b/metricbeat/helper/windows/pdh/pdh_windows_test.go index c17a68c31c3..88286751fba 100644 --- a/metricbeat/helper/windows/pdh/pdh_windows_test.go +++ b/metricbeat/helper/windows/pdh/pdh_windows_test.go @@ -51,7 +51,8 @@ func TestPdhAddCounterInvalidCounter(t *testing.T) { func TestPdhGetFormattedCounterValueInvalidCounter(t *testing.T) { counterType, counterValue, err := PdhGetFormattedCounterValueDouble(InvalidCounterHandle) assert.EqualValues(t, counterType, 0) - assert.EqualValues(t, counterValue, (*PdhCounterValueDouble)(nil)) + assert.NotNil(t, counterValue) + assert.Equal(t, counterValue.Value, float64(0)) assert.EqualValues(t, err, PDH_INVALID_HANDLE) } diff --git a/metricbeat/module/windows/perfmon/data.go b/metricbeat/module/windows/perfmon/data.go index 68de068ed05..833c5757220 100644 --- a/metricbeat/module/windows/perfmon/data.go +++ b/metricbeat/module/windows/perfmon/data.go @@ -42,13 +42,23 @@ func (re *Reader) groupToEvents(counters map[string][]pdh.CounterValue) []mb.Eve for ind, val := range values { // Some counters, such as rate counters, require two counter values in order to compute a displayable value. In this case we must call PdhCollectQueryData twice before calling PdhGetFormattedCounterValue. // For more information, see Collecting Performance Data (https://docs.microsoft.com/en-us/windows/desktop/PerfCtrs/collecting-performance-data). - if val.Err != nil && !re.executed { - re.log.Debugw("Ignoring the first measurement because the data isn't ready", - "error", val.Err, logp.Namespace("perfmon"), "query", counterPath) - continue + if val.Err.Error != nil { + if !re.executed { + re.log.Debugw("Ignoring the first measurement because the data isn't ready", + "error", val.Err.Error, logp.Namespace("perfmon"), "query", counterPath) + continue + } + // The counter has a negative value or the counter was successfully found, but the data returned is not valid. + // This error can occur if the counter value is less than the previous value. (Because counter values always increment, the counter value rolls over to zero when it reaches its maximum value.) + // This is not an error that stops the application from running successfully and a positive counter value should be retrieved in the later calls. + if val.Err.Error == pdh.PDH_CALC_NEGATIVE_VALUE || val.Err.Error == pdh.PDH_INVALID_DATA { + re.log.Debugw("Counter value retrieval returned", + "error", val.Err.Error, "cstatus", pdh.PdhErrno(val.Err.CStatus), logp.Namespace("perfmon"), "query", counterPath) + continue + } } var eventKey string - if re.config.GroupMeasurements && val.Err == nil { + if re.config.GroupMeasurements && val.Err.Error == nil { // Send measurements with the same instance label as part of the same event eventKey = val.Instance } else { @@ -60,7 +70,7 @@ func (re *Reader) groupToEvents(counters map[string][]pdh.CounterValue) []mb.Eve if _, ok := eventMap[eventKey]; !ok { eventMap[eventKey] = &mb.Event{ MetricSetFields: common.MapStr{}, - Error: errors.Wrapf(val.Err, "failed on query=%v", counterPath), + Error: errors.Wrapf(val.Err.Error, "failed on query=%v", counterPath), } if val.Instance != "" { //will ignore instance counter @@ -100,10 +110,17 @@ func (re *Reader) groupToSingleEvent(counters map[string][]pdh.CounterValue) mb. for _, val := range values { // Some counters, such as rate counters, require two counter values in order to compute a displayable value. In this case we must call PdhCollectQueryData twice before calling PdhGetFormattedCounterValue. // For more information, see Collecting Performance Data (https://docs.microsoft.com/en-us/windows/desktop/PerfCtrs/collecting-performance-data). - if val.Err != nil && !re.executed { - re.log.Debugw("Ignoring the first measurement because the data isn't ready", - "error", val.Err, logp.Namespace("perfmon"), "query", counterPath) - continue + if val.Err.Error != nil { + if !re.executed { + re.log.Debugw("Ignoring the first measurement because the data isn't ready", + "error", val.Err, logp.Namespace("perfmon"), "query", counterPath) + continue + } + if val.Err.Error == pdh.PDH_CALC_NEGATIVE_VALUE || val.Err.Error == pdh.PDH_INVALID_DATA { + re.log.Debugw("Counter value retrieval returned", + "error", val.Err.Error, "cstatus", pdh.PdhErrno(val.Err.CStatus), logp.Namespace("perfmon"), "query", counterPath) + continue + } } if val.Measurement == nil { continue diff --git a/metricbeat/module/windows/perfmon/data_test.go b/metricbeat/module/windows/perfmon/data_test.go index 616d87d686f..df23d1667ff 100644 --- a/metricbeat/module/windows/perfmon/data_test.go +++ b/metricbeat/module/windows/perfmon/data_test.go @@ -49,7 +49,7 @@ func TestGroupToEvents(t *testing.T) { { Instance: "", Measurement: 23, - Err: nil, + Err: pdh.CounterValueError{}, }, }, } diff --git a/metricbeat/module/windows/perfmon/perfmon_test.go b/metricbeat/module/windows/perfmon/perfmon_test.go index 069138a4226..cb7961c4b00 100644 --- a/metricbeat/module/windows/perfmon/perfmon_test.go +++ b/metricbeat/module/windows/perfmon/perfmon_test.go @@ -175,7 +175,7 @@ func TestQuery(t *testing.T) { t.Fatal(path[0], "not found") } - assert.NoError(t, value[0].Err) + assert.NoError(t, value[0].Err.Error) assert.Equal(t, "TestInstanceName", value[0].Instance) } @@ -444,6 +444,7 @@ func TestWildcardQuery(t *testing.T) { } config.Queries[0].Name = "Processor Information" config.Queries[0].Instance = []string{"*"} + config.Queries[0].Namespace = "metrics" config.Queries[0].Counters = []QueryCounter{ { Name: "% Processor Time", @@ -478,6 +479,7 @@ func TestWildcardQueryNoInstanceName(t *testing.T) { } config.Queries[0].Name = "Process" config.Queries[0].Instance = []string{"*"} + config.Queries[0].Namespace = "metrics" config.Queries[0].Counters = []QueryCounter{ { Name: "Private Bytes", @@ -523,6 +525,7 @@ func TestGroupByInstance(t *testing.T) { } config.Queries[0].Name = "Processor Information" config.Queries[0].Instance = []string{"_Total"} + config.Queries[0].Namespace = "metrics" config.Queries[0].Counters = []QueryCounter{ { Name: "% Processor Time", diff --git a/metricbeat/module/windows/perfmon/reader_integration_test.go b/metricbeat/module/windows/perfmon/reader_integration_test.go index fd19b1e5c09..68fb593da32 100644 --- a/metricbeat/module/windows/perfmon/reader_integration_test.go +++ b/metricbeat/module/windows/perfmon/reader_integration_test.go @@ -77,7 +77,7 @@ func TestReadSuccessfully(t *testing.T) { // For more information, see Collecting Performance Data (https://docs.microsoft.com/en-us/windows/desktop/PerfCtrs/collecting-performance-data). events, err := reader.Read() assert.Nil(t, err) - assert.NotNil(t, events) + assert.Nil(t, events) assert.Zero(t, len(events)) events, err = reader.Read() assert.Nil(t, err)