Skip to content

Commit 56c6763

Browse files
authored
fix: ensure metrics collection is actually disabled with metricsInterval=0s (#2330)
Also guard against: - `metricsInterval:null` resulting in metrics being enabled without any log message, which is perhaps slightly surprising. - `metricsInterval:-1s` (any negative value) resulting in metrics being collected *as fast as possible* via `setInterval(collectAllTheMetrics, -1000)`. An invalid value for this config var, and the other "POSTIVE_TIME_OPTS", will result in (a) a log.warning and (b) falling back to the default value. A slight semantic change on the internal Metrics object means that the `getOrCreate...()` metric functions now return undefined when metrics are disabled via `metricsInterval`. This doesn't affect any public API.
1 parent aa357f0 commit 56c6763

File tree

6 files changed

+137
-31
lines changed

6 files changed

+137
-31
lines changed

CHANGELOG.asciidoc

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,29 @@ Notes:
2929
=== Node.js Agent version 3.x
3030
3131
32+
==== Unreleased
33+
34+
[float]
35+
===== Breaking changes
36+
37+
[float]
38+
===== Features
39+
40+
[float]
41+
===== Bug fixes
42+
43+
* Guard against a negative value of `metricsInterval`, which can lead to
44+
high CPU usage as metrics are collected as fast as possible. Also ensure
45+
no metrics collection can happen if `metricsInterval="0s"` as intended.
46+
Before this change it was possible for some metric collection to still
47+
happen, even though none would be reported. ({pull}2330[#2330])
48+
+
49+
This change also guards against negative and invalid values in the following
50+
configuration options: `abortedErrorThreshold`, `apiRequestTime`, and
51+
`serverTimeout`. If an invalid value is given, then will fallback to their
52+
default value.
53+
54+
3255
[[release-notes-3.21.1]]
3356
==== 3.21.1 2021/09/16
3457
@@ -51,16 +74,16 @@ Notes:
5174
Specific transaction/span/error fields (see the list below) will be truncated
5275
at this number of unicode characters. ({pull}2193[#2193], {issues}1921[#1921])
5376
+
54-
The `errorMessageMaxLength` configuration option is now *deprecated*, but
55-
still supported. Users should switch to using `longFieldMaxLength. If
56-
`errorMessageMaxLength` is not specified, truncation of error messages will
57-
now use the `longFieldMaxLength` value.
77+
The `errorMessageMaxLength` configuration option is now *deprecated*, but
78+
still supported. Users should switch to using `longFieldMaxLength`. If
79+
`errorMessageMaxLength` is not specified, truncation of error messages will
80+
now use the `longFieldMaxLength` value.
5881
+
59-
Note that ultimately the maximum length of any tracing field is limited by the
60-
{apm-server-ref-v}/configuration-process.html#max_event_size[`max_event_size`]
61-
configured for the receiving APM server.
82+
Note that ultimately the maximum length of any tracing field is limited by the
83+
{apm-server-ref-v}/configuration-process.html#max_event_size[`max_event_size`]
84+
configured for the receiving APM server.
6285
+
63-
The fields affected by `longFieldMaxLength` are:
86+
The fields affected by `longFieldMaxLength` are:
6487
+
6588
** `transaction.context.request.body`, `error.context.request.body` - Before
6689
this change these fields were not truncated.

docs/configuration.asciidoc

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -948,15 +948,14 @@ it will be parsed out of the `/proc/self/cgroup` file.
948948
==== `metricsInterval`
949949

950950
* *Type:* String
951-
* *Default:* `30s`
951+
* *Default:* `"30s"`
952952
* *Env:* `ELASTIC_APM_METRICS_INTERVAL`
953953

954954
Specify the interval for reporting metrics to APM Server.
955955
The interval should be in seconds,
956956
or should include a time suffix.
957957

958-
To disable metrics reporting,
959-
set the interval to `0`.
958+
To disable metrics reporting, set the interval to `"0s"`.
960959

961960
[[metrics-limit]]
962961
==== `metricsLimit`

lib/config.js

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -197,11 +197,14 @@ var NUM_OPTS = [
197197
'transactionSampleRate'
198198
]
199199

200-
var TIME_OPTS = [
200+
var POSITIVE_TIME_OPTS = [
201201
'abortedErrorThreshold',
202202
'apiRequestTime',
203203
'metricsInterval',
204-
'serverTimeout',
204+
'serverTimeout'
205+
]
206+
// Time options allowing negative values.
207+
var TIME_OPTS = [
205208
'spanFramesMinDuration'
206209
]
207210

@@ -400,6 +403,7 @@ function normalize (opts, logger) {
400403
normalizeNumbers(opts)
401404
normalizeBytes(opts)
402405
normalizeArrays(opts)
406+
normalizePositiveTime(opts, logger)
403407
normalizeTime(opts)
404408
normalizeBools(opts, logger)
405409
normalizeIgnoreOptions(opts)
@@ -512,9 +516,24 @@ function normalizeBytes (opts) {
512516
}
513517
}
514518

519+
function normalizePositiveTime (opts, logger) {
520+
for (const key of POSITIVE_TIME_OPTS) {
521+
if (key in opts) {
522+
let val = secondsFromTimeStr(String(opts[key]))
523+
if (val === null) {
524+
const def = DEFAULTS[key]
525+
logger.warn('invalid time value "%s" for "%s" config option: using default "%s"',
526+
opts[key], key, def)
527+
val = secondsFromTimeStr(def)
528+
}
529+
opts[key] = val
530+
}
531+
}
532+
}
533+
515534
function normalizeTime (opts) {
516535
for (const key of TIME_OPTS) {
517-
if (key in opts) opts[key] = toSeconds(String(opts[key]))
536+
if (key in opts) opts[key] = secondsFromTimeStr(String(opts[key]), true)
518537
}
519538
}
520539

@@ -594,14 +613,19 @@ function bytes (input) {
594613
}
595614
}
596615

597-
function toSeconds (value) {
598-
var matches = /^(-)?(\d+)(m|ms|s)?$/.exec(value)
599-
if (!matches) return null
616+
function secondsFromTimeStr (value, allowNegative) {
617+
let matches
618+
if (allowNegative) {
619+
matches = /^(-?\d+)(m|ms|s)?$/.exec(value)
620+
} else {
621+
matches = /^(\d+)(m|ms|s)?$/.exec(value)
622+
}
623+
if (!matches) {
624+
return null
625+
}
600626

601-
var negate = matches[1]
602-
var amount = Number(matches[2])
603-
if (negate) amount = -amount
604-
var scale = matches[3]
627+
var amount = Number(matches[1])
628+
var scale = matches[2]
605629

606630
if (scale === 'm') {
607631
amount *= 60
@@ -738,4 +762,7 @@ module.exports.INTAKE_STRING_MAX_SIZE = INTAKE_STRING_MAX_SIZE
738762
module.exports.CAPTURE_ERROR_LOG_STACK_TRACES_NEVER = CAPTURE_ERROR_LOG_STACK_TRACES_NEVER
739763
module.exports.CAPTURE_ERROR_LOG_STACK_TRACES_MESSAGES = CAPTURE_ERROR_LOG_STACK_TRACES_MESSAGES
740764
module.exports.CAPTURE_ERROR_LOG_STACK_TRACES_ALWAYS = CAPTURE_ERROR_LOG_STACK_TRACES_ALWAYS
765+
766+
// The following are exported for tests.
741767
module.exports.DEFAULTS = DEFAULTS
768+
module.exports.secondsFromTimeStr = secondsFromTimeStr

lib/instrumentation/modules/aws-sdk/sqs.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ function recordMetrics (queueName, data, agent) {
9292
}
9393
if (!queueMetrics.get(queueName)) {
9494
const collector = agent._metrics.createQueueMetricsCollector(queueName)
95+
if (!collector) {
96+
return
97+
}
9598
queueMetrics.set(queueName, collector)
9699
}
97100
const metrics = queueMetrics.get(queueName)

lib/metrics/index.js

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,16 @@ class Metrics {
2424
start (refTimers) {
2525
const metricsInterval = this[agentSymbol]._conf.metricsInterval
2626
const enabled = metricsInterval !== 0 && !this[agentSymbol]._conf.disableSend
27-
this[registrySymbol] = new MetricsRegistry(this[agentSymbol], {
28-
reporterOptions: {
29-
defaultReportingIntervalInSeconds: metricsInterval,
30-
enabled: enabled,
31-
unrefTimers: !refTimers,
32-
logger: new NoopLogger()
33-
}
34-
})
27+
if (enabled) {
28+
this[registrySymbol] = new MetricsRegistry(this[agentSymbol], {
29+
reporterOptions: {
30+
defaultReportingIntervalInSeconds: metricsInterval,
31+
enabled: enabled,
32+
unrefTimers: !refTimers,
33+
logger: new NoopLogger()
34+
}
35+
})
36+
}
3537
}
3638

3739
stop () {

test/config.test.js

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -354,14 +354,66 @@ bytesValues.forEach(function (key) {
354354
})
355355
})
356356

357-
var timeValues = [
357+
var POSITIVE_TIME_OPTS = [
358358
'abortedErrorThreshold',
359359
'apiRequestTime',
360360
'metricsInterval',
361361
'serverTimeout'
362362
]
363+
// Time options allowing negative values.
364+
var TIME_OPTS = [
365+
'spanFramesMinDuration'
366+
]
367+
368+
POSITIVE_TIME_OPTS.forEach(function (key) {
369+
test(key + ' should guard against a negative time', function (t) {
370+
var agent = new Agent()
371+
var logger = new CaptureLogger()
372+
agent.start(Object.assign(
373+
{},
374+
agentOptsNoopTransport,
375+
{
376+
[key]: '-1s',
377+
logger
378+
}
379+
))
380+
381+
t.strictEqual(agent._conf[key], config.secondsFromTimeStr(config.DEFAULTS[key]),
382+
'fell back to default value')
383+
const warning = logger.calls[0]
384+
t.equal(warning.type, 'warn', 'got a log.warn')
385+
t.ok(warning.message.indexOf('-1s') !== -1, 'warning message includes the invalid value')
386+
t.ok(warning.message.indexOf(key) !== -1, 'warning message includes the invalid key')
387+
388+
agent.destroy()
389+
t.end()
390+
})
391+
392+
test(key + ' should guard against a bogus non-time', function (t) {
393+
var agent = new Agent()
394+
var logger = new CaptureLogger()
395+
agent.start(Object.assign(
396+
{},
397+
agentOptsNoopTransport,
398+
{
399+
[key]: 'bogusvalue',
400+
logger
401+
}
402+
))
403+
404+
t.strictEqual(agent._conf[key], config.secondsFromTimeStr(config.DEFAULTS[key]),
405+
'fell back to default value')
406+
const warning = logger.calls[0]
407+
t.equal(warning.type, 'warn', 'got a log.warn')
408+
t.ok(warning.message.indexOf('bogusvalue') !== -1, 'warning message includes the invalid value')
409+
t.ok(warning.message.indexOf(key) !== -1, 'warning message includes the invalid key')
410+
411+
agent.destroy()
412+
t.end()
413+
})
414+
})
363415

364-
timeValues.forEach(function (key) {
416+
POSITIVE_TIME_OPTS.concat(TIME_OPTS).forEach(function (key) {
365417
test(key + ' should convert minutes to seconds', function (t) {
366418
var agent = new Agent()
367419
var opts = {}

0 commit comments

Comments
 (0)