Commit 3984ca5

Make tests more robust and always run native and non-native tests
In addition, it no longer propagates confusing Node.js event loop delay metrics (zero values).
1 parent 5fa89ca commit 3984ca5
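
For context on the zero-value filtering: Node's `perf_hooks` event loop delay histogram reports nanosecond values, and the sampling resolution itself acts as a baseline that is subtracted before reporting, so an idle process can easily end up with stats that are all zero. The following is only a minimal sketch of that situation in a standalone Node.js script; the variable names and logging are illustrative, not code from this commit.

```js
'use strict'

const { monitorEventLoopDelay } = require('perf_hooks')

const resolutionMs = 4 // sampling resolution in milliseconds
const delayHistogram = monitorEventLoopDelay({ resolution: resolutionMs })
delayHistogram.enable()

setTimeout(() => {
  delayHistogram.disable()

  // Values are reported in nanoseconds; the resolution itself is the
  // baseline delay, so it is subtracted before reporting.
  const baseline = resolutionMs * 1e6
  const avg = Math.max(delayHistogram.mean - baseline, 0)
  const sum = Math.round(avg * delayHistogram.count)

  if (sum === 0) {
    // An idle event loop often lands here: every sample is at or below the
    // resolution, so reporting min/max/avg/p95 of 0 only adds noise.
    console.log('event loop delay negligible, skipping report')
  } else {
    console.log('avg delay (ns):', avg, 'samples:', delayHistogram.count)
  }
}, 100)
```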

2 files changed: +43 −43 lines changed

packages/dd-trace/src/runtime_metrics/runtime_metrics.js

Lines changed: 19 additions & 13 deletions
@@ -16,7 +16,7 @@ const { NODE_MAJOR } = require('../../../../version')
 const DD_RUNTIME_METRICS_FLUSH_INTERVAL = getEnvironmentVariable('DD_RUNTIME_METRICS_FLUSH_INTERVAL') ?? '10000'
 const INTERVAL = Number.parseInt(DD_RUNTIME_METRICS_FLUSH_INTERVAL, 10)
 
-const eventLoopDelayResolution = 5
+const eventLoopDelayResolution = 4
 
 let nativeMetrics = null
 let gcObserver = null
@@ -196,19 +196,22 @@ function captureEventLoopDelay () {
   if (eventLoopDelayObserver.count !== 0) {
     const minimum = eventLoopDelayResolution * 1e6
     const avg = Math.max(eventLoopDelayObserver.mean - minimum, 0)
-
     const sum = Math.round(avg * eventLoopDelayObserver.count)
-    // Normalize the metrics to the same format as the native metrics.
-    const stats = {
-      min: Math.max(eventLoopDelayObserver.min - minimum, 0),
-      max: Math.max(eventLoopDelayObserver.max - minimum, 0),
-      sum,
-      total: sum,
-      avg,
-      count: eventLoopDelayObserver.count,
-      p95: Math.max(eventLoopDelayObserver.percentile(95) - minimum, 0)
+
+    if (sum !== 0) {
+      // Normalize the metrics to the same format as the native metrics.
+      const stats = {
+        min: Math.max(eventLoopDelayObserver.min - minimum, 0),
+        max: Math.max(eventLoopDelayObserver.max - minimum, 0),
+        sum,
+        total: sum,
+        avg,
+        count: eventLoopDelayObserver.count,
+        p95: Math.max(eventLoopDelayObserver.percentile(95) - minimum, 0)
+      }
+
+      histogram('runtime.node.event_loop.delay', stats)
     }
-    histogram('runtime.node.event_loop.delay', stats)
   }
   eventLoopDelayObserver = monitorEventLoopDelay({ resolution: eventLoopDelayResolution })
   eventLoopDelayObserver.enable()
@@ -290,7 +293,7 @@ function captureNativeMetrics (trackEventLoop, trackGc) {
   client.gauge('runtime.node.cpu.user', userPercent.toFixed(2))
   client.gauge('runtime.node.cpu.total', totalPercent.toFixed(2))
 
-  if (trackEventLoop && stats.eventLoop.count !== 0) {
+  if (trackEventLoop) {
     histogram('runtime.node.event_loop.delay', stats.eventLoop)
   }
 
@@ -315,6 +318,9 @@ function captureNativeMetrics (trackEventLoop, trackGc) {
 }
 
 function histogram (name, stats, tag) {
+  if (stats.count === 0) {
+    return
+  }
   client.gauge(`${name}.min`, stats.min, tag)
   client.gauge(`${name}.max`, stats.max, tag)
   client.increment(`${name}.sum`, stats.sum, tag)
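
To illustrate the new guard in `histogram()`: when a stats object has `count === 0`, nothing is sent to the client at all, instead of a burst of zero values. The snippet below is only a rough, self-contained sketch of that behavior with a fake client; it uses just the metric suffixes visible in this diff, and the helper signature here is simplified for the example.

```js
'use strict'

// Illustrative sketch only: skip empty histograms entirely.
function histogram (client, name, stats, tag) {
  if (stats.count === 0) {
    return // an empty histogram produces no metrics at all
  }
  client.gauge(`${name}.min`, stats.min, tag)
  client.gauge(`${name}.max`, stats.max, tag)
  client.increment(`${name}.sum`, stats.sum, tag)
}

// Fake client that records every call it receives.
const calls = []
const client = {
  gauge: (...args) => calls.push(['gauge', ...args]),
  increment: (...args) => calls.push(['increment', ...args])
}

histogram(client, 'runtime.node.event_loop.delay', { count: 0 })
console.log(calls.length) // 0 -- the empty histogram was skipped

histogram(client, 'runtime.node.event_loop.delay', { count: 3, min: 1, max: 5, sum: 9 })
console.log(calls.length) // 3 -- min, max and sum were reported
```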

packages/dd-trace/test/runtime_metrics.spec.js

Lines changed: 24 additions & 30 deletions
@@ -29,7 +29,6 @@ function createGarbage (count = 50) {
 
 [true, false].forEach((nativeMetrics) => {
   describe(`runtimeMetrics ${nativeMetrics ? 'with' : 'without'} native metrics`, () => {
-
     suiteDescribe('runtimeMetrics (proxy)', () => {
       let runtimeMetrics
       let proxy
@@ -54,18 +53,9 @@ function createGarbage (count = 50) {
           decrement () {}
         })
 
-        const proxiedObject = {
+        proxy = proxyquire('../src/runtime_metrics', {
           './runtime_metrics': runtimeMetrics
-        }
-        if (!nativeMetrics) {
-          proxiedObject['@datadog/native-metrics'] = {
-            start () {
-              throw new Error('Native metrics are not supported in this environment')
-            },
-          }
-        }
-
-        proxy = proxyquire('../src/runtime_metrics', proxiedObject)
+        })
       })
 
       it('should be noop when disabled', () => {
@@ -381,7 +371,7 @@ function createGarbage (count = 50) {
 
         // Wait for event loop delay observer to trigger.
         let startTime = Date.now()
-        let waitTime = 10
+        let waitTime = 40
         while (Date.now() - startTime < waitTime) {
           // Need ticks for the event loop delay
           await setTimeout(1)
@@ -405,7 +395,7 @@ function createGarbage (count = 50) {
 
         // Wait for GC observer to trigger.
         startTime = Date.now()
-        waitTime = 10
+        waitTime = 40
         while (Date.now() - startTime < waitTime) {
           // Need ticks for the event loop delay
           await setTimeout(1)
@@ -445,6 +435,8 @@ function createGarbage (count = 50) {
         clock.tick(10000) // Second collection with delta
         clock.tick(10000) // Second collection with delta
 
+        performance.eventLoopUtilization.restore()
+
         const eluCalls = client.gauge.getCalls().filter(call =>
           call.args[0] === 'runtime.node.event_loop.utilization'
         )
@@ -505,7 +497,10 @@ function createGarbage (count = 50) {
           assert.match(stringValue, /^\d+(\.\d{1,2})?$/)
           const number = Number(stringValue)
           if (metric === 'runtime.node.cpu.user') {
-            assert(number >= 1, `${metric} sanity check failed (increase CPU load above with more ticks): ${number}`)
+            assert(
+              number >= 1,
+              `${metric} sanity check failed (increase CPU load above with more ticks): ${number}`
+            )
             userPercent = number
           }
           if (metric === 'runtime.node.cpu.system') {
@@ -562,14 +557,14 @@ function createGarbage (count = 50) {
       it('should show increasing uptime over time', () => {
         const startPerformanceNow = performance.now()
         clock.tick(10000)
-        sinon.stub(performance, 'now').returns(startPerformanceNow + 10000)
-        const firstUptimeCalls = client.gauge.getCalls().filter(call => call.args[0] === 'runtime.node.process.uptime')
+        const firstUptimeCalls = client.gauge.getCalls()
+          .filter(call => call.args[0] === 'runtime.node.process.uptime')
         const firstUptime = firstUptimeCalls[0].args[1]
 
         client.gauge.resetHistory()
+        sinon.stub(performance, 'now').returns(startPerformanceNow + 10000)
         clock.tick(10000) // Advance another 10 seconds
         performance.now.restore()
-        sinon.stub(performance, 'now').returns(startPerformanceNow + 20001)
 
         let nextUptimeCall = client.gauge.getCalls().filter(call => call.args[0] === 'runtime.node.process.uptime')
         assert.strictEqual(nextUptimeCall.length, 1)
@@ -579,17 +574,16 @@ function createGarbage (count = 50) {
         assert.strictEqual(nextUptime - firstUptime, 10)
         client.gauge.resetHistory()
 
+        sinon.stub(performance, 'now').returns(startPerformanceNow + 20001)
         clock.tick(10000) // Advance another 10 seconds
         performance.now.restore()
-        sinon.stub(performance, 'now').returns(startPerformanceNow + 30003)
 
         nextUptimeCall = client.gauge.getCalls().filter(call => call.args[0] === 'runtime.node.process.uptime')
         assert.strictEqual(nextUptimeCall.length, 1)
         nextUptime = nextUptimeCall[0].args[1]
 
         // Uptime should be 10 seconds more
         assert.strictEqual(nextUptime - firstUptime, 20)
-        performance.now.restore()
       })
     })
 
@@ -607,14 +601,14 @@ function createGarbage (count = 50) {
         clock.tick(10000)
 
         const metrics = client.gauge.getCalls().reduce((acc, call) => {
-          acc.add(call.args[0])
+          acc.set(call.args[0], call.args[1])
           return acc
-        }, new Set())
+        }, new Map())
 
         // If event loop count or gc count is zero, the metrics are not reported.
-        assert.strictEqual(metrics.size, nativeMetrics ? 27 : 22)
+        assert.strictEqual(metrics.size, 22)
         assert.strictEqual(client.histogram.getCalls().length, 0)
-        assert.strictEqual(client.increment.getCalls().length, nativeMetrics ? 3 : 0)
+        assert.strictEqual(client.increment.getCalls().length, 0)
       }
     })
 
@@ -629,6 +623,10 @@ function createGarbage (count = 50) {
 
         clock.tick(10000)
 
+        process.memoryUsage.restore()
+        os.totalmem.restore()
+        os.freemem.restore()
+
         const metrics = client.gauge.getCalls().reduce((acc, call) => {
           acc[call.args[0]] = call.args[1]
           return acc
@@ -637,13 +635,9 @@ function createGarbage (count = 50) {
         assert.strictEqual(metrics['runtime.node.mem.heap_total'], stats.heapTotal)
         assert.strictEqual(metrics['runtime.node.mem.heap_used'], stats.heapUsed)
         assert.strictEqual(metrics['runtime.node.mem.rss'], stats.rss)
-        assert.strictEqual(metrics['runtime.node.mem.total'], os.totalmem())
-        assert.strictEqual(metrics['runtime.node.mem.free'], os.freemem())
+        assert.strictEqual(metrics['runtime.node.mem.total'], totalmem)
+        assert.strictEqual(metrics['runtime.node.mem.free'], freemem)
         assert.strictEqual(metrics['runtime.node.mem.external'], stats.external)
-
-        process.memoryUsage.restore()
-        os.totalmem.restore()
-        os.freemem.restore()
       })
     })
 
