diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 645d611e603..adec037b8c5 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -71,6 +71,7 @@ https://github.com/elastic/beats/compare/v6.0.0-beta2...master[Check the HEAD di - Add config option for windows/perfmon metricset to ignore non existent counters. {pull}6432[6432] - Support apache status pages for versions older than 2.4.16. {pull}6450[6450] - Add support for huge pages on Linux. {pull}6436[6436] +- Refactor docker CPU calculations to be more consistent with `docker stats`. {pull}6608[6608] *Packetbeat* diff --git a/metricbeat/docs/fields.asciidoc b/metricbeat/docs/fields.asciidoc index fb40077bd69..b03695bdfec 100644 --- a/metricbeat/docs/fields.asciidoc +++ b/metricbeat/docs/fields.asciidoc @@ -2150,7 +2150,7 @@ type: scaled_float format: percentage -The system kernel consumed by the Docker server. +Percentage of time in kernel space. [float] @@ -2158,7 +2158,7 @@ The system kernel consumed by the Docker server. type: long -CPU kernel ticks. +CPU ticks in kernel space. [float] @@ -2168,6 +2168,7 @@ type: scaled_float format: percentage +Percentage of total CPU time in the system. [float] @@ -2185,6 +2186,7 @@ type: scaled_float format: percentage +Percentage of time in user space. [float] @@ -2192,7 +2194,7 @@ format: percentage type: long -CPU user ticks +CPU ticks in user space. [float] diff --git a/metricbeat/module/docker/cpu/_meta/fields.yml b/metricbeat/module/docker/cpu/_meta/fields.yml index 14e2190daea..ca37183bd83 100644 --- a/metricbeat/module/docker/cpu/_meta/fields.yml +++ b/metricbeat/module/docker/cpu/_meta/fields.yml @@ -8,15 +8,16 @@ type: scaled_float format: percentage description: > - The system kernel consumed by the Docker server. + Percentage of time in kernel space. - name: kernel.ticks type: long description: > - CPU kernel ticks. + CPU ticks in kernel space. 
- name: system.pct type: scaled_float format: percentage description: > + Percentage of total CPU time in the system. - name: system.ticks type: long description: > @@ -25,10 +26,11 @@ type: scaled_float format: percentage description: > + Percentage of time in user space. - name: user.ticks type: long description: > - CPU user ticks + CPU ticks in user space. - name: total.pct type: scaled_float format: percentage diff --git a/metricbeat/module/docker/cpu/cpu_test.go b/metricbeat/module/docker/cpu/cpu_test.go index 0fa5a649416..db09f1a921a 100644 --- a/metricbeat/module/docker/cpu/cpu_test.go +++ b/metricbeat/module/docker/cpu/cpu_test.go @@ -7,14 +7,26 @@ import ( "github.com/docker/docker/api/types" "github.com/elastic/beats/libbeat/common" + "github.com/elastic/beats/metricbeat/module/docker" ) var cpuService CPUService -var statsList = make([]types.StatsJSON, 3) + +func cpuUsageFor(stats types.StatsJSON) *cpuUsage { + u := cpuUsage{ + Stat: &docker.Stat{Stats: stats}, + systemDelta: 1000000000, // Nanoseconds in a second + } + if len(stats.CPUStats.CPUUsage.PercpuUsage) == 0 { + u.cpus = 1 + } + return &u +} func TestCPUService_PerCpuUsage(t *testing.T) { oldPerCpuValuesTest := [][]uint64{{1, 9, 9, 5}, {1, 2, 3, 4}, {0, 0, 0, 0}} newPerCpuValuesTest := [][]uint64{{100000001, 900000009, 900000009, 500000005}, {101, 202, 303, 404}, {0, 0, 0, 0}} + var statsList = make([]types.StatsJSON, 3) for index := range statsList { statsList[index].PreCPUStats.CPUUsage.PercpuUsage = oldPerCpuValuesTest[index] statsList[index].CPUStats.CPUUsage.PercpuUsage = newPerCpuValuesTest[index] @@ -24,16 +36,16 @@ func TestCPUService_PerCpuUsage(t *testing.T) { expected common.MapStr }{ {statsList[0], common.MapStr{ - "0": common.MapStr{"pct": float64(0.10)}, - "1": common.MapStr{"pct": float64(0.90)}, - "2": common.MapStr{"pct": float64(0.90)}, - "3": common.MapStr{"pct": float64(0.50)}, + "0": common.MapStr{"pct": float64(0.40)}, + "1": common.MapStr{"pct": float64(3.60)}, + "2": 
common.MapStr{"pct": float64(3.60)}, + "3": common.MapStr{"pct": float64(2.00)}, }}, {statsList[1], common.MapStr{ - "0": common.MapStr{"pct": float64(0.0000001)}, - "1": common.MapStr{"pct": float64(0.0000002)}, - "2": common.MapStr{"pct": float64(0.0000003)}, - "3": common.MapStr{"pct": float64(0.0000004)}, + "0": common.MapStr{"pct": float64(0.0000004)}, + "1": common.MapStr{"pct": float64(0.0000008)}, + "2": common.MapStr{"pct": float64(0.0000012)}, + "3": common.MapStr{"pct": float64(0.0000016)}, }}, {statsList[2], common.MapStr{ "0": common.MapStr{"pct": float64(0)}, @@ -43,7 +55,8 @@ func TestCPUService_PerCpuUsage(t *testing.T) { }}, } for _, tt := range testCase { - out := perCpuUsage(&tt.given) + usage := cpuUsageFor(tt.given) + out := usage.PerCPU() // Remove ticks for test for _, s := range out { s.(common.MapStr).Delete("ticks") @@ -57,6 +70,7 @@ func TestCPUService_PerCpuUsage(t *testing.T) { func TestCPUService_TotalUsage(t *testing.T) { oldTotalValuesTest := []uint64{100, 50, 10} totalValuesTest := []uint64{2, 500000050, 10} + var statsList = make([]types.StatsJSON, 3) for index := range statsList { statsList[index].PreCPUStats.CPUUsage.TotalUsage = oldTotalValuesTest[index] statsList[index].CPUStats.CPUUsage.TotalUsage = totalValuesTest[index] @@ -70,7 +84,8 @@ func TestCPUService_TotalUsage(t *testing.T) { {statsList[2], 0}, } for _, tt := range testCase { - out := totalUsage(&tt.given) + usage := cpuUsageFor(tt.given) + out := usage.Total() if tt.expected != out { t.Errorf("totalUsage(%v) => %v, want %v", tt.given.CPUStats.CPUUsage.TotalUsage, out, tt.expected) } @@ -80,6 +95,7 @@ func TestCPUService_TotalUsage(t *testing.T) { func TestCPUService_UsageInKernelmode(t *testing.T) { usageOldValuesTest := []uint64{100, 10, 500000050} usageValuesTest := []uint64{3, 500000010, 500000050} + var statsList = make([]types.StatsJSON, 3) for index := range statsList { statsList[index].PreCPUStats.CPUUsage.UsageInKernelmode = usageOldValuesTest[index] 
statsList[index].CPUStats.CPUUsage.UsageInKernelmode = usageValuesTest[index] @@ -93,7 +109,8 @@ func TestCPUService_UsageInKernelmode(t *testing.T) { {statsList[2], 0}, } for _, tt := range testCase { - out := usageInKernelmode(&tt.given) + usage := cpuUsageFor(tt.given) + out := usage.InKernelMode() if out != tt.expected { t.Errorf("usageInKernelmode(%v) => %v, want %v", tt.given.CPUStats.CPUUsage.UsageInKernelmode, out, tt.expected) } @@ -103,6 +120,7 @@ func TestCPUService_UsageInKernelmode(t *testing.T) { func TestCPUService_UsageInUsermode(t *testing.T) { usageOldValuesTest := []uint64{0, 1965, 500} usageValuesTest := []uint64{500000000, 325, 1000000500} + var statsList = make([]types.StatsJSON, 3) for index := range statsList { statsList[index].PreCPUStats.CPUUsage.UsageInUsermode = usageOldValuesTest[index] statsList[index].CPUStats.CPUUsage.UsageInUsermode = usageValuesTest[index] @@ -116,7 +134,8 @@ func TestCPUService_UsageInUsermode(t *testing.T) { {statsList[2], 1}, } for _, tt := range testCase { - out := usageInUsermode(&tt.given) + usage := cpuUsageFor(tt.given) + out := usage.InUserMode() if out != tt.expected { t.Errorf("usageInUsermode(%v) => %v, want %v", tt.given.CPUStats.CPUUsage.UsageInUsermode, out, tt.expected) } diff --git a/metricbeat/module/docker/cpu/helper.go b/metricbeat/module/docker/cpu/helper.go index 7a283b85a42..0d61b78d299 100644 --- a/metricbeat/module/docker/cpu/helper.go +++ b/metricbeat/module/docker/cpu/helper.go @@ -3,8 +3,6 @@ package cpu import ( "strconv" - "github.com/docker/docker/api/types" - "github.com/elastic/beats/libbeat/common" "github.com/elastic/beats/libbeat/logp" "github.com/elastic/beats/metricbeat/module/docker" @@ -40,61 +38,90 @@ func (c *CPUService) getCPUStatsList(rawStats []docker.Stat, dedot bool) []CPUSt } func (c *CPUService) getCPUStats(myRawStat *docker.Stat, dedot bool) CPUStats { + usage := cpuUsage{Stat: myRawStat} + return CPUStats{ Time: common.Time(myRawStat.Stats.Read), Container: 
docker.NewContainer(myRawStat.Container, dedot), - PerCpuUsage: perCpuUsage(&myRawStat.Stats), - TotalUsage: totalUsage(&myRawStat.Stats), + PerCpuUsage: usage.PerCPU(), + TotalUsage: usage.Total(), UsageInKernelmode: myRawStat.Stats.CPUStats.CPUUsage.UsageInKernelmode, - UsageInKernelmodePercentage: usageInKernelmode(&myRawStat.Stats), + UsageInKernelmodePercentage: usage.InKernelMode(), UsageInUsermode: myRawStat.Stats.CPUStats.CPUUsage.UsageInUsermode, - UsageInUsermodePercentage: usageInUsermode(&myRawStat.Stats), + UsageInUsermodePercentage: usage.InUserMode(), SystemUsage: myRawStat.Stats.CPUStats.SystemUsage, - SystemUsagePercentage: systemUsage(&myRawStat.Stats), + SystemUsagePercentage: usage.System(), } } -func perCpuUsage(stats *types.StatsJSON) common.MapStr { +// TODO: These helpers should be merged with the cpu helper in system/cpu + +type cpuUsage struct { + *docker.Stat + + cpus int + systemDelta uint64 +} + +func (u *cpuUsage) CPUs() int { + if u.cpus == 0 { + u.cpus = len(u.Stats.CPUStats.CPUUsage.PercpuUsage) + } + return u.cpus +} + +func (u *cpuUsage) SystemDelta() uint64 { + if u.systemDelta == 0 { + u.systemDelta = u.Stats.CPUStats.SystemUsage - u.Stats.PreCPUStats.SystemUsage + } + return u.systemDelta +} + +func (u *cpuUsage) PerCPU() common.MapStr { var output common.MapStr - if len(stats.CPUStats.CPUUsage.PercpuUsage) == len(stats.PreCPUStats.CPUUsage.PercpuUsage) { + if len(u.Stats.CPUStats.CPUUsage.PercpuUsage) == len(u.Stats.PreCPUStats.CPUUsage.PercpuUsage) { output = common.MapStr{} - for index := range stats.CPUStats.CPUUsage.PercpuUsage { + for index := range u.Stats.CPUStats.CPUUsage.PercpuUsage { cpu := common.MapStr{} - cpu["pct"] = calculateLoad(stats.CPUStats.CPUUsage.PercpuUsage[index], stats.PreCPUStats.CPUUsage.PercpuUsage[index]) - cpu["ticks"] = stats.CPUStats.CPUUsage.PercpuUsage[index] + cpu["pct"] = u.calculatePercentage( + u.Stats.CPUStats.CPUUsage.PercpuUsage[index], + u.Stats.PreCPUStats.CPUUsage.PercpuUsage[index]) 
+ cpu["ticks"] = u.Stats.CPUStats.CPUUsage.PercpuUsage[index] output[strconv.Itoa(index)] = cpu } } return output } -// TODO: These helper should be merged with the cpu helper in system/cpu - -func totalUsage(stats *types.StatsJSON) float64 { - return calculateLoad(stats.CPUStats.CPUUsage.TotalUsage, stats.PreCPUStats.CPUUsage.TotalUsage) +func (u *cpuUsage) Total() float64 { + return u.calculatePercentage(u.Stats.CPUStats.CPUUsage.TotalUsage, u.Stats.PreCPUStats.CPUUsage.TotalUsage) } -func usageInKernelmode(stats *types.StatsJSON) float64 { - return calculateLoad(stats.CPUStats.CPUUsage.UsageInKernelmode, stats.PreCPUStats.CPUUsage.UsageInKernelmode) +func (u *cpuUsage) InKernelMode() float64 { + return u.calculatePercentage(u.Stats.CPUStats.CPUUsage.UsageInKernelmode, u.Stats.PreCPUStats.CPUUsage.UsageInKernelmode) } -func usageInUsermode(stats *types.StatsJSON) float64 { - return calculateLoad(stats.CPUStats.CPUUsage.UsageInUsermode, stats.PreCPUStats.CPUUsage.UsageInUsermode) +func (u *cpuUsage) InUserMode() float64 { + return u.calculatePercentage(u.Stats.CPUStats.CPUUsage.UsageInUsermode, u.Stats.PreCPUStats.CPUUsage.UsageInUsermode) } -func systemUsage(stats *types.StatsJSON) float64 { - return calculateLoad(stats.CPUStats.SystemUsage, stats.PreCPUStats.SystemUsage) +func (u *cpuUsage) System() float64 { + return u.calculatePercentage(u.Stats.CPUStats.SystemUsage, u.Stats.PreCPUStats.SystemUsage) } // This function is meant to calculate the % CPU time change between two successive readings. // The "oldValue" refers to the CPU statistics of the last read. // Time here is expressed by second and not by nanoseconde. // The main goal is to expose the %, in the same way, it's displayed by docker Client. 
-func calculateLoad(newValue uint64, oldValue uint64) float64 { - value := float64(newValue) - float64(oldValue) - if value < 0 { +func (u *cpuUsage) calculatePercentage(newValue uint64, oldValue uint64) float64 { + if newValue < oldValue { logp.Err("Error calculating CPU time change for docker module: new stats value (%v) is lower than the old one(%v)", newValue, oldValue) return -1 } - return value / float64(1000000000) + value := newValue - oldValue + if value == 0 || u.SystemDelta() == 0 { + return 0 + } + + return float64(uint64(u.CPUs())*value) / float64(u.SystemDelta()) }