Skip to content

Commit

Permalink
Metricbeat: Use system-wide ticks to calculate docker cpu usage (#6608)
Browse files Browse the repository at this point in the history
This implementation is more consistent with the `docker stats`
implementation. It is based on the ratio between the number of CPU
ticks of the container and the number of ticks of the whole system
during a fixed period of time. The previous implementation assumed
that the whole system had a CPU time of one second during each
measurement, which is not always accurate.

The new implementation also takes into account the number of CPUs, as other
tools like `top` or `docker stats` do, so if a container is using 25%
of the CPU time of a 4-CPU machine, the reported percentage will be 100%,
with a maximum of 400% total usage if it is using all the CPU time.

These changes can modify the resulting values of docker CPU metrics.
  • Loading branch information
jsoriano authored and ruflin committed Mar 28, 2018
1 parent 91b2f8e commit fa39f47
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 45 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ https://github.com/elastic/beats/compare/v6.0.0-beta2...master[Check the HEAD di
- Add config option for windows/perfmon metricset to ignore non existent counters. {pull}6432[6432]
- Support apache status pages for versions older than 2.4.16. {pull}6450[6450]
- Add support for huge pages on Linux. {pull}6436[6436]
- Refactor docker CPU calculations to be more consistent with `docker stats`. {pull}6608[6608]

*Packetbeat*

Expand Down
8 changes: 5 additions & 3 deletions metricbeat/docs/fields.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -2150,15 +2150,15 @@ type: scaled_float
format: percentage
The system kernel consumed by the Docker server.
Percentage of time in kernel space.
[float]
=== `docker.cpu.kernel.ticks`
type: long
CPU kernel ticks.
CPU ticks in kernel space.
[float]
Expand All @@ -2168,6 +2168,7 @@ type: scaled_float
format: percentage
Percentage of total CPU time in the system.
[float]
Expand All @@ -2185,14 +2186,15 @@ type: scaled_float
format: percentage
Percentage of time in user space.
[float]
=== `docker.cpu.user.ticks`
type: long
CPU user ticks
CPU ticks in user space.
[float]
Expand Down
8 changes: 5 additions & 3 deletions metricbeat/module/docker/cpu/_meta/fields.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,16 @@
type: scaled_float
format: percentage
description: >
The system kernel consumed by the Docker server.
Percentage of time in kernel space.
- name: kernel.ticks
type: long
description: >
CPU kernel ticks.
CPU ticks in kernel space.
- name: system.pct
type: scaled_float
format: percentage
description: >
Percentage of total CPU time in the system.
- name: system.ticks
type: long
description: >
Expand All @@ -25,10 +26,11 @@
type: scaled_float
format: percentage
description: >
Percentage of time in user space.
- name: user.ticks
type: long
description: >
CPU user ticks
CPU ticks in user space.
- name: total.pct
type: scaled_float
format: percentage
Expand Down
45 changes: 32 additions & 13 deletions metricbeat/module/docker/cpu/cpu_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,26 @@ import (
"github.com/docker/docker/api/types"

"github.com/elastic/beats/libbeat/common"
"github.com/elastic/beats/metricbeat/module/docker"
)

var cpuService CPUService
var statsList = make([]types.StatsJSON, 3)

// cpuUsageFor builds a cpuUsage for the given stats with the system delta
// pinned to one second, so tests get predictable percentages.
//
// When the stats carry no per-CPU readings the CPU count is forced to 1;
// otherwise it is left at zero so that CPUs() derives it from the per-CPU
// slice on first use.
func cpuUsageFor(stats types.StatsJSON) *cpuUsage {
	const nanosPerSecond = 1000000000

	cpus := 0
	if len(stats.CPUStats.CPUUsage.PercpuUsage) == 0 {
		// Without this the percentage would be multiplied by zero CPUs.
		cpus = 1
	}

	return &cpuUsage{
		Stat:        &docker.Stat{Stats: stats},
		cpus:        cpus,
		systemDelta: nanosPerSecond,
	}
}

func TestCPUService_PerCpuUsage(t *testing.T) {
oldPerCpuValuesTest := [][]uint64{{1, 9, 9, 5}, {1, 2, 3, 4}, {0, 0, 0, 0}}
newPerCpuValuesTest := [][]uint64{{100000001, 900000009, 900000009, 500000005}, {101, 202, 303, 404}, {0, 0, 0, 0}}
var statsList = make([]types.StatsJSON, 3)
for index := range statsList {
statsList[index].PreCPUStats.CPUUsage.PercpuUsage = oldPerCpuValuesTest[index]
statsList[index].CPUStats.CPUUsage.PercpuUsage = newPerCpuValuesTest[index]
Expand All @@ -24,16 +36,16 @@ func TestCPUService_PerCpuUsage(t *testing.T) {
expected common.MapStr
}{
{statsList[0], common.MapStr{
"0": common.MapStr{"pct": float64(0.10)},
"1": common.MapStr{"pct": float64(0.90)},
"2": common.MapStr{"pct": float64(0.90)},
"3": common.MapStr{"pct": float64(0.50)},
"0": common.MapStr{"pct": float64(0.40)},
"1": common.MapStr{"pct": float64(3.60)},
"2": common.MapStr{"pct": float64(3.60)},
"3": common.MapStr{"pct": float64(2.00)},
}},
{statsList[1], common.MapStr{
"0": common.MapStr{"pct": float64(0.0000001)},
"1": common.MapStr{"pct": float64(0.0000002)},
"2": common.MapStr{"pct": float64(0.0000003)},
"3": common.MapStr{"pct": float64(0.0000004)},
"0": common.MapStr{"pct": float64(0.0000004)},
"1": common.MapStr{"pct": float64(0.0000008)},
"2": common.MapStr{"pct": float64(0.0000012)},
"3": common.MapStr{"pct": float64(0.0000016)},
}},
{statsList[2], common.MapStr{
"0": common.MapStr{"pct": float64(0)},
Expand All @@ -43,7 +55,8 @@ func TestCPUService_PerCpuUsage(t *testing.T) {
}},
}
for _, tt := range testCase {
out := perCpuUsage(&tt.given)
usage := cpuUsageFor(tt.given)
out := usage.PerCPU()
// Remove ticks for test
for _, s := range out {
s.(common.MapStr).Delete("ticks")
Expand All @@ -57,6 +70,7 @@ func TestCPUService_PerCpuUsage(t *testing.T) {
func TestCPUService_TotalUsage(t *testing.T) {
oldTotalValuesTest := []uint64{100, 50, 10}
totalValuesTest := []uint64{2, 500000050, 10}
var statsList = make([]types.StatsJSON, 3)
for index := range statsList {
statsList[index].PreCPUStats.CPUUsage.TotalUsage = oldTotalValuesTest[index]
statsList[index].CPUStats.CPUUsage.TotalUsage = totalValuesTest[index]
Expand All @@ -70,7 +84,8 @@ func TestCPUService_TotalUsage(t *testing.T) {
{statsList[2], 0},
}
for _, tt := range testCase {
out := totalUsage(&tt.given)
usage := cpuUsageFor(tt.given)
out := usage.Total()
if tt.expected != out {
t.Errorf("totalUsage(%v) => %v, want %v", tt.given.CPUStats.CPUUsage.TotalUsage, out, tt.expected)
}
Expand All @@ -80,6 +95,7 @@ func TestCPUService_TotalUsage(t *testing.T) {
func TestCPUService_UsageInKernelmode(t *testing.T) {
usageOldValuesTest := []uint64{100, 10, 500000050}
usageValuesTest := []uint64{3, 500000010, 500000050}
var statsList = make([]types.StatsJSON, 3)
for index := range statsList {
statsList[index].PreCPUStats.CPUUsage.UsageInKernelmode = usageOldValuesTest[index]
statsList[index].CPUStats.CPUUsage.UsageInKernelmode = usageValuesTest[index]
Expand All @@ -93,7 +109,8 @@ func TestCPUService_UsageInKernelmode(t *testing.T) {
{statsList[2], 0},
}
for _, tt := range testCase {
out := usageInKernelmode(&tt.given)
usage := cpuUsageFor(tt.given)
out := usage.InKernelMode()
if out != tt.expected {
t.Errorf("usageInKernelmode(%v) => %v, want %v", tt.given.CPUStats.CPUUsage.UsageInKernelmode, out, tt.expected)
}
Expand All @@ -103,6 +120,7 @@ func TestCPUService_UsageInKernelmode(t *testing.T) {
func TestCPUService_UsageInUsermode(t *testing.T) {
usageOldValuesTest := []uint64{0, 1965, 500}
usageValuesTest := []uint64{500000000, 325, 1000000500}
var statsList = make([]types.StatsJSON, 3)
for index := range statsList {
statsList[index].PreCPUStats.CPUUsage.UsageInUsermode = usageOldValuesTest[index]
statsList[index].CPUStats.CPUUsage.UsageInUsermode = usageValuesTest[index]
Expand All @@ -116,7 +134,8 @@ func TestCPUService_UsageInUsermode(t *testing.T) {
{statsList[2], 1},
}
for _, tt := range testCase {
out := usageInUsermode(&tt.given)
usage := cpuUsageFor(tt.given)
out := usage.InUserMode()
if out != tt.expected {
t.Errorf("usageInUsermode(%v) => %v, want %v", tt.given.CPUStats.CPUUsage.UsageInUsermode, out, tt.expected)
}
Expand Down
79 changes: 53 additions & 26 deletions metricbeat/module/docker/cpu/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ package cpu
import (
"strconv"

"github.com/docker/docker/api/types"

"github.com/elastic/beats/libbeat/common"
"github.com/elastic/beats/libbeat/logp"
"github.com/elastic/beats/metricbeat/module/docker"
Expand Down Expand Up @@ -40,61 +38,90 @@ func (c *CPUService) getCPUStatsList(rawStats []docker.Stat, dedot bool) []CPUSt
}

func (c *CPUService) getCPUStats(myRawStat *docker.Stat, dedot bool) CPUStats {
usage := cpuUsage{Stat: myRawStat}

return CPUStats{
Time: common.Time(myRawStat.Stats.Read),
Container: docker.NewContainer(myRawStat.Container, dedot),
PerCpuUsage: perCpuUsage(&myRawStat.Stats),
TotalUsage: totalUsage(&myRawStat.Stats),
PerCpuUsage: usage.PerCPU(),
TotalUsage: usage.Total(),
UsageInKernelmode: myRawStat.Stats.CPUStats.CPUUsage.UsageInKernelmode,
UsageInKernelmodePercentage: usageInKernelmode(&myRawStat.Stats),
UsageInKernelmodePercentage: usage.InKernelMode(),
UsageInUsermode: myRawStat.Stats.CPUStats.CPUUsage.UsageInUsermode,
UsageInUsermodePercentage: usageInUsermode(&myRawStat.Stats),
UsageInUsermodePercentage: usage.InUserMode(),
SystemUsage: myRawStat.Stats.CPUStats.SystemUsage,
SystemUsagePercentage: systemUsage(&myRawStat.Stats),
SystemUsagePercentage: usage.System(),
}
}

func perCpuUsage(stats *types.StatsJSON) common.MapStr {
// TODO: These helper should be merged with the cpu helper in system/cpu

// cpuUsage wraps a docker.Stat and computes CPU usage percentages from it.
// The embedded Stat gives direct access to the raw Stats readings.
type cpuUsage struct {
	*docker.Stat

	// cpus is the cached CPU count; zero means it has not been computed
	// yet and CPUs() will derive it from the per-CPU usage list.
	cpus int

	// systemDelta is the cached difference between the current and the
	// previous system CPU usage; zero means SystemDelta() will compute it.
	systemDelta uint64
}

// CPUs returns the number of CPUs used to scale percentages. The value is
// taken from the length of the per-CPU usage list and cached in u.cpus; a
// non-zero cached (or preset) value is returned as is.
func (u *cpuUsage) CPUs() int {
	if u.cpus != 0 {
		return u.cpus
	}

	perCPU := u.Stats.CPUStats.CPUUsage.PercpuUsage
	u.cpus = len(perCPU)
	return u.cpus
}

// SystemDelta returns the difference between the current and the previous
// system CPU usage readings. A non-zero result is cached in u.systemDelta
// and returned directly on later calls.
//
// If the current reading is lower than the previous one, 0 is returned
// instead of letting the unsigned subtraction wrap around to a huge value;
// calculatePercentage treats a zero delta as "no usage".
func (u *cpuUsage) SystemDelta() uint64 {
	if u.systemDelta != 0 {
		return u.systemDelta
	}

	current := u.Stats.CPUStats.SystemUsage
	previous := u.Stats.PreCPUStats.SystemUsage
	if current < previous {
		// uint64 arithmetic wraps silently; never cache or report a
		// wrapped delta.
		return 0
	}

	u.systemDelta = current - previous
	return u.systemDelta
}

func (u *cpuUsage) PerCPU() common.MapStr {
var output common.MapStr
if len(stats.CPUStats.CPUUsage.PercpuUsage) == len(stats.PreCPUStats.CPUUsage.PercpuUsage) {
if len(u.Stats.CPUStats.CPUUsage.PercpuUsage) == len(u.Stats.PreCPUStats.CPUUsage.PercpuUsage) {
output = common.MapStr{}
for index := range stats.CPUStats.CPUUsage.PercpuUsage {
for index := range u.Stats.CPUStats.CPUUsage.PercpuUsage {
cpu := common.MapStr{}
cpu["pct"] = calculateLoad(stats.CPUStats.CPUUsage.PercpuUsage[index], stats.PreCPUStats.CPUUsage.PercpuUsage[index])
cpu["ticks"] = stats.CPUStats.CPUUsage.PercpuUsage[index]
cpu["pct"] = u.calculatePercentage(
u.Stats.CPUStats.CPUUsage.PercpuUsage[index],
u.Stats.PreCPUStats.CPUUsage.PercpuUsage[index])
cpu["ticks"] = u.Stats.CPUStats.CPUUsage.PercpuUsage[index]
output[strconv.Itoa(index)] = cpu
}
}
return output
}

// TODO: These helper should be merged with the cpu helper in system/cpu

func totalUsage(stats *types.StatsJSON) float64 {
return calculateLoad(stats.CPUStats.CPUUsage.TotalUsage, stats.PreCPUStats.CPUUsage.TotalUsage)
func (u *cpuUsage) Total() float64 {
return u.calculatePercentage(u.Stats.CPUStats.CPUUsage.TotalUsage, u.Stats.PreCPUStats.CPUUsage.TotalUsage)
}

func usageInKernelmode(stats *types.StatsJSON) float64 {
return calculateLoad(stats.CPUStats.CPUUsage.UsageInKernelmode, stats.PreCPUStats.CPUUsage.UsageInKernelmode)
func (u *cpuUsage) InKernelMode() float64 {
return u.calculatePercentage(u.Stats.CPUStats.CPUUsage.UsageInKernelmode, u.Stats.PreCPUStats.CPUUsage.UsageInKernelmode)
}

func usageInUsermode(stats *types.StatsJSON) float64 {
return calculateLoad(stats.CPUStats.CPUUsage.UsageInUsermode, stats.PreCPUStats.CPUUsage.UsageInUsermode)
func (u *cpuUsage) InUserMode() float64 {
return u.calculatePercentage(u.Stats.CPUStats.CPUUsage.UsageInUsermode, u.Stats.PreCPUStats.CPUUsage.UsageInUsermode)
}

func systemUsage(stats *types.StatsJSON) float64 {
return calculateLoad(stats.CPUStats.SystemUsage, stats.PreCPUStats.SystemUsage)
func (u *cpuUsage) System() float64 {
return u.calculatePercentage(u.Stats.CPUStats.SystemUsage, u.Stats.PreCPUStats.SystemUsage)
}

// This function is meant to calculate the % CPU time change between two successive readings.
// The "oldValue" refers to the CPU statistics of the last read.
// Time here is expressed in seconds and not in nanoseconds.
// The main goal is to expose the % in the same way it is displayed by the docker client.
func calculateLoad(newValue uint64, oldValue uint64) float64 {
value := float64(newValue) - float64(oldValue)
if value < 0 {
func (u *cpuUsage) calculatePercentage(newValue uint64, oldValue uint64) float64 {
if newValue < oldValue {
logp.Err("Error calculating CPU time change for docker module: new stats value (%v) is lower than the old one(%v)", newValue, oldValue)
return -1
}
return value / float64(1000000000)
value := newValue - oldValue
if value == 0 || u.SystemDelta() == 0 {
return 0
}

return float64(uint64(u.CPUs())*value) / float64(u.SystemDelta())
}

0 comments on commit fa39f47

Please sign in to comment.