Skip to content

Commit

Permalink
Fix CPU system plugin that gets stuck after suspend
Browse files Browse the repository at this point in the history
Signed-off-by: Pierre Fersing <pierre.fersing@bleemeo.com>
  • Loading branch information
PierreF committed Oct 16, 2017
1 parent a179698 commit 57c75c5
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
- [#3136](https://github.com/influxdata/telegraf/issues/3136): Fix webhooks input address in use during reload.
- [#3258](https://github.com/influxdata/telegraf/issues/3258): Unlock Statsd when stopping to prevent deadlock.
- [#3319](https://github.com/influxdata/telegraf/issues/3319): Fix cloudwatch output requires unneeded permissions.
- [#3342](https://github.com/influxdata/telegraf/pull/3342): Fix CPU input plugin stuck after suspend on Linux.

## v1.4.3 [unreleased]

Expand Down
5 changes: 3 additions & 2 deletions plugins/inputs/system/cpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,8 @@ func (s *CPUStats) Gather(acc telegraf.Accumulator) error {
totalDelta := total - lastTotal

if totalDelta < 0 {
return fmt.Errorf("Error: current total CPU time is less than previous total CPU time")
err = fmt.Errorf("Error: current total CPU time is less than previous total CPU time")
break
}

if totalDelta == 0 {
Expand Down Expand Up @@ -126,7 +127,7 @@ func (s *CPUStats) Gather(acc telegraf.Accumulator) error {
s.lastStats[cts.CPU] = cts
}

return nil
return err
}

func totalCpuTime(t cpu.TimesStat) float64 {
Expand Down
69 changes: 69 additions & 0 deletions plugins/inputs/system/cpu_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,72 @@ func TestCPUCountIncrease(t *testing.T) {
err = cs.Gather(&acc)
require.NoError(t, err)
}

// TestCPUTimesDecrease tests that telegraf continues to work after
// CPU times decrease, which seems to occur when a Linux system is suspended.
//
// It gathers three successive samples for the same CPU: a baseline, a sample
// whose total time is lower than the baseline (Gather is expected to return an
// error), and a sample whose times have increased again (Gather is expected to
// succeed and report metrics).
func TestCPUTimesDecrease(t *testing.T) {
var mps MockPS
// NOTE(review): only the first mock has its expectations asserted; mps2 and
// mps3 below are not checked with AssertExpectations.
defer mps.AssertExpectations(t)
var acc testutil.Accumulator

// First sample: baseline, total of 100.
cts := cpu.TimesStat{
CPU: "cpu0",
User: 18,
Idle: 80,
Iowait: 2,
}

// Second sample: total of 79, which is lower than the baseline total.
cts2 := cpu.TimesStat{
CPU: "cpu0",
User: 38, // increased by 20
Idle: 40, // decreased by 40
Iowait: 1, // decreased by 1
}

// Third sample: total of 179, i.e. 100 more than the second sample.
cts3 := cpu.TimesStat{
CPU: "cpu0",
User: 56, // increased by 18
Idle: 120, // increased by 80
Iowait: 3, // increased by 2
}

mps.On("CPUTimes").Return([]cpu.TimesStat{cts}, nil)

cs := NewCPUStats(&mps)

cputags := map[string]string{
"cpu": "cpu0",
}

err := cs.Gather(&acc)
require.NoError(t, err)

// Computed values are checked with delta > 0 because of floating point
// arithmetic imprecision; the raw time_* values are checked with a delta of 0.
assertContainsTaggedFloat(t, &acc, "cpu", "time_user", 18, 0, cputags)
assertContainsTaggedFloat(t, &acc, "cpu", "time_idle", 80, 0, cputags)
assertContainsTaggedFloat(t, &acc, "cpu", "time_iowait", 2, 0, cputags)

// Swap in a new mock so the next Gather sees the second sample.
mps2 := MockPS{}
mps2.On("CPUTimes").Return([]cpu.TimesStat{cts2}, nil)
cs.ps = &mps2

// CPU times decreased. An error should be raised
err = cs.Gather(&acc)
require.Error(t, err)

// Swap in a new mock so the next Gather sees the third sample.
mps3 := MockPS{}
mps3.On("CPUTimes").Return([]cpu.TimesStat{cts3}, nil)
cs.ps = &mps3

// Gathering must succeed again after the failed round.
err = cs.Gather(&acc)
require.NoError(t, err)

assertContainsTaggedFloat(t, &acc, "cpu", "time_user", 56, 0, cputags)
assertContainsTaggedFloat(t, &acc, "cpu", "time_idle", 120, 0, cputags)
assertContainsTaggedFloat(t, &acc, "cpu", "time_iowait", 3, 0, cputags)

// The expected usage values match the deltas between the second and third
// samples (18, 80 and 2 out of a total delta of 100).
assertContainsTaggedFloat(t, &acc, "cpu", "usage_user", 18, 0.0005, cputags)
assertContainsTaggedFloat(t, &acc, "cpu", "usage_idle", 80, 0.0005, cputags)
assertContainsTaggedFloat(t, &acc, "cpu", "usage_iowait", 2, 0.0005, cputags)
}

0 comments on commit 57c75c5

Please sign in to comment.