Skip to content

Commit

Permalink
Fix invalid CPU stats on Windows
Browse files Browse the repository at this point in the history
This PR fixes an issue introduced in Nomad 0.6.0 due to
shirou/gopsutil#420. The issue arised from the
fact that the Windows stats from gopsutil reports CPUs in
percentages where we expected ticks.
  • Loading branch information
dadgar committed Sep 10, 2017
1 parent 1e0b9cb commit db261cd
Show file tree
Hide file tree
Showing 4 changed files with 196 additions and 61 deletions.
42 changes: 42 additions & 0 deletions client/stats/cpu_test.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
package stats

import (
"log"
"math"
"os"
"testing"
"time"

shelpers "github.com/hashicorp/nomad/helper/stats"
"github.com/stretchr/testify/assert"
)

func TestCpuStatsPercent(t *testing.T) {
Expand All @@ -15,3 +21,39 @@ func TestCpuStatsPercent(t *testing.T) {
t.Fatalf("expected: %v, actual: %v", expectedPercent, percent)
}
}

func TestHostStats_CPU(t *testing.T) {
assert := assert.New(t)
assert.Nil(shelpers.Init())

logger := log.New(os.Stderr, "", log.LstdFlags|log.Lmicroseconds)
cwd, err := os.Getwd()
assert.Nil(err)
hs := NewHostStatsCollector(logger, cwd)

// Collect twice so we can calculate percents we need to generate some work
// so that the cpu values change
assert.Nil(hs.Collect())
total := 0
for i := 1; i < 1000000000; i++ {
total *= i
total = total % i
}
assert.Nil(hs.Collect())
stats := hs.Stats()

assert.NotZero(stats.CPUTicksConsumed)
assert.NotZero(len(stats.CPU))

for _, cpu := range stats.CPU {
assert.False(math.IsNaN(cpu.Idle))
assert.False(math.IsNaN(cpu.Total))
assert.False(math.IsNaN(cpu.System))
assert.False(math.IsNaN(cpu.User))

assert.False(math.IsInf(cpu.Idle, 0))
assert.False(math.IsInf(cpu.Total, 0))
assert.False(math.IsInf(cpu.System, 0))
assert.False(math.IsInf(cpu.User, 0))
}
}
36 changes: 36 additions & 0 deletions client/stats/cpu_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// +build !windows

package stats

import (
shelpers "github.com/hashicorp/nomad/helper/stats"
"github.com/shirou/gopsutil/cpu"
)

func (h *HostStatsCollector) collectCPUStats() (cpus []*CPUStats, totalTicks float64, err error) {

ticksConsumed := 0.0
cpuStats, err := cpu.Times(true)
if err != nil {
return nil, 0.0, err
}
cs := make([]*CPUStats, len(cpuStats))
for idx, cpuStat := range cpuStats {
percentCalculator, ok := h.statsCalculator[cpuStat.CPU]
if !ok {
percentCalculator = NewHostCpuStatsCalculator()
h.statsCalculator[cpuStat.CPU] = percentCalculator
}
idle, user, system, total := percentCalculator.Calculate(cpuStat)
cs[idx] = &CPUStats{
CPU: cpuStat.CPU,
User: user,
System: system,
Idle: idle,
Total: total,
}
ticksConsumed += (total / 100.0) * (shelpers.TotalTicksAvailable() / float64(len(cpuStats)))
}

return cs, ticksConsumed, nil
}
53 changes: 53 additions & 0 deletions client/stats/cpu_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// +build windows

package stats

import (
"fmt"

shelpers "github.com/hashicorp/nomad/helper/stats"
"github.com/shirou/gopsutil/cpu"
)

func (h *HostStatsCollector) collectCPUStats() (cpus []*CPUStats, totalTicks float64, err error) {
// Get the per cpu stats
cpuStats, err := cpu.Times(true)
if err != nil {
return nil, 0.0, err
}

cs := make([]*CPUStats, len(cpuStats))
for idx, cpuStat := range cpuStats {

// On windows they are already in percent
cs[idx] = &CPUStats{
CPU: cpuStat.CPU,
User: cpuStat.User,
System: cpuStat.System,
Idle: cpuStat.Idle,
Total: cpuStat.Total(),
}
}

// Get the number of ticks
allCpu, err := cpu.Times(false)
if err != nil {
return nil, 0.0, err
}
if len(allCpu) != 1 {
return nil, 0.0, fmt.Errorf("unexpected number of cpus (%d)", len(allCpu))
}

// We use the calculator because when retrieving against all cpus it is
// returned as ticks.
all := allCpu[0]
percentCalculator, ok := h.statsCalculator[all.CPU]
if !ok {
percentCalculator = NewHostCpuStatsCalculator()
h.statsCalculator[all.CPU] = percentCalculator
}
_, _, _, total := percentCalculator.Calculate(all)
ticks := (total / 100) * shelpers.TotalTicksAvailable()

return cs, ticks, nil
}
126 changes: 65 additions & 61 deletions client/stats/host.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ import (
"github.com/shirou/gopsutil/disk"
"github.com/shirou/gopsutil/host"
"github.com/shirou/gopsutil/mem"

shelpers "github.com/hashicorp/nomad/helper/stats"
)

// HostStats represents resource usage stats of the host running a Nomad client
Expand Down Expand Up @@ -95,46 +93,72 @@ func NewHostStatsCollector(logger *log.Logger, allocDir string) *HostStatsCollec
// Collect collects stats related to resource usage of a host
func (h *HostStatsCollector) Collect() error {
hs := &HostStats{Timestamp: time.Now().UTC().UnixNano()}
memStats, err := mem.VirtualMemory()

// Determine up-time
uptime, err := host.Uptime()
if err != nil {
return err
}
hs.Memory = &MemoryStats{
Total: memStats.Total,
Available: memStats.Available,
Used: memStats.Used,
Free: memStats.Free,
hs.Uptime = uptime

// Collect memory stats
mstats, err := h.collectMemoryStats()
if err != nil {
return err
}
hs.Memory = mstats

ticksConsumed := 0.0
cpuStats, err := cpu.Times(true)
// Collect cpu stats
cpus, ticks, err := h.collectCPUStats()
if err != nil {
return err
}
cs := make([]*CPUStats, len(cpuStats))
for idx, cpuStat := range cpuStats {
percentCalculator, ok := h.statsCalculator[cpuStat.CPU]
if !ok {
percentCalculator = NewHostCpuStatsCalculator()
h.statsCalculator[cpuStat.CPU] = percentCalculator
}
idle, user, system, total := percentCalculator.Calculate(cpuStat)
cs[idx] = &CPUStats{
CPU: cpuStat.CPU,
User: user,
System: system,
Idle: idle,
Total: total,
}
ticksConsumed += (total / 100) * (shelpers.TotalTicksAvailable() / float64(len(cpuStats)))
hs.CPU = cpus
hs.CPUTicksConsumed = ticks

// Collect disk stats
diskStats, err := h.collectDiskStats()
if err != nil {
return err
}
hs.CPU = cs
hs.CPUTicksConsumed = ticksConsumed
hs.DiskStats = diskStats

partitions, err := disk.Partitions(false)
// Getting the disk stats for the allocation directory
usage, err := disk.Usage(h.allocDir)
if err != nil {
return err
}
hs.AllocDirStats = h.toDiskStats(usage, nil)

// Update the collected status object.
h.hostStatsLock.Lock()
h.hostStats = hs
h.hostStatsLock.Unlock()

return nil
}

func (h *HostStatsCollector) collectMemoryStats() (*MemoryStats, error) {
memStats, err := mem.VirtualMemory()
if err != nil {
return nil, err
}
mem := &MemoryStats{
Total: memStats.Total,
Available: memStats.Available,
Used: memStats.Used,
Free: memStats.Free,
}

return mem, nil
}

func (h *HostStatsCollector) collectDiskStats() ([]*DiskStats, error) {
partitions, err := disk.Partitions(false)
if err != nil {
return nil, err
}

var diskStats []*DiskStats
for _, partition := range partitions {
usage, err := disk.Usage(partition.Mountpoint)
Expand All @@ -153,25 +177,8 @@ func (h *HostStatsCollector) Collect() error {
ds := h.toDiskStats(usage, &partition)
diskStats = append(diskStats, ds)
}
hs.DiskStats = diskStats

// Getting the disk stats for the allocation directory
usage, err := disk.Usage(h.allocDir)
if err != nil {
return err
}
hs.AllocDirStats = h.toDiskStats(usage, nil)

uptime, err := host.Uptime()
if err != nil {
return err
}
hs.Uptime = uptime

h.hostStatsLock.Lock()
h.hostStats = hs
h.hostStatsLock.Unlock()
return nil
return diskStats, nil
}

// Stats returns the host stats that has been collected
Expand Down Expand Up @@ -228,28 +235,26 @@ func (h *HostCpuStatsCalculator) Calculate(times cpu.TimesStat) (idle float64, u
currentUser := times.User
currentSystem := times.System
currentTotal := times.Total()
currentBusy := times.User + times.System + times.Nice + times.Iowait + times.Irq +
times.Softirq + times.Steal + times.Guest + times.GuestNice + times.Stolen

deltaTotal := currentTotal - h.prevTotal
idle = ((currentIdle - h.prevIdle) / deltaTotal) * 100
if math.IsNaN(idle) {
user = ((currentUser - h.prevUser) / deltaTotal) * 100
system = ((currentSystem - h.prevSystem) / deltaTotal) * 100
total = ((currentBusy - h.prevBusy) / deltaTotal) * 100

// Protect against any invalid values
if math.IsNaN(idle) || math.IsInf(idle, 0) {
idle = 100.0
}

user = ((currentUser - h.prevUser) / deltaTotal) * 100
if math.IsNaN(user) {
if math.IsNaN(user) || math.IsInf(user, 0) {
user = 0.0
}

system = ((currentSystem - h.prevSystem) / deltaTotal) * 100
if math.IsNaN(system) {
if math.IsNaN(system) || math.IsInf(system, 0) {
system = 0.0
}

currentBusy := times.User + times.System + times.Nice + times.Iowait + times.Irq +
times.Softirq + times.Steal + times.Guest + times.GuestNice + times.Stolen

total = ((currentBusy - h.prevBusy) / deltaTotal) * 100
if math.IsNaN(total) {
if math.IsNaN(total) || math.IsInf(total, 0) {
total = 0.0
}

Expand All @@ -258,6 +263,5 @@ func (h *HostCpuStatsCalculator) Calculate(times cpu.TimesStat) (idle float64, u
h.prevSystem = currentSystem
h.prevTotal = currentTotal
h.prevBusy = currentBusy

return
}

0 comments on commit db261cd

Please sign in to comment.