Skip to content

Commit

Permalink
Expose PSI stats in libcontainer handler
Browse files Browse the repository at this point in the history
This adds two new sets of metrics:
- `psi_total`: read total number of seconds a resource is under pressure
- `psi_avg`: read ratio of time a resource is under pressure over a
  sliding time window.

For more details about these definitions, see:
- https://www.kernel.org/doc/html/latest/accounting/psi.html
- https://facebookmicrosites.github.io/psi/docs/overview

Signed-off-by: Daniel Dao <dqminh89@gmail.com>
  • Loading branch information
dqminh committed May 13, 2022
1 parent ab13a27 commit dbd63c8
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 0 deletions.
2 changes: 2 additions & 0 deletions cmd/cadvisor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ func TestToIncludedMetrics(t *testing.T) {
container.ResctrlMetrics: struct{}{},
container.CPUSetMetrics: struct{}{},
container.OOMMetrics: struct{}{},
container.PSITotalMetrics: struct{}{},
container.PSIAvgMetrics: struct{}{},
},
container.AllMetrics,
{},
Expand Down
4 changes: 4 additions & 0 deletions container/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ const (
ResctrlMetrics MetricKind = "resctrl"
CPUSetMetrics MetricKind = "cpuset"
OOMMetrics MetricKind = "oom_event"
PSITotalMetrics MetricKind = "psi_total"
PSIAvgMetrics MetricKind = "psi_avg"
)

// AllMetrics represents all kinds of metrics that cAdvisor supported.
Expand All @@ -93,6 +95,8 @@ var AllMetrics = MetricSet{
ResctrlMetrics: struct{}{},
CPUSetMetrics: struct{}{},
OOMMetrics: struct{}{},
PSITotalMetrics: struct{}{},
PSIAvgMetrics: struct{}{},
}

func (mk MetricKind) String() string {
Expand Down
20 changes: 20 additions & 0 deletions container/libcontainer/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,20 @@ func (h *Handler) GetProcesses() ([]int, error) {
return pids, nil
}

// convertPSIData overwrites *to with the values of a libcontainer
// cgroups.PSIData sample: the three averages and the total are carried over
// field by field into the corresponding info.PSIData fields.
func convertPSIData(from *cgroups.PSIData, to *info.PSIData) {
	*to = info.PSIData{
		Avg10:  from.Avg10,
		Avg60:  from.Avg60,
		Avg300: from.Avg300,
		Total:  from.Total,
	}
}

// convertPSI translates a libcontainer cgroups.PSIStats into an info.PSIStats
// by converting its Some entry first and its Full entry second, each through
// convertPSIData.
func convertPSI(from *cgroups.PSIStats, to *info.PSIStats) {
	srcSome, dstSome := &from.Some, &to.Some
	srcFull, dstFull := &from.Full, &to.Full

	convertPSIData(srcSome, dstSome)
	convertPSIData(srcFull, dstFull)
}

// Convert libcontainer stats to info.ContainerStats.
func setCPUStats(s *cgroups.Stats, ret *info.ContainerStats, withPerCPU bool) {
ret.Cpu.Usage.User = s.CpuStats.CpuUsage.UsageInUsermode
Expand All @@ -776,6 +790,8 @@ func setCPUStats(s *cgroups.Stats, ret *info.ContainerStats, withPerCPU bool) {
ret.Cpu.CFS.ThrottledPeriods = s.CpuStats.ThrottlingData.ThrottledPeriods
ret.Cpu.CFS.ThrottledTime = s.CpuStats.ThrottlingData.ThrottledTime

convertPSI(&s.CpuStats.PSI, &ret.Cpu.PSI)

if !withPerCPU {
return
}
Expand All @@ -796,13 +812,17 @@ func setDiskIoStats(s *cgroups.Stats, ret *info.ContainerStats) {
ret.DiskIo.IoWaitTime = diskStatsCopy(s.BlkioStats.IoWaitTimeRecursive)
ret.DiskIo.IoMerged = diskStatsCopy(s.BlkioStats.IoMergedRecursive)
ret.DiskIo.IoTime = diskStatsCopy(s.BlkioStats.IoTimeRecursive)

convertPSI(&s.BlkioStats.PSI, &ret.DiskIo.PSI)
}

func setMemoryStats(s *cgroups.Stats, ret *info.ContainerStats) {
ret.Memory.Usage = s.MemoryStats.Usage.Usage
ret.Memory.MaxUsage = s.MemoryStats.Usage.MaxUsage
ret.Memory.Failcnt = s.MemoryStats.Usage.Failcnt

convertPSI(&s.MemoryStats.PSI, &ret.Memory.PSI)

if cgroups.IsCgroup2UnifiedMode() {
ret.Memory.Cache = s.MemoryStats.Stats["file"]
ret.Memory.RSS = s.MemoryStats.Stats["anon"]
Expand Down
28 changes: 28 additions & 0 deletions container/libcontainer/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,20 @@ func TestSetCPUStats(t *testing.T) {
UsageInKernelmode: 734746 * nanosecondsInSeconds / clockTicks,
UsageInUsermode: 2767637 * nanosecondsInSeconds / clockTicks,
},
PSI: cgroups.PSIStats{
Some: cgroups.PSIData{
Avg10: 0.1,
Avg60: 0.2,
Avg300: 0.3,
Total: 100,
},
Full: cgroups.PSIData{
Avg10: 0.4,
Avg60: 0.5,
Avg300: 0.6,
Total: 200,
},
},
},
}
var ret info.ContainerStats
Expand All @@ -123,6 +137,20 @@ func TestSetCPUStats(t *testing.T) {
System: s.CpuStats.CpuUsage.UsageInKernelmode,
Total: 33802947350272,
},
PSI: info.PSIStats{
Some: info.PSIData{
Avg10: 0.1,
Avg60: 0.2,
Avg300: 0.3,
Total: 100,
},
Full: info.PSIData{
Avg10: 0.4,
Avg60: 0.5,
Avg300: 0.6,
Total: 200,
},
},
},
}

Expand Down
18 changes: 18 additions & 0 deletions info/v1/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,18 @@ func (ci *ContainerInfo) StatsEndTime() time.Time {
return ret
}

// PSIData holds one pressure stall information (PSI) sample for a resource:
// three averages and a running total.
// NOTE(review): the per-field semantics below follow the kernel PSI
// documentation (https://www.kernel.org/doc/html/latest/accounting/psi.html);
// confirm windows and units against the producer of these values.
type PSIData struct {
// Avg10 is presumably the share of time stalled over the last 10 seconds — TODO confirm.
Avg10 float64 `json:"avg10"`
// Avg60 is presumably the share of time stalled over the last 60 seconds — TODO confirm.
Avg60 float64 `json:"avg60"`
// Avg300 is presumably the share of time stalled over the last 300 seconds — TODO confirm.
Avg300 float64 `json:"avg300"`
// Total is the cumulative stall time; the kernel reports it in
// microseconds — TODO confirm the unit is preserved here.
Total uint64 `json:"total"`
}

// PSIStats groups the two pressure stall information (PSI) samples reported
// for a resource.
//
// NOTE(review): encoding/json's omitempty option never omits struct-valued
// fields, so "some" and "full" are always present in the marshalled output
// even when every value is zero.
type PSIStats struct {
// Some presumably covers time in which at least some tasks were stalled
// (kernel "some" line) — TODO confirm.
Some PSIData `json:"some,omitempty"`
// Full presumably covers time in which all non-idle tasks were stalled
// (kernel "full" line) — TODO confirm.
Full PSIData `json:"full,omitempty"`
}

// This mirrors kernel internal structure.
type LoadStats struct {
// Number of sleeping tasks.
Expand Down Expand Up @@ -333,6 +345,8 @@ type CpuStats struct {
// Load is smoothed over the last 10 seconds. Instantaneous value can be read
// from LoadStats.NrRunning.
LoadAverage int32 `json:"load_average"`

PSI PSIStats `json:"psi,omitempty"`
}

type PerDiskStats struct {
Expand All @@ -351,6 +365,8 @@ type DiskIoStats struct {
IoWaitTime []PerDiskStats `json:"io_wait_time,omitempty"`
IoMerged []PerDiskStats `json:"io_merged,omitempty"`
IoTime []PerDiskStats `json:"io_time,omitempty"`

PSI PSIStats `json:"psi,omitempty"`
}

type HugetlbStats struct {
Expand Down Expand Up @@ -397,6 +413,8 @@ type MemoryStats struct {

ContainerData MemoryStatsMemoryData `json:"container_data,omitempty"`
HierarchicalData MemoryStatsMemoryData `json:"hierarchical_data,omitempty"`

PSI PSIStats `json:"psi,omitempty"`
}

type CPUSetStats struct {
Expand Down

0 comments on commit dbd63c8

Please sign in to comment.