From fef81a3bfe71872aadfbe10cb31bfb914ab3ae03 Mon Sep 17 00:00:00 2001 From: Sambhav Jain <67923444+sambhav-jain-16@users.noreply.github.com> Date: Mon, 17 Nov 2025 15:45:20 +0100 Subject: [PATCH] feat: add ability to get individual controller stats This adds methods to retrieve statistics for individual cgroup controllers (CPU, memory, pids, IO, hugetlb, rdma, misc) instead of requiring all stats to be fetched at once. This enables tools like cadvisor to collect specific metrics with different housekeeping intervals, reducing computational overhead. Fixes: https://github.com/opencontainers/cgroups/issues/44 Signed-off-by: Sambhav Jain --- cgroups.go | 4 + fs/blkio.go | 5 + fs/cpu.go | 5 + fs/cpuacct.go | 5 + fs/cpuset.go | 5 + fs/devices.go | 6 + fs/freezer.go | 6 + fs/fs.go | 23 +++- fs/fs_test.go | 209 ++++++++++++++++++++++++++++++++ fs/hugetlb.go | 5 + fs/memory.go | 5 + fs/name.go | 6 + fs/net_cls.go | 6 + fs/net_prio.go | 6 + fs/perf_event.go | 6 + fs/pids.go | 5 + fs/rdma.go | 5 + fs2/fs2.go | 88 ++++++++++---- fs2/fs2_test.go | 306 +++++++++++++++++++++++++++++++++++++++++++++++ stats.go | 50 ++++++++ systemd/v1.go | 23 +++- systemd/v2.go | 6 + 22 files changed, 757 insertions(+), 28 deletions(-) create mode 100644 fs2/fs2_test.go diff --git a/cgroups.go b/cgroups.go index 5a97bd3..12bf74e 100644 --- a/cgroups.go +++ b/cgroups.go @@ -44,6 +44,10 @@ type Manager interface { // GetStats returns cgroups statistics. GetStats() (*Stats, error) + // Stats returns statistics for specified controllers. + // If opts is nil or opts.Controllers is 0, all controllers are queried. + Stats(opts *StatsOptions) (*Stats, error) + // Freeze sets the freezer cgroup to the specified state. Freeze(state FreezerState) error diff --git a/fs/blkio.go b/fs/blkio.go index f3c4c5c..e5b2d31 100644 --- a/fs/blkio.go +++ b/fs/blkio.go @@ -19,6 +19,11 @@ func (s *BlkioGroup) Name() string { return "blkio" } +// ID returns the controller ID for blkio subsystem. +func (s *BlkioGroup) ID() cgroups.Controller { + return cgroups.IO +} + func (s *BlkioGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/fs/cpu.go b/fs/cpu.go index 3e05788..84f9f74 100644 --- a/fs/cpu.go +++ b/fs/cpu.go @@ -18,6 +18,11 @@ func (s *CpuGroup) Name() string { return "cpu" } +// ID returns the controller ID for CPU subsystem. +func (s *CpuGroup) ID() cgroups.Controller { + return cgroups.CPU +} + func (s *CpuGroup) Apply(path string, r *cgroups.Resources, pid int) error { if err := os.MkdirAll(path, 0o755); err != nil { return err diff --git a/fs/cpuacct.go b/fs/cpuacct.go index 5930dfc..5a1be75 100644 --- a/fs/cpuacct.go +++ b/fs/cpuacct.go @@ -26,6 +26,11 @@ func (s *CpuacctGroup) Name() string { return "cpuacct" } +// ID returns the controller ID for cpuacct subsystem. +func (s *CpuacctGroup) ID() cgroups.Controller { + return cgroups.CPU +} + func (s *CpuacctGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/fs/cpuset.go b/fs/cpuset.go index f3f96df..327d5e7 100644 --- a/fs/cpuset.go +++ b/fs/cpuset.go @@ -54,6 +54,11 @@ func (s *CpusetGroup) Name() string { return "cpuset" } +// ID returns the controller ID for cpuset subsystem. 
+func (s *CpusetGroup) ID() cgroups.Controller { + return cgroups.CPUSet +} + func (s *CpusetGroup) Apply(path string, r *cgroups.Resources, pid int) error { return s.ApplyDir(path, r, pid) } diff --git a/fs/devices.go b/fs/devices.go index 26483ec..5eee641 100644 --- a/fs/devices.go +++ b/fs/devices.go @@ -10,6 +10,12 @@ func (s *DevicesGroup) Name() string { return "devices" } +// ID returns the controller ID for devices subsystem. +// Returns 0 as devices is not a cgroups.Controller. +func (s *DevicesGroup) ID() cgroups.Controller { + return 0 +} + func (s *DevicesGroup) Apply(path string, r *cgroups.Resources, pid int) error { if r.SkipDevices { return nil diff --git a/fs/freezer.go b/fs/freezer.go index fe0f0dd..3edc7a8 100644 --- a/fs/freezer.go +++ b/fs/freezer.go @@ -18,6 +18,12 @@ func (s *FreezerGroup) Name() string { return "freezer" } +// ID returns the controller ID for freezer subsystem. +// Returns 0 as freezer is not a cgroups.Controller. +func (s *FreezerGroup) ID() cgroups.Controller { + return 0 +} + func (s *FreezerGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/fs/fs.go b/fs/fs.go index 6259311..ca0a65e 100644 --- a/fs/fs.go +++ b/fs/fs.go @@ -29,7 +29,7 @@ var subsystems = []subsystem{ &FreezerGroup{}, &RdmaGroup{}, &NameGroup{GroupName: "name=systemd", Join: true}, - &NameGroup{GroupName: "misc", Join: true}, + &NameGroup{GroupName: "misc", Join: true, GroupID: cgroups.Misc}, } var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist") @@ -45,6 +45,8 @@ func init() { type subsystem interface { // Name returns the name of the subsystem. Name() string + // ID returns the controller ID for filtering. + ID() cgroups.Controller // GetStats fills in the stats for the subsystem. GetStats(path string, stats *cgroups.Stats) error // Apply creates and joins a cgroup, adding pid into it. Some @@ -181,14 +183,33 @@ func (m *Manager) Path(subsys string) string { } func (m *Manager) GetStats() (*cgroups.Stats, error) { + return m.Stats(nil) +} + +// Stats returns cgroup statistics for the specified controllers. +// If opts is nil or opts.Controllers is zero, statistics for all controllers are returned. +func (m *Manager) Stats(opts *cgroups.StatsOptions) (*cgroups.Stats, error) { m.mu.Lock() defer m.mu.Unlock() + + // Default: query all controllers + controllers := cgroups.AllControllers + if opts != nil && opts.Controllers != 0 { + controllers = opts.Controllers + } + stats := cgroups.NewStats() for _, sys := range subsystems { path := m.paths[sys.Name()] if path == "" { continue } + + // Filter based on controller type + if sys.ID()&controllers == 0 { + continue + } + if err := sys.GetStats(path, stats); err != nil { return nil, err } diff --git a/fs/fs_test.go b/fs/fs_test.go index 331e9f1..a5a5fa0 100644 --- a/fs/fs_test.go +++ b/fs/fs_test.go @@ -6,6 +6,215 @@ import ( "github.com/opencontainers/cgroups" ) +// pointerTo returns a pointer to the given controller value. 
+func pointerTo(c cgroups.Controller) *cgroups.Controller { + return &c +} + +func TestStats(t *testing.T) { + testCases := []struct { + name string + controller *cgroups.Controller + subsystems map[string]map[string]string // subsystem -> file contents + validate func(*testing.T, *cgroups.Stats) + }{ + { + name: "CPU stats", + controller: pointerTo(cgroups.CPU), + subsystems: map[string]map[string]string{ + "cpu": { + "cpu.stat": "nr_periods 2000\nnr_throttled 200\nthrottled_time 18446744073709551615\n", + }, + "cpuacct": { + "cpuacct.usage": cpuAcctUsageContents, + "cpuacct.usage_percpu": cpuAcctUsagePerCPUContents, + "cpuacct.stat": cpuAcctStatContents, + }, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify throttling data from cpu.stat + expectedThrottling := cgroups.ThrottlingData{ + Periods: 2000, + ThrottledPeriods: 200, + ThrottledTime: 18446744073709551615, + } + expectThrottlingDataEquals(t, expectedThrottling, stats.CpuStats.ThrottlingData) + + // Verify total usage from cpuacct.usage + if stats.CpuStats.CpuUsage.TotalUsage != 12262454190222160 { + t.Errorf("expected TotalUsage 12262454190222160, got %d", stats.CpuStats.CpuUsage.TotalUsage) + } + }, + }, + { + name: "Memory stats", + controller: pointerTo(cgroups.Memory), + subsystems: map[string]map[string]string{ + "memory": { + "memory.stat": memoryStatContents, + "memory.usage_in_bytes": "2048", + "memory.max_usage_in_bytes": "4096", + "memory.failcnt": "100", + "memory.limit_in_bytes": "8192", + "memory.use_hierarchy": "1", + }, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + expected := cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192} + expectMemoryDataEquals(t, expected, stats.MemoryStats.Usage) + }, + }, + { + name: "Pids stats", + controller: pointerTo(cgroups.Pids), + subsystems: map[string]map[string]string{ + "pids": { + "pids.current": "1337", + "pids.max": "1024", + }, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + if stats.PidsStats.Current != 1337 { + t.Errorf("expected Current 1337, got %d", stats.PidsStats.Current) + } + if stats.PidsStats.Limit != 1024 { + t.Errorf("expected Limit 1024, got %d", stats.PidsStats.Limit) + } + }, + }, + { + name: "IO stats", + controller: pointerTo(cgroups.IO), + subsystems: map[string]map[string]string{ + "blkio": blkioBFQStatsTestFiles, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify we have entries + if len(stats.BlkioStats.IoServiceBytesRecursive) == 0 { + t.Error("expected IoServiceBytesRecursive to have entries") + } + if len(stats.BlkioStats.IoServicedRecursive) == 0 { + t.Error("expected IoServicedRecursive to have entries") + } + }, + }, + { + name: "Multiple controllers - CPU+Pids", + controller: pointerTo(cgroups.CPU | cgroups.Pids), + subsystems: map[string]map[string]string{ + "cpu": { + "cpu.stat": "nr_periods 100\nnr_throttled 10\nthrottled_time 5000\n", + }, + "pids": { + "pids.current": "42", + "pids.max": "1000", + }, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify both are populated + if stats.CpuStats.ThrottlingData.Periods != 100 { + t.Errorf("expected Periods 100, got %d", stats.CpuStats.ThrottlingData.Periods) + } + if stats.PidsStats.Current != 42 { + t.Errorf("expected Current 42, got %d", stats.PidsStats.Current) + } + if stats.PidsStats.Limit != 1000 { + t.Errorf("expected Limit 1000, got %d", stats.PidsStats.Limit) + } + }, + }, + { + name: "All controllers with nil options", + controller: nil, // nil means all controllers 
(default behavior) + subsystems: map[string]map[string]string{ + "cpu": { + "cpu.stat": "nr_periods 2000\nnr_throttled 200\nthrottled_time 18446744073709551615\n", + }, + "cpuacct": { + "cpuacct.usage": cpuAcctUsageContents, + "cpuacct.usage_percpu": cpuAcctUsagePerCPUContents, + "cpuacct.stat": cpuAcctStatContents, + }, + "memory": { + "memory.stat": memoryStatContents, + "memory.usage_in_bytes": "2048", + "memory.max_usage_in_bytes": "4096", + "memory.failcnt": "100", + "memory.limit_in_bytes": "8192", + "memory.use_hierarchy": "1", + }, + "pids": { + "pids.current": "1337", + "pids.max": "1024", + }, + "blkio": blkioBFQStatsTestFiles, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify CPU stats + expectedThrottling := cgroups.ThrottlingData{ + Periods: 2000, + ThrottledPeriods: 200, + ThrottledTime: 18446744073709551615, + } + expectThrottlingDataEquals(t, expectedThrottling, stats.CpuStats.ThrottlingData) + if stats.CpuStats.CpuUsage.TotalUsage != 12262454190222160 { + t.Errorf("expected TotalUsage 12262454190222160, got %d", stats.CpuStats.CpuUsage.TotalUsage) + } + + // Verify Memory stats + expectedMemory := cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192} + expectMemoryDataEquals(t, expectedMemory, stats.MemoryStats.Usage) + + // Verify Pids stats + if stats.PidsStats.Current != 1337 { + t.Errorf("expected Current 1337, got %d", stats.PidsStats.Current) + } + if stats.PidsStats.Limit != 1024 { + t.Errorf("expected Limit 1024, got %d", stats.PidsStats.Limit) + } + + // Verify IO stats + if len(stats.BlkioStats.IoServiceBytesRecursive) == 0 { + t.Error("expected IoServiceBytesRecursive to have entries") + } + if len(stats.BlkioStats.IoServicedRecursive) == 0 { + t.Error("expected IoServicedRecursive to have entries") + } + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + // Create temp directories for each subsystem and write files + paths := make(map[string]string) + for subsystem, files := range tc.subsystems { + path := tempDir(t, subsystem) + writeFileContents(t, path, files) + paths[subsystem] = path + } + m := &Manager{ + cgroups: &cgroups.Cgroup{Resources: &cgroups.Resources{}}, + paths: paths, + } + + var stats *cgroups.Stats + var err error + if tc.controller != nil { + stats, err = m.Stats(&cgroups.StatsOptions{Controllers: *tc.controller}) + } else { + stats, err = m.Stats(nil) + } + if err != nil { + t.Fatal(err) + } + + // Validate the results + tc.validate(t, stats) + }) + } +} + func BenchmarkGetStats(b *testing.B) { if cgroups.IsCgroup2UnifiedMode() { b.Skip("cgroup v2 is not supported") diff --git a/fs/hugetlb.go b/fs/hugetlb.go index 698fd69..39b8f80 100644 --- a/fs/hugetlb.go +++ b/fs/hugetlb.go @@ -15,6 +15,11 @@ func (s *HugetlbGroup) Name() string { return "hugetlb" } +// ID returns the controller ID for hugetlb subsystem. +func (s *HugetlbGroup) ID() cgroups.Controller { + return cgroups.HugeTLB +} + func (s *HugetlbGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/fs/memory.go b/fs/memory.go index d92f232..0250819 100644 --- a/fs/memory.go +++ b/fs/memory.go @@ -29,6 +29,11 @@ func (s *MemoryGroup) Name() string { return "memory" } +// ID returns the controller ID for memory subsystem. 
+func (s *MemoryGroup) ID() cgroups.Controller { + return cgroups.Memory +} + func (s *MemoryGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/fs/name.go b/fs/name.go index 2864351..47c6022 100644 --- a/fs/name.go +++ b/fs/name.go @@ -7,12 +7,18 @@ import ( type NameGroup struct { GroupName string Join bool + GroupID cgroups.Controller } func (s *NameGroup) Name() string { return s.GroupName } +// ID returns the controller ID for named subsystem. +func (s *NameGroup) ID() cgroups.Controller { + return s.GroupID +} + func (s *NameGroup) Apply(path string, _ *cgroups.Resources, pid int) error { if s.Join { // Ignore errors if the named cgroup does not exist. diff --git a/fs/net_cls.go b/fs/net_cls.go index 2bd6c5a..c61bded 100644 --- a/fs/net_cls.go +++ b/fs/net_cls.go @@ -12,6 +12,12 @@ func (s *NetClsGroup) Name() string { return "net_cls" } +// ID returns the controller ID for net_cls subsystem. +// Returns 0 as net_cls is not a cgroups.Controller. +func (s *NetClsGroup) ID() cgroups.Controller { + return 0 +} + func (s *NetClsGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/fs/net_prio.go b/fs/net_prio.go index b51682b..228eb34 100644 --- a/fs/net_prio.go +++ b/fs/net_prio.go @@ -10,6 +10,12 @@ func (s *NetPrioGroup) Name() string { return "net_prio" } +// ID returns the controller ID for net_prio subsystem. +// Returns 0 as net_prio is not a cgroups.Controller. +func (s *NetPrioGroup) ID() cgroups.Controller { + return 0 +} + func (s *NetPrioGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/fs/perf_event.go b/fs/perf_event.go index 929c412..ba6f448 100644 --- a/fs/perf_event.go +++ b/fs/perf_event.go @@ -10,6 +10,12 @@ func (s *PerfEventGroup) Name() string { return "perf_event" } +// ID returns the controller ID for perf_event subsystem. +// Returns 0 as perf_event is not a cgroups.Controller. +func (s *PerfEventGroup) ID() cgroups.Controller { + return 0 +} + func (s *PerfEventGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/fs/pids.go b/fs/pids.go index 36bd339..fa5b1c8 100644 --- a/fs/pids.go +++ b/fs/pids.go @@ -14,6 +14,11 @@ func (s *PidsGroup) Name() string { return "pids" } +// ID returns the controller ID for pids subsystem. +func (s *PidsGroup) ID() cgroups.Controller { + return cgroups.Pids +} + func (s *PidsGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/fs/rdma.go b/fs/rdma.go index 4b17536..8cc436c 100644 --- a/fs/rdma.go +++ b/fs/rdma.go @@ -11,6 +11,11 @@ func (s *RdmaGroup) Name() string { return "rdma" } +// ID returns the controller ID for rdma subsystem. +func (s *RdmaGroup) ID() cgroups.Controller { + return cgroups.RDMA +} + func (s *RdmaGroup) Apply(path string, _ *cgroups.Resources, pid int) error { return apply(path, pid) } diff --git a/fs2/fs2.go b/fs2/fs2.go index 356d087..d129a92 100644 --- a/fs2/fs2.go +++ b/fs2/fs2.go @@ -105,50 +105,86 @@ func (m *Manager) GetAllPids() ([]int, error) { } func (m *Manager) GetStats() (*cgroups.Stats, error) { - var errs []error + return m.Stats(nil) +} + +// Stats returns cgroup statistics for the specified controllers. +// If opts is nil or opts.Controllers is zero, statistics for all controllers are returned. 
+func (m *Manager) Stats(opts *cgroups.StatsOptions) (*cgroups.Stats, error) { + // Default: query all controllers + controllers := cgroups.AllControllers + if opts != nil && opts.Controllers != 0 { + controllers = opts.Controllers + } + var errs []error + var err error st := cgroups.NewStats() // pids (since kernel 4.5) - if err := statPids(m.dirPath, st); err != nil { - errs = append(errs, err) + if controllers&cgroups.Pids != 0 { + if err = statPids(m.dirPath, st); err != nil { + errs = append(errs, err) + } } + // memory (since kernel 4.5) - if err := statMemory(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) + if controllers&cgroups.Memory != 0 { + if err = statMemory(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } + + if st.MemoryStats.PSI, err = statPSI(m.dirPath, "memory.pressure"); err != nil { + errs = append(errs, err) + } } + // io (since kernel 4.5) - if err := statIo(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) + if controllers&cgroups.IO != 0 { + if err = statIo(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } + + if st.BlkioStats.PSI, err = statPSI(m.dirPath, "io.pressure"); err != nil { + errs = append(errs, err) + } } + // cpu (since kernel 4.15) // Note cpu.stat is available even if the controller is not enabled. - if err := statCpu(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) - } - // PSI (since kernel 4.20). - var err error - if st.CpuStats.PSI, err = statPSI(m.dirPath, "cpu.pressure"); err != nil { - errs = append(errs, err) - } - if st.MemoryStats.PSI, err = statPSI(m.dirPath, "memory.pressure"); err != nil { - errs = append(errs, err) - } - if st.BlkioStats.PSI, err = statPSI(m.dirPath, "io.pressure"); err != nil { - errs = append(errs, err) + if controllers&cgroups.CPU != 0 { + if err = statCpu(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } + + // PSI (since kernel 4.20) + if st.CpuStats.PSI, err = statPSI(m.dirPath, "cpu.pressure"); err != nil { + errs = append(errs, err) + } + } + // hugetlb (since kernel 5.6) - if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) + if controllers&cgroups.HugeTLB != 0 { + if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } } + // rdma (since kernel 4.11) - if err := fscommon.RdmaGetStats(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) + if controllers&cgroups.RDMA != 0 { + if err := fscommon.RdmaGetStats(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } } + // misc (since kernel 5.13) - if err := statMisc(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) + if controllers&cgroups.Misc != 0 { + if err := statMisc(m.dirPath, st); err != nil && !os.IsNotExist(err) { + errs = append(errs, err) + } } + if len(errs) > 0 && !m.config.Rootless { return st, fmt.Errorf("error while statting cgroup v2: %+v", errs) } diff --git a/fs2/fs2_test.go b/fs2/fs2_test.go new file mode 100644 index 0000000..736dc86 --- /dev/null +++ b/fs2/fs2_test.go @@ -0,0 +1,306 @@ +package fs2 + +import ( + "os" + "path/filepath" + "testing" + + "github.com/opencontainers/cgroups" +) + +const ( + exampleCPUStatData = `usage_usec 1000000 +user_usec 600000 +system_usec 400000 +nr_periods 100 +nr_throttled 10 +throttled_usec 50000 +nr_bursts 5 +burst_usec 10000` + + 
exampleCPUStatDataShort = `usage_usec 1000000 +user_usec 600000 +system_usec 400000` + + exampleMemoryCurrent = "4194304" + exampleMemoryMax = "max" + + examplePSIData = `some avg10=1.00 avg60=2.00 avg300=3.00 total=100000 +full avg10=0.50 avg60=1.00 avg300=1.50 total=50000` + + exampleRdmaCurrent = `mlx5_0 hca_handle=10 hca_object=20` +) + +func pointerTo(c cgroups.Controller) *cgroups.Controller { + return &c +} + +func TestStats(t *testing.T) { + // We're using a fake cgroupfs. + cgroups.TestMode = true + + testCases := []struct { + name string + controller *cgroups.Controller + setupFiles map[string]string + validate func(*testing.T, *cgroups.Stats) + }{ + { + name: "CPU stats", + controller: pointerTo(cgroups.CPU), + setupFiles: map[string]string{ + "cpu.stat": exampleCPUStatData, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify CPU stats populated correctly (values are converted from usec to nsec) + if stats.CpuStats.CpuUsage.TotalUsage != 1000000000 { + t.Errorf("expected TotalUsage 1000000000, got %d", stats.CpuStats.CpuUsage.TotalUsage) + } + if stats.CpuStats.CpuUsage.UsageInUsermode != 600000000 { + t.Errorf("expected UsageInUsermode 600000000, got %d", stats.CpuStats.CpuUsage.UsageInUsermode) + } + if stats.CpuStats.CpuUsage.UsageInKernelmode != 400000000 { + t.Errorf("expected UsageInKernelmode 400000000, got %d", stats.CpuStats.CpuUsage.UsageInKernelmode) + } + if stats.CpuStats.ThrottlingData.Periods != 100 { + t.Errorf("expected Periods 100, got %d", stats.CpuStats.ThrottlingData.Periods) + } + if stats.CpuStats.ThrottlingData.ThrottledPeriods != 10 { + t.Errorf("expected ThrottledPeriods 10, got %d", stats.CpuStats.ThrottlingData.ThrottledPeriods) + } + }, + }, + { + name: "CPU stats with PSI", + controller: pointerTo(cgroups.CPU), + setupFiles: map[string]string{ + "cpu.stat": exampleCPUStatData, + "cpu.pressure": examplePSIData, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify PSI data is populated + if stats.CpuStats.PSI == nil { + t.Fatal("expected PSI to be populated") + } + if stats.CpuStats.PSI.Some.Avg10 != 1.00 { + t.Errorf("expected PSI.Some.Avg10 1.00, got %f", stats.CpuStats.PSI.Some.Avg10) + } + if stats.CpuStats.PSI.Full.Total != 50000 { + t.Errorf("expected PSI.Full.Total 50000, got %d", stats.CpuStats.PSI.Full.Total) + } + }, + }, + { + name: "Memory stats", + controller: pointerTo(cgroups.Memory), + setupFiles: map[string]string{ + "memory.stat": exampleMemoryStatData, + "memory.current": exampleMemoryCurrent, + "memory.max": exampleMemoryMax, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify memory stats + if stats.MemoryStats.Usage.Usage != 4194304 { + t.Errorf("expected Usage 4194304, got %d", stats.MemoryStats.Usage.Usage) + } + // Cache comes from "file" field in memory.stat (6502666240 from exampleMemoryStatData) + if stats.MemoryStats.Cache != 6502666240 { + t.Errorf("expected Cache 6502666240, got %d", stats.MemoryStats.Cache) + } + }, + }, + { + name: "Memory stats with PSI", + controller: pointerTo(cgroups.Memory), + setupFiles: map[string]string{ + "memory.stat": exampleMemoryStatData, + "memory.current": exampleMemoryCurrent, + "memory.max": exampleMemoryMax, + "memory.pressure": examplePSIData, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify PSI data is populated + if stats.MemoryStats.PSI == nil { + t.Fatal("expected PSI to be populated") + } + if stats.MemoryStats.PSI.Some.Avg60 != 2.00 { + t.Errorf("expected PSI.Some.Avg60 2.00, got %f", 
stats.MemoryStats.PSI.Some.Avg60) + } + }, + }, + { + name: "Pids stats", + controller: pointerTo(cgroups.Pids), + setupFiles: map[string]string{ + "pids.current": "42\n", + "pids.max": "1000\n", + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + if stats.PidsStats.Current != 42 { + t.Errorf("expected Current 42, got %d", stats.PidsStats.Current) + } + if stats.PidsStats.Limit != 1000 { + t.Errorf("expected Limit 1000, got %d", stats.PidsStats.Limit) + } + }, + }, + { + name: "IO stats", + controller: pointerTo(cgroups.IO), + setupFiles: map[string]string{ + "io.stat": exampleIoStatData, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify IO stats - check that we have entries + if len(stats.BlkioStats.IoServiceBytesRecursive) == 0 { + t.Error("expected IoServiceBytesRecursive to have entries") + } + if len(stats.BlkioStats.IoServicedRecursive) == 0 { + t.Error("expected IoServicedRecursive to have entries") + } + }, + }, + { + name: "IO stats with PSI", + controller: pointerTo(cgroups.IO), + setupFiles: map[string]string{ + "io.stat": exampleIoStatData, + "io.pressure": examplePSIData, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify PSI data is populated + if stats.BlkioStats.PSI == nil { + t.Fatal("expected PSI to be populated") + } + if stats.BlkioStats.PSI.Full.Avg300 != 1.50 { + t.Errorf("expected PSI.Full.Avg300 1.50, got %f", stats.BlkioStats.PSI.Full.Avg300) + } + }, + }, + { + name: "Misc stats", + controller: pointerTo(cgroups.Misc), + setupFiles: map[string]string{ + "misc.current": exampleMiscCurrentData, + "misc.events": exampleMiscEventsData, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify misc stats - exampleMiscCurrentData has res_a, res_b, res_c + if _, ok := stats.MiscStats["res_a"]; !ok { + t.Error("expected MiscStats to have 'res_a' entry") + } + if _, ok := stats.MiscStats["res_b"]; !ok { + t.Error("expected MiscStats to have 'res_b' entry") + } + if _, ok := stats.MiscStats["res_c"]; !ok { + t.Error("expected MiscStats to have 'res_c' entry") + } + }, + }, + { + name: "RDMA stats", + controller: pointerTo(cgroups.RDMA), + setupFiles: map[string]string{ + "rdma.current": exampleRdmaCurrent, + "rdma.max": "mlx5_0 hca_handle=max hca_object=max", + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify RDMA stats are populated + if len(stats.RdmaStats.RdmaCurrent) == 0 { + t.Error("expected RdmaStats.RdmaCurrent to have entries") + } + }, + }, + { + name: "HugeTLB stats", + controller: pointerTo(cgroups.HugeTLB), + setupFiles: map[string]string{}, + validate: func(_ *testing.T, _ *cgroups.Stats) { + // HugePageSizes() returns available page sizes from the system + // We can only test if files don't exist (should not error) + // No specific assertions needed - just verifying it doesn't error + }, + }, + { + name: "Multiple controllers - CPU+Pids", + controller: pointerTo(cgroups.CPU | cgroups.Pids), + setupFiles: map[string]string{ + "cpu.stat": exampleCPUStatDataShort, + "pids.current": "42\n", + "pids.max": "1000\n", + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify both stats are populated in the same object + if stats.CpuStats.CpuUsage.TotalUsage != 1000000000 { + t.Errorf("expected TotalUsage 1000000000, got %d", stats.CpuStats.CpuUsage.TotalUsage) + } + if stats.PidsStats.Current != 42 { + t.Errorf("expected Current 42, got %d", stats.PidsStats.Current) + } + if stats.PidsStats.Limit != 1000 { + t.Errorf("expected Limit 1000, got %d", 
stats.PidsStats.Limit) + } + }, + }, + { + name: "All controllers with nil options", + controller: nil, + setupFiles: map[string]string{ + "cpu.stat": exampleCPUStatData, + "memory.stat": exampleMemoryStatData, + "memory.current": exampleMemoryCurrent, + "memory.max": exampleMemoryMax, + "pids.current": "42\n", + "pids.max": "1000\n", + "io.stat": exampleIoStatData, + }, + validate: func(t *testing.T, stats *cgroups.Stats) { + // Verify all stats are populated (non-zero values) + if stats.CpuStats.CpuUsage.TotalUsage == 0 { + t.Error("expected non-zero CPU TotalUsage") + } + if stats.MemoryStats.Usage.Usage == 0 { + t.Error("expected non-zero Memory Usage") + } + if stats.PidsStats.Current == 0 { + t.Error("expected non-zero Pids Current") + } + if len(stats.BlkioStats.IoServiceBytesRecursive) == 0 { + t.Error("expected non-empty IO stats") + } + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + fakeCgroupDir := t.TempDir() + + // Setup + for filename, content := range tc.setupFiles { + if err := os.WriteFile(filepath.Join(fakeCgroupDir, filename), []byte(content), 0o644); err != nil { + t.Fatal(err) + } + } + config := &cgroups.Cgroup{} + m, err := NewManager(config, fakeCgroupDir) + if err != nil { + t.Fatal(err) + } + + var stats *cgroups.Stats + if tc.controller == nil { + stats, err = m.Stats(nil) + } else { + stats, err = m.Stats(&cgroups.StatsOptions{Controllers: *tc.controller}) + } + if err != nil { + t.Fatal(err) + } + + if tc.validate != nil { + tc.validate(t, stats) + } + }) + } +} diff --git a/stats.go b/stats.go index debc2df..7501000 100644 --- a/stats.go +++ b/stats.go @@ -211,3 +211,53 @@ func NewStats() *Stats { miscStats := make(map[string]MiscStats) return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats, MiscStats: miscStats} } + +// Controller represents a cgroup controller type for stats collection. +type Controller int + +// Controller types for cgroup stats collection. +const ( + CPU Controller = 1 << iota + Memory + Pids + IO + HugeTLB + RDMA + Misc + CPUSet // v1 only +) + +// AllControllers is a bitmask of all available controllers. +const AllControllers = CPU | Memory | Pids | IO | HugeTLB | RDMA | Misc | CPUSet + +// String returns the controller name. +func (c Controller) String() string { + switch c { + case CPU: + return "cpu" + case Memory: + return "memory" + case Pids: + return "pids" + case IO: + return "io" + case HugeTLB: + return "hugetlb" + case RDMA: + return "rdma" + case Misc: + return "misc" + case CPUSet: + return "cpuset" + default: + panic("unknown controller") + } +} + +// StatsOptions specifies which controllers to retrieve statistics for. +type StatsOptions struct { + // Controllers is a bitmask of Controller values. + // If 0, all available controllers are queried (default behavior). + // Use Controller constants like: CPU | Memory | Pids + Controllers Controller +} diff --git a/systemd/v1.go b/systemd/v1.go index 96e69bb..4e71377 100644 --- a/systemd/v1.go +++ b/systemd/v1.go @@ -46,6 +46,8 @@ func NewLegacyManager(cg *cgroups.Cgroup, paths map[string]string) (*LegacyManag type subsystem interface { // Name returns the name of the subsystem. Name() string + // ID returns the controller ID for filtering. + ID() cgroups.Controller // GetStats returns the stats, as 'stats', corresponding to the cgroup under 'path'. GetStats(path string, stats *cgroups.Stats) error // Set sets cgroup resource limits. 
@@ -69,7 +71,7 @@ var legacySubsystems = []subsystem{ &fs.NetClsGroup{}, &fs.NameGroup{GroupName: "name=systemd"}, &fs.RdmaGroup{}, - &fs.NameGroup{GroupName: "misc"}, + &fs.NameGroup{GroupName: "misc", GroupID: cgroups.Misc}, } func genV1ResourcesProperties(r *cgroups.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) { @@ -339,14 +341,33 @@ func (m *LegacyManager) GetAllPids() ([]int, error) { } func (m *LegacyManager) GetStats() (*cgroups.Stats, error) { + return m.Stats(nil) +} + +// Stats returns cgroup statistics for the specified controllers. +// If opts is nil or opts.Controllers is zero, statistics for all controllers are returned. +func (m *LegacyManager) Stats(opts *cgroups.StatsOptions) (*cgroups.Stats, error) { m.mu.Lock() defer m.mu.Unlock() + + // Default: query all controllers (same as original GetStats behavior) + controllers := cgroups.AllControllers + if opts != nil && opts.Controllers != 0 { + controllers = opts.Controllers + } + stats := cgroups.NewStats() for _, sys := range legacySubsystems { path := m.paths[sys.Name()] if path == "" { continue } + + // Filter based on controller type + if sys.ID()&controllers == 0 { + continue + } + if err := sys.GetStats(path, stats); err != nil { return nil, err } diff --git a/systemd/v2.go b/systemd/v2.go index f76c93e..6e1b7ec 100644 --- a/systemd/v2.go +++ b/systemd/v2.go @@ -497,6 +497,12 @@ func (m *UnifiedManager) GetStats() (*cgroups.Stats, error) { return m.fsMgr.GetStats() } +// Stats returns cgroup statistics for the specified controllers. +// If opts is nil or opts.Controllers is zero, statistics for all controllers are returned. +func (m *UnifiedManager) Stats(opts *cgroups.StatsOptions) (*cgroups.Stats, error) { + return m.fsMgr.Stats(opts) +} + func (m *UnifiedManager) Set(r *cgroups.Resources) error { if r == nil { return nil
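
For reference, a minimal sketch of how a metrics collector might use the new Stats API on cgroup v2, assuming fs2.NewManager (as used in fs2/fs2_test.go above) and an illustrative cgroup path:

    package main

    import (
        "fmt"

        "github.com/opencontainers/cgroups"
        "github.com/opencontainers/cgroups/fs2"
    )

    func main() {
        // Assumes a cgroup v2 host; the cgroup path here is illustrative.
        m, err := fs2.NewManager(&cgroups.Cgroup{}, "/sys/fs/cgroup/mygroup")
        if err != nil {
            panic(err)
        }

        // Stat only the cpu and pids controllers; the remaining controllers
        // (memory, io, hugetlb, rdma, misc) are skipped entirely.
        st, err := m.Stats(&cgroups.StatsOptions{Controllers: cgroups.CPU | cgroups.Pids})
        if err != nil {
            panic(err)
        }
        fmt.Println("cpu usage (ns):", st.CpuStats.CpuUsage.TotalUsage)
        fmt.Println("current pids:", st.PidsStats.Current)

        // Passing nil (or a zero Controllers mask) preserves the old
        // behaviour: all controllers are queried, same as GetStats().
        if _, err := m.Stats(nil); err != nil {
            panic(err)
        }
    }

Combining flags in the bitmask (for example cgroups.CPU|cgroups.Memory) lets a tool poll cheap counters on a short housekeeping interval while fetching the full set less often, which is the use case described in the commit message.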