From 0f93f1aeadd90e664e0502cef7147654ae7a7267 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Thu, 28 Jan 2021 16:25:52 +0100 Subject: [PATCH 01/49] Add mon_group support for resctrl. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- cmd/cadvisor.go | 4 +- docs/runtime_options.md | 5 + manager/container.go | 3 +- manager/manager.go | 24 +- resctrl/collector.go | 139 ++++++--- resctrl/collector_test.go | 116 ++++++++ resctrl/manager.go | 45 ++- resctrl/manager_test.go | 129 +++++++++ resctrl/utils.go | 285 +++++++++++++++++++ resctrl/utils_test.go | 576 ++++++++++++++++++++++++++++++++++++++ 10 files changed, 1264 insertions(+), 62 deletions(-) create mode 100644 resctrl/collector_test.go create mode 100644 resctrl/manager_test.go create mode 100644 resctrl/utils.go create mode 100644 resctrl/utils_test.go diff --git a/cmd/cadvisor.go b/cmd/cadvisor.go index acb9ce65bf..1f6c6adb88 100644 --- a/cmd/cadvisor.go +++ b/cmd/cadvisor.go @@ -77,6 +77,8 @@ var rawCgroupPrefixWhiteList = flag.String("raw_cgroup_prefix_whitelist", "", "A var perfEvents = flag.String("perf_events_config", "", "Path to a JSON file containing configuration of perf events to measure. Empty value disabled perf events measuring.") +var resctrlInterval = flag.Duration("resctrl_interval", 0, "Resctrl mon groups updating interval. Zero value disables updating mon groups.") + var ( // Metrics to be ignored. // Tcp metrics are ignored by default. @@ -173,7 +175,7 @@ func main() { collectorHttpClient := createCollectorHttpClient(*collectorCert, *collectorKey) - resourceManager, err := manager.New(memoryStorage, sysFs, housekeepingConfig, includedMetrics, &collectorHttpClient, strings.Split(*rawCgroupPrefixWhiteList, ","), *perfEvents) + resourceManager, err := manager.New(memoryStorage, sysFs, housekeepingConfig, includedMetrics, &collectorHttpClient, strings.Split(*rawCgroupPrefixWhiteList, ","), *perfEvents, *resctrlInterval) if err != nil { klog.Fatalf("Failed to create a manager: %s", err) } diff --git a/docs/runtime_options.md b/docs/runtime_options.md index 421cbc24fb..2f7a5c3892 100644 --- a/docs/runtime_options.md +++ b/docs/runtime_options.md @@ -419,6 +419,11 @@ should be a human readable string that will become a metric name. * `cas_count_read` will be measured as uncore non-grouped event on all Integrated Memory Controllers Performance Monitoring Units because of unset `type` field and `uncore_imc` prefix. +## Resctrl + +``` +--resctrl_interval=0: Resctrl mon groups updating interval. Zero value disables updating mon groups. +``` ## Storage driver specific instructions: diff --git a/manager/container.go b/manager/container.go index 09a1923b9d..db2e2b113b 100644 --- a/manager/container.go +++ b/manager/container.go @@ -130,6 +130,7 @@ func (cd *containerData) Stop() error { } close(cd.stop) cd.perfCollector.Destroy() + cd.resctrlCollector.Destroy() return nil } @@ -727,7 +728,7 @@ func (cd *containerData) updateStats() error { return perfStatsErr } if resctrlStatsErr != nil { - klog.Errorf("error occurred while collecting resctrl stats for container %s: %s", cInfo.Name, err) + klog.Errorf("error occurred while collecting resctrl stats for container %s: %s", cInfo.Name, resctrlStatsErr) return resctrlStatsErr } return customStatsErr diff --git a/manager/manager.go b/manager/manager.go index 03423ab3dc..9c6b972238 100644 --- a/manager/manager.go +++ b/manager/manager.go @@ -49,7 +49,6 @@ import ( "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fs2" - "github.com/opencontainers/runc/libcontainer/intelrdt" "k8s.io/klog/v2" "k8s.io/utils/clock" @@ -147,7 +146,7 @@ type HouskeepingConfig = struct { } // New takes a memory storage and returns a new manager. -func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, houskeepingConfig HouskeepingConfig, includedMetricsSet container.MetricSet, collectorHTTPClient *http.Client, rawContainerCgroupPathPrefixWhiteList []string, perfEventsFile string) (Manager, error) { +func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, houskeepingConfig HouskeepingConfig, includedMetricsSet container.MetricSet, collectorHTTPClient *http.Client, rawContainerCgroupPathPrefixWhiteList []string, perfEventsFile string, resctrlInterval time.Duration) (Manager, error) { if memoryCache == nil { return nil, fmt.Errorf("manager requires memory storage") } @@ -218,7 +217,7 @@ func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, houskeepingConfig return nil, err } - newManager.resctrlManager, err = resctrl.NewManager(selfContainer) + newManager.resctrlManager, err = resctrl.NewManager(resctrlInterval, resctrl.Setup) if err != nil { klog.V(4).Infof("Cannot gather resctrl metrics: %v", err) } @@ -263,7 +262,7 @@ type manager struct { collectorHTTPClient *http.Client nvidiaManager stats.Manager perfManager stats.Manager - resctrlManager stats.Manager + resctrlManager resctrl.Manager // List of raw container cgroup path prefix whitelist. rawContainerCgroupPathPrefixWhiteList []string } @@ -328,7 +327,7 @@ func (m *manager) Start() error { func (m *manager) Stop() error { defer m.nvidiaManager.Destroy() - defer m.destroyPerfCollectors() + defer m.destroyCollectors() // Stop and wait on all quit channels. for i, c := range m.quitChannels { // Send the exit signal and wait on the thread to exit (by closing the channel). @@ -346,9 +345,10 @@ func (m *manager) Stop() error { return nil } -func (m *manager) destroyPerfCollectors() { +func (m *manager) destroyCollectors() { for _, container := range m.containers { container.perfCollector.Destroy() + container.resctrlCollector.Destroy() } } @@ -957,14 +957,11 @@ func (m *manager) createContainerLocked(containerName string, watchSource watche } if m.includedMetrics.Has(container.ResctrlMetrics) { - resctrlPath, err := intelrdt.GetIntelRdtPath(containerName) + cont.resctrlCollector, err = m.resctrlManager.GetCollector(containerName, func() ([]string, error) { + return cont.getContainerPids(true) + }) if err != nil { - klog.V(4).Infof("Error getting resctrl path: %q", err) - } else { - cont.resctrlCollector, err = m.resctrlManager.GetCollector(resctrlPath) - if err != nil { - klog.V(4).Infof("resctrl metrics will not be available for container %s: %s", cont.info.Name, err) - } + klog.V(4).Infof("resctrl metrics will not be available for container %s: %s", cont.info.Name, err) } } @@ -1006,7 +1003,6 @@ func (m *manager) createContainerLocked(containerName string, watchSource watche if err != nil { return err } - // Start the container's housekeeping. return cont.Start() } diff --git a/resctrl/collector.go b/resctrl/collector.go index f677b8d044..9ca2fe77b4 100644 --- a/resctrl/collector.go +++ b/resctrl/collector.go @@ -1,6 +1,6 @@ // +build linux -// Copyright 2020 Google Inc. All Rights Reserved. +// Copyright 2021 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,57 +18,128 @@ package resctrl import ( - info "github.com/google/cadvisor/info/v1" - "github.com/google/cadvisor/stats" + "fmt" + "os" + "time" + + "k8s.io/klog/v2" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/intelrdt" + info "github.com/google/cadvisor/info/v1" ) type collector struct { - resctrl intelrdt.Manager - stats.NoopDestroy + id string + interval time.Duration + getContainerPids func() ([]string, error) + resctrlPath string + running bool } -func newCollector(id string, resctrlPath string) *collector { - collector := &collector{ - resctrl: intelrdt.NewManager( - &configs.Config{ - IntelRdt: &configs.IntelRdt{}, - }, - id, - resctrlPath, - ), +func newCollector(id string, getContainerPids func() ([]string, error), interval time.Duration) *collector { + return &collector{id: id, interval: interval, getContainerPids: getContainerPids} +} + +func (c *collector) setup() error { + err := c.prepareMonGroup() + + if c.interval != 0 && c.id != rootContainer { + if err != nil { + klog.Errorf("Failed to setup container %q resctrl collector: %v \n Trying again in next intervals!", c.id, err) + } + go func() { + for { + if c.running { + err = c.prepareMonGroup() + if err != nil { + klog.Errorf("checking %q resctrl collector but: %v", c.id, err) + } + } else { + err = c.clear() + if err != nil { + klog.Errorf("trying to end %q resctrl collector interval but: %v", c.id, err) + } + break + } + time.Sleep(c.interval) + } + }() + } else { + // There is no interval set, if setup fail, stop. + if err != nil { + c.running = false + return err + } } - return collector + return nil } -func (c *collector) UpdateStats(stats *info.ContainerStats) error { - stats.Resctrl = info.ResctrlStats{} - - resctrlStats, err := c.resctrl.GetStats() +func (c *collector) prepareMonGroup() error { + newPath, err := getResctrlPath(c.id, c.getContainerPids) if err != nil { - return err + return fmt.Errorf("couldn't obtain mon_group path: %v", err) } - numberOfNUMANodes := len(*resctrlStats.MBMStats) + // Check if container moved between control groups. + if newPath != c.resctrlPath { + err = c.clear() + if err != nil { + c.running = false + return fmt.Errorf("couldn't clear previous mon group: %v", err) + } + c.resctrlPath = newPath + } - stats.Resctrl.MemoryBandwidth = make([]info.MemoryBandwidthStats, 0, numberOfNUMANodes) - stats.Resctrl.Cache = make([]info.CacheStats, 0, numberOfNUMANodes) + // Mon group prepared, the collector is running correctly. + c.running = true + return nil +} - for _, numaNodeStats := range *resctrlStats.MBMStats { - stats.Resctrl.MemoryBandwidth = append(stats.Resctrl.MemoryBandwidth, - info.MemoryBandwidthStats{ - TotalBytes: numaNodeStats.MBMTotalBytes, - LocalBytes: numaNodeStats.MBMLocalBytes, - }) +func (c *collector) UpdateStats(stats *info.ContainerStats) error { + if c.running { + stats.Resctrl = info.ResctrlStats{} + + resctrlStats, err := getStats(c.resctrlPath) + if err != nil { + return err + } + numberOfNUMANodes := len(*resctrlStats.MBMStats) + + stats.Resctrl.MemoryBandwidth = make([]info.MemoryBandwidthStats, 0, numberOfNUMANodes) + stats.Resctrl.Cache = make([]info.CacheStats, 0, numberOfNUMANodes) + + for _, numaNodeStats := range *resctrlStats.MBMStats { + stats.Resctrl.MemoryBandwidth = append(stats.Resctrl.MemoryBandwidth, + info.MemoryBandwidthStats{ + TotalBytes: numaNodeStats.MBMTotalBytes, + LocalBytes: numaNodeStats.MBMLocalBytes, + }) + } + + for _, numaNodeStats := range *resctrlStats.CMTStats { + stats.Resctrl.Cache = append(stats.Resctrl.Cache, + info.CacheStats{LLCOccupancy: numaNodeStats.LLCOccupancy}) + } } - for _, numaNodeStats := range *resctrlStats.CMTStats { - stats.Resctrl.Cache = append(stats.Resctrl.Cache, - info.CacheStats{LLCOccupancy: numaNodeStats.LLCOccupancy}) + return nil +} + +func (c *collector) Destroy() { + c.running = false + err := c.clear() + if err != nil { + klog.Errorf("trying to destroy %q resctrl collector but: %v", c.id, err) } +} +func (c *collector) clear() error { + // Couldn't remove root resctrl directory. + if c.id != rootContainer && c.resctrlPath != "" { + err := os.RemoveAll(c.resctrlPath) + if err != nil { + return fmt.Errorf("couldn't clear mon_group: %v", err) + } + } return nil } diff --git a/resctrl/collector_test.go b/resctrl/collector_test.go new file mode 100644 index 0000000000..d352e94d1c --- /dev/null +++ b/resctrl/collector_test.go @@ -0,0 +1,116 @@ +// +build linux + +// Copyright 2021 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Collector tests. +package resctrl + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + + info "github.com/google/cadvisor/info/v1" +) + +func TestNewCollectorWithSetup(t *testing.T) { + rootResctrl = mockResctrl() + defer os.RemoveAll(rootResctrl) + + pidsPath = mockContainersPids() + defer os.RemoveAll(pidsPath) + + processPath = mockProcFs() + defer os.RemoveAll(processPath) + + expectedID := "container" + expectedResctrlPath := filepath.Join(rootResctrl, monGroupsDirName, expectedID) + + collector := newCollector(expectedID, mockGetContainerPids, 0) + err := collector.setup() + + assert.NoError(t, err) + assert.Equal(t, collector.id, expectedID) + assert.Equal(t, collector.resctrlPath, expectedResctrlPath) +} + +func TestUpdateStats(t *testing.T) { + rootResctrl = mockResctrl() + defer os.RemoveAll(rootResctrl) + + pidsPath = mockContainersPids() + defer os.RemoveAll(pidsPath) + + processPath = mockProcFs() + defer os.RemoveAll(processPath) + + collector := newCollector("container", mockGetContainerPids, 0) + err := collector.setup() + assert.NoError(t, err) + + mockResctrlMonData(collector.resctrlPath) + enabledCMT, enabledMBM = true, true + + stats := info.ContainerStats{} + + // Write some dumb data. + err = collector.UpdateStats(&stats) + assert.NoError(t, err) + assert.Equal(t, stats.Resctrl.Cache, []info.CacheStats{ + {LLCOccupancy: 1111}, + {LLCOccupancy: 3333}, + }) + assert.Equal(t, stats.Resctrl.MemoryBandwidth, []info.MemoryBandwidthStats{ + { + TotalBytes: 3333, + LocalBytes: 2222, + }, + { + TotalBytes: 3333, + LocalBytes: 1111, + }, + }) +} + +func TestDestroy(t *testing.T) { + rootResctrl = mockResctrl() + defer os.RemoveAll(rootResctrl) + + pidsPath = mockContainersPids() + defer os.RemoveAll(pidsPath) + + processPath = mockProcFs() + defer os.RemoveAll(processPath) + + collector := newCollector("container", mockGetContainerPids, 0) + err := collector.setup() + if err != nil { + t.Fail() + } + + path := collector.resctrlPath + + if stat, err := os.Stat(path); stat == nil && err != nil { + t.Fail() + } + + collector.Destroy() + + if stat, err := os.Stat(path); stat != nil && err == nil { + t.Fail() + } +} diff --git a/resctrl/manager.go b/resctrl/manager.go index cceb28e6b6..b3034b5687 100644 --- a/resctrl/manager.go +++ b/resctrl/manager.go @@ -1,6 +1,6 @@ // +build linux -// Copyright 2020 Google Inc. All Rights Reserved. +// Copyright 2021 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,31 +18,52 @@ package resctrl import ( - "os" + "errors" + "time" "github.com/google/cadvisor/stats" - - "github.com/opencontainers/runc/libcontainer/intelrdt" ) +type Manager interface { + Destroy() + GetCollector(containerName string, getContainerPids func() ([]string, error)) (stats.Collector, error) +} + type manager struct { - id string stats.NoopDestroy + interval time.Duration } -func (m manager) GetCollector(resctrlPath string) (stats.Collector, error) { - if _, err := os.Stat(resctrlPath); err != nil { +func (m *manager) GetCollector(containerName string, getContainerPids func() ([]string, error)) (stats.Collector, error) { + collector := newCollector(containerName, getContainerPids, m.interval) + err := collector.setup() + if err != nil { return &stats.NoopCollector{}, err } - collector := newCollector(m.id, resctrlPath) + return collector, nil } -func NewManager(id string) (stats.Manager, error) { +func NewManager(interval time.Duration, setup func() error) (Manager, error) { + err := setup() + if err != nil { + return &NoopManager{}, err + } - if intelrdt.IsMBMEnabled() || intelrdt.IsCMTEnabled() { - return &manager{id: id}, nil + if !isResctrlInitialized { + return &NoopManager{}, errors.New("the resctrl isn't initialized") + } + if !(enabledCMT || enabledMBM) { + return &NoopManager{}, errors.New("there are no monitoring features available") } - return &stats.NoopManager{}, nil + return &manager{interval: interval}, nil +} + +type NoopManager struct { + stats.NoopDestroy +} + +func (np *NoopManager) GetCollector(_ string, _ func() ([]string, error)) (stats.Collector, error) { + return &stats.NoopCollector{}, nil } diff --git a/resctrl/manager_test.go b/resctrl/manager_test.go new file mode 100644 index 0000000000..3786319bf7 --- /dev/null +++ b/resctrl/manager_test.go @@ -0,0 +1,129 @@ +// +build linux + +// Copyright 2021 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Manager tests. +package resctrl + +import ( + "os" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestNewManager(t *testing.T) { + var testCases = []struct { + isResctrlInitialized bool + enabledCMT bool + enabledMBM bool + err string + expected Manager + }{ + { + true, + true, + false, + "", + &manager{interval: 0}, + }, + { + true, + false, + true, + "", + &manager{interval: 0}, + }, + { + true, + true, + true, + "", + &manager{interval: 0}, + }, + { + false, + true, + false, + "the resctrl isn't initialized", + &NoopManager{}, + }, + { + false, + false, + true, + "the resctrl isn't initialized", + &NoopManager{}, + }, + { + false, + true, + true, + "the resctrl isn't initialized", + &NoopManager{}, + }, + { + false, + false, + false, + "the resctrl isn't initialized", + &NoopManager{}, + }, + { + true, + false, + false, + "there are no monitoring features available", + &NoopManager{}, + }, + } + + for _, test := range testCases { + setup := func() error { + isResctrlInitialized = test.isResctrlInitialized + enabledCMT = test.enabledCMT + enabledMBM = test.enabledMBM + return nil + } + got, err := NewManager(0, setup) + assert.Equal(t, got, test.expected) + checkError(t, err, test.err) + } +} + +func TestGetCollector(t *testing.T) { + rootResctrl = mockResctrl() + defer os.RemoveAll(rootResctrl) + + pidsPath = mockContainersPids() + defer os.RemoveAll(pidsPath) + + processPath = mockProcFs() + defer os.RemoveAll(processPath) + + expectedID := "container" + + setup := func() error { + isResctrlInitialized = true + enabledCMT = true + enabledMBM = true + return nil + } + manager, err := NewManager(0, setup) + assert.NoError(t, err) + + _, err = manager.GetCollector(expectedID, mockGetContainerPids) + assert.NoError(t, err) +} diff --git a/resctrl/utils.go b/resctrl/utils.go new file mode 100644 index 0000000000..6835d7dfbd --- /dev/null +++ b/resctrl/utils.go @@ -0,0 +1,285 @@ +// +build linux + +// Copyright 2021 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Utilities. +package resctrl + +import ( + "bytes" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/cgroups/fs2" + "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" + "github.com/opencontainers/runc/libcontainer/intelrdt" +) + +const ( + cpuCgroup = "cpu" + rootContainer = "/" + monitoringGroupDir = "mon_groups" + processTask = "task" + cpusFileName = "cpus" + cpusListFileName = "cpus_list" + schemataFileName = "schemata" + tasksFileName = "tasks" + infoDirName = "info" + monDataDirName = "mon_data" + monGroupsDirName = "mon_groups" + noPidsError = "there are no pids passed" + noContainerError = "there are no container name passed" + llcOccupancyFileName = "llc_occupancy" + mbmLocalBytesFileName = "mbm_local_bytes" + mbmTotalBytesFileName = "mbm_total_bytes" +) + +var ( + rootResctrl = "" + pidsPath = "" + processPath = "/proc" + enabledMBM = false + enabledCMT = false + isResctrlInitialized = false +) + +func Setup() error { + var err error + rootResctrl, err = intelrdt.GetIntelRdtPath(rootContainer) + if err != nil { + return fmt.Errorf("unable to initialize resctrl: %v", err) + } + + if cgroups.IsCgroup2UnifiedMode() { + pidsPath = fs2.UnifiedMountpoint + } else { + pidsPath = filepath.Join(fs2.UnifiedMountpoint, cpuCgroup) + } + + enabledMBM = intelrdt.IsMBMEnabled() + enabledCMT = intelrdt.IsCMTEnabled() + + isResctrlInitialized = true + + return nil +} + +func getResctrlPath(containerName string, getContainerPids func() ([]string, error)) (string, error) { + if containerName == rootContainer { + return rootResctrl, nil + } + + pids, err := getContainerPids() + if err != nil { + return "", err + } + + if len(pids) == 0 { + return "", fmt.Errorf("couldn't determine resctrl for %q: there is no pids in cGroup", containerName) + } + + path, err := findControlGroup(pids) + if err != nil { + return "", err + } + + if containerName[0] == '/' { + containerName = containerName[1:] + } + + properContainerName := strings.Replace(containerName, "/", "-", -1) + monGroupPath := filepath.Join(path, monitoringGroupDir, properContainerName) + + // Create new mon_group if not exists. + if stat, _ := os.Stat(monGroupPath); stat == nil { + err = os.Mkdir(monGroupPath, os.ModePerm) + } + if err != nil { + return "", err + } + + for _, pid := range pids { + threadFiles, err := ioutil.ReadDir(filepath.Join(processPath, pid, processTask)) + if err != nil { + return "", err + } + + processThreads, err := getAllProcessThreads(threadFiles) + if err != nil { + return "", err + } + for _, thread := range processThreads { + err = intelrdt.WriteIntelRdtTasks(monGroupPath, thread) + if err != nil { + secondError := os.Remove(monGroupPath) + if secondError != nil { + return "", fmt.Errorf("%v : %v", err, secondError) + } + return "", err + } + } + } + + return monGroupPath, nil +} + +func getPids(containerName string) ([]int, error) { + if containerName == "" { + return nil, fmt.Errorf(noContainerError) + } + pids, err := cgroups.GetAllPids(filepath.Join(pidsPath, containerName)) + if err != nil { + return nil, fmt.Errorf("couldn't obtain pids: %v", err) + } + return pids, nil +} + +func getAllProcessThreads(threadFiles []os.FileInfo) ([]int, error) { + processThreads := make([]int, 0) + for _, file := range threadFiles { + pid, err := strconv.Atoi(file.Name()) + if err != nil { + return nil, fmt.Errorf("couldn't parse %q file: %v", file.Name(), err) + } + processThreads = append(processThreads, pid) + } + + return processThreads, nil +} + +func findControlGroup(pids []string) (string, error) { + if len(pids) == 0 { + return "", fmt.Errorf(noPidsError) + } + availablePaths, err := filepath.Glob(filepath.Join(rootResctrl, "*")) + if err != nil { + return "", err + } + + for _, path := range availablePaths { + switch path { + case filepath.Join(rootResctrl, cpusFileName): + continue + case filepath.Join(rootResctrl, cpusListFileName): + continue + case filepath.Join(rootResctrl, infoDirName): + continue + case filepath.Join(rootResctrl, monDataDirName): + continue + case filepath.Join(rootResctrl, monGroupsDirName): + continue + case filepath.Join(rootResctrl, schemataFileName): + continue + case filepath.Join(rootResctrl, tasksFileName): + inGroup, err := checkControlGroup(rootResctrl, pids) + if err != nil { + return "", err + } + if inGroup { + return rootResctrl, nil + } + default: + inGroup, err := checkControlGroup(path, pids) + if err != nil { + return "", err + } + if inGroup { + return path, nil + } + } + } + + return "", fmt.Errorf("there is no available control group") +} + +func checkControlGroup(path string, pids []string) (bool, error) { + if len(pids) == 0 { + return false, fmt.Errorf(noPidsError) + } + + taskFile, err := fscommon.ReadFile(path, "tasks") + if err != nil { + return false, fmt.Errorf("couldn't obtain pids: %v", err) + } + + for _, pid := range pids { + if !strings.Contains(taskFile, pid) { + return false, nil + } + } + + return true, nil +} + +func readStat(path string) (uint64, error) { + context, err := ioutil.ReadFile(path) + if err != nil { + return 0, err + } + + stat, err := strconv.ParseUint(string(bytes.TrimSpace(context)), 10, 64) + if err != nil { + return stat, fmt.Errorf("unable to parse %q as a uint from file %q", string(context), path) + } + + return stat, nil +} + +func getStats(path string) (intelrdt.Stats, error) { + stats := intelrdt.Stats{} + + numaDirs, err := filepath.Glob(filepath.Join(path, monDataDirName, "*")) + if err != nil { + return stats, err + } + + if len(numaDirs) == 0 { + return stats, fmt.Errorf("there is no mon_data NUMA dirs: %q", path) + } + + stats.CMTStats = &[]intelrdt.CMTNumaNodeStats{} + stats.MBMStats = &[]intelrdt.MBMNumaNodeStats{} + + for _, dir := range numaDirs { + if enabledCMT { + cmtStats := intelrdt.CMTNumaNodeStats{} + cmtStats.LLCOccupancy, err = readStat(filepath.Join(dir, llcOccupancyFileName)) + if err != nil { + return stats, err + } + *stats.CMTStats = append(*stats.CMTStats, cmtStats) + + } + if enabledMBM { + mbmStats := intelrdt.MBMNumaNodeStats{} + mbmStats.MBMTotalBytes, err = readStat(filepath.Join(dir, mbmTotalBytesFileName)) + if err != nil { + return stats, err + } + mbmStats.MBMLocalBytes, err = readStat(filepath.Join(dir, mbmLocalBytesFileName)) + if err != nil { + return stats, err + } + *stats.MBMStats = append(*stats.MBMStats, mbmStats) + } + } + + return stats, nil +} diff --git a/resctrl/utils_test.go b/resctrl/utils_test.go new file mode 100644 index 0000000000..464b985550 --- /dev/null +++ b/resctrl/utils_test.go @@ -0,0 +1,576 @@ +// +build linux + +// Copyright 2021 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Utilities tests. +// +// Mocked environment: +// - "container" first container with {1, 2, 3} processes. +// - "another" second container with {5, 6} processes. +// +package resctrl + +import ( + "fmt" + "io/ioutil" + "os" + "path/filepath" + "testing" + + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/intelrdt" + + "github.com/stretchr/testify/assert" + + "github.com/google/cadvisor/utils/sysfs/fakesysfs" +) + +func mockAllGetContainerPids() ([]string, error) { + return []string{"1", "2", "3", "5", "6"}, nil +} + +func mockGetContainerPids() ([]string, error) { + return []string{"1", "2", "3"}, nil +} + +func mockAnotherGetContainerPids() ([]string, error) { + return []string{"5", "6"}, nil +} + +func touch(path string) error { + file, err := os.OpenFile(path, os.O_CREATE, os.ModePerm) + if err != nil { + return err + } + return file.Close() +} + +func touchDir(path string) error { + err := os.MkdirAll(path, os.ModePerm) + if err != nil { + return err + } + return nil +} + +func fillPids(path string, pids []int) error { + f, err := os.OpenFile(path, os.O_WRONLY, os.ModePerm) + if err != nil { + return err + } + defer f.Close() + for _, pid := range pids { + _, err := fmt.Fprintln(f, pid) + if err != nil { + return err + } + } + return nil +} + +func mockResctrl() string { + path, _ := ioutil.TempDir("", "resctrl") + + var files = []struct { + path string + touch func(string) error + }{ + // Mock root files. + { + filepath.Join(path, cpusFileName), + touch, + }, + { + filepath.Join(path, cpusListFileName), + touch, + }, + { + filepath.Join(path, infoDirName), + touchDir, + }, + { + filepath.Join(path, monDataDirName), + touchDir, + }, + { + filepath.Join(path, monGroupsDirName), + touchDir, + }, + { + filepath.Join(path, schemataFileName), + touch, + }, + { + filepath.Join(path, tasksFileName), + touch, + }, + // Create custom CLOSID "m1". + { + filepath.Join(path, "m1"), + touchDir, + }, + { + filepath.Join(path, "m1", cpusFileName), + touch, + }, + { + filepath.Join(path, "m1", cpusListFileName), + touch, + }, + { + filepath.Join(path, "m1", monDataDirName), + touchDir, + }, + { + filepath.Join(path, "m1", monGroupsDirName), + touchDir, + }, + { + filepath.Join(path, "m1", schemataFileName), + touch, + }, + { + filepath.Join(path, "m1", tasksFileName), + touch, + }, + } + for _, file := range files { + err := file.touch(file.path) + if err != nil { + return "" + } + } + + // Mock root group task file. + err := fillPids(filepath.Join(path, tasksFileName), []int{1, 2, 3, 4}) + if err != nil { + return "" + } + + // Mock custom CLOSID "m1" task file. + err = fillPids(filepath.Join(path, "m1", tasksFileName), []int{5, 6}) + if err != nil { + return "" + } + + return path +} + +func mockResctrlMonData(path string) { + + _ = touchDir(filepath.Join(path, monDataDirName, "mon_L3_00")) + _ = touchDir(filepath.Join(path, monDataDirName, "mon_L3_01")) + + var files = []struct { + path string + value string + }{ + { + filepath.Join(path, monDataDirName, "mon_L3_00", llcOccupancyFileName), + "1111", + }, + { + filepath.Join(path, monDataDirName, "mon_L3_00", mbmLocalBytesFileName), + "2222", + }, + { + filepath.Join(path, monDataDirName, "mon_L3_00", mbmTotalBytesFileName), + "3333", + }, + { + filepath.Join(path, monDataDirName, "mon_L3_01", llcOccupancyFileName), + "3333", + }, + { + filepath.Join(path, monDataDirName, "mon_L3_01", mbmLocalBytesFileName), + "1111", + }, + { + filepath.Join(path, monDataDirName, "mon_L3_01", mbmTotalBytesFileName), + "3333", + }, + } + + for _, file := range files { + _ = touch(file.path) + _ = ioutil.WriteFile(file.path, []byte(file.value), os.ModePerm) + } +} + +func mockContainersPids() string { + path, _ := ioutil.TempDir("", "cgroup") + // container + _ = touchDir(filepath.Join(path, "container")) + _ = touch(filepath.Join(path, "container", cgroups.CgroupProcesses)) + err := fillPids(filepath.Join(path, "container", cgroups.CgroupProcesses), []int{1, 2, 3}) + if err != nil { + return "" + } + // another + _ = touchDir(filepath.Join(path, "another")) + _ = touch(filepath.Join(path, "another", cgroups.CgroupProcesses)) + err = fillPids(filepath.Join(path, "another", cgroups.CgroupProcesses), []int{5}) + if err != nil { + return "" + } + + return path +} + +func mockProcFs() string { + path, _ := ioutil.TempDir("", "proc") + + var files = []struct { + path string + touch func(string) error + }{ + // container + { + filepath.Join(path, "1", processTask, "1"), + touchDir, + }, + { + filepath.Join(path, "2", processTask, "2"), + touchDir, + }, + { + filepath.Join(path, "3", processTask, "3"), + touchDir, + }, + { + filepath.Join(path, "4", processTask, "4"), + touchDir, + }, + // another + { + filepath.Join(path, "5", processTask, "5"), + touchDir, + }, + { + filepath.Join(path, "6", processTask, "6"), + touchDir, + }, + } + + for _, file := range files { + _ = file.touch(file.path) + } + + return path +} + +func checkError(t *testing.T, err error, expected string) { + if expected != "" { + assert.EqualError(t, err, expected) + } else { + assert.NoError(t, err) + } +} + +func TestGetResctrlPath(t *testing.T) { + rootResctrl = mockResctrl() + defer os.RemoveAll(rootResctrl) + + pidsPath = mockContainersPids() + defer os.RemoveAll(pidsPath) + + processPath = mockProcFs() + defer os.RemoveAll(processPath) + + var testCases = []struct { + container string + getContainerPids func() ([]string, error) + expected string + err string + }{ + { + "container", + mockGetContainerPids, + filepath.Join(rootResctrl, monGroupsDirName, "container"), + "", + }, + { + "another", + mockAnotherGetContainerPids, + filepath.Join(rootResctrl, "m1", monGroupsDirName, "another"), + "", + }, + { + "/", + mockAllGetContainerPids, + rootResctrl, + "", + }, + } + + for _, test := range testCases { + actual, err := getResctrlPath(test.container, test.getContainerPids) + assert.Equal(t, test.expected, actual) + checkError(t, err, test.err) + } +} + +func TestGetPids(t *testing.T) { + pidsPath = mockContainersPids() + defer os.RemoveAll(pidsPath) + + var testCases = []struct { + container string + expected []int + err string + }{ + { + "", + nil, + noContainerError, + }, + { + "container", + []int{1, 2, 3}, + "", + }, + { + "no_container", + nil, + fmt.Sprintf("couldn't obtain pids: lstat %v: no such file or directory", filepath.Join(pidsPath, "no_container")), + }, + } + + for _, test := range testCases { + actual, err := getPids(test.container) + assert.Equal(t, test.expected, actual) + checkError(t, err, test.err) + } +} + +func TestGetAllProcessThreads(t *testing.T) { + var testCases = []struct { + filesInfo []os.FileInfo + expected []int + err string + }{ + { + []os.FileInfo{ + &fakesysfs.FileInfo{EntryName: "1"}, + &fakesysfs.FileInfo{EntryName: "2"}, + &fakesysfs.FileInfo{EntryName: "3"}, + &fakesysfs.FileInfo{EntryName: "4"}, + }, + []int{1, 2, 3, 4}, + "", + }, + { + []os.FileInfo{ + &fakesysfs.FileInfo{EntryName: "incorrect"}, + &fakesysfs.FileInfo{EntryName: "1"}, + }, + nil, + "couldn't parse \"incorrect\" file: strconv.Atoi: parsing \"incorrect\": invalid syntax", + }, + } + + for _, test := range testCases { + actual, err := getAllProcessThreads(test.filesInfo) + assert.Equal(t, test.expected, actual) + checkError(t, err, test.err) + } +} + +func TestFindControlGroup(t *testing.T) { + rootResctrl = mockResctrl() + defer os.RemoveAll(rootResctrl) + + var testCases = []struct { + pids []string + expected string + err string + }{ + { + []string{"1", "2", "3", "4"}, + rootResctrl, + "", + }, + { + []string{}, + "", + "there are no pids passed", + }, + { + []string{"5", "6"}, + filepath.Join(rootResctrl, "m1"), + "", + }, + { + []string{"7", "8"}, + "", + "there is no available control group", + }, + } + for _, test := range testCases { + actual, err := findControlGroup(test.pids) + assert.Equal(t, test.expected, actual) + checkError(t, err, test.err) + } +} + +func TestCheckControlGroup(t *testing.T) { + rootResctrl = mockResctrl() + defer os.RemoveAll(rootResctrl) + + var testCases = []struct { + expected bool + err string + path string + pids []string + }{ + { + true, + "", + rootResctrl, + []string{"1", "2"}, + }, + { + false, + "", + rootResctrl, + []string{"4", "5"}, + }, + { + false, + "", + filepath.Join(rootResctrl, "m1"), + []string{"1"}, + }, + { + false, + fmt.Sprintf("couldn't obtain pids: open %s: no such file or directory", filepath.Join(rootResctrl, monitoringGroupDir, tasksFileName)), + filepath.Join(rootResctrl, monitoringGroupDir), + []string{"1", "2"}, + }, + { + false, + noPidsError, + rootResctrl, + nil, + }, + } + + for _, test := range testCases { + actual, err := checkControlGroup(test.path, test.pids) + assert.Equal(t, test.expected, actual) + checkError(t, err, test.err) + } +} + +func TestGetStats(t *testing.T) { + rootResctrl = mockResctrl() + defer os.RemoveAll(rootResctrl) + + pidsPath = mockContainersPids() + defer os.RemoveAll(pidsPath) + + processPath = mockProcFs() + defer os.RemoveAll(processPath) + + enabledCMT, enabledMBM = true, true + + var testCases = []struct { + container string + expected intelrdt.Stats + err string + }{ + { + "container", + intelrdt.Stats{ + MBMStats: &[]intelrdt.MBMNumaNodeStats{ + { + MBMTotalBytes: 3333, + MBMLocalBytes: 2222, + }, + { + MBMTotalBytes: 3333, + MBMLocalBytes: 1111, + }, + }, + CMTStats: &[]intelrdt.CMTNumaNodeStats{ + { + LLCOccupancy: 1111, + }, + { + LLCOccupancy: 3333, + }, + }, + }, + "", + }, + { + "another", + intelrdt.Stats{ + MBMStats: &[]intelrdt.MBMNumaNodeStats{ + { + MBMTotalBytes: 3333, + MBMLocalBytes: 2222, + }, + { + MBMTotalBytes: 3333, + MBMLocalBytes: 1111, + }, + }, + CMTStats: &[]intelrdt.CMTNumaNodeStats{ + { + LLCOccupancy: 1111, + }, + { + LLCOccupancy: 3333, + }, + }, + }, + "", + }, + { + "/", + intelrdt.Stats{ + MBMStats: &[]intelrdt.MBMNumaNodeStats{ + { + MBMTotalBytes: 3333, + MBMLocalBytes: 2222, + }, + { + MBMTotalBytes: 3333, + MBMLocalBytes: 1111, + }, + }, + CMTStats: &[]intelrdt.CMTNumaNodeStats{ + { + LLCOccupancy: 1111, + }, + { + LLCOccupancy: 3333, + }, + }, + }, + "", + }, + } + + for _, test := range testCases { + containerPath, _ := getResctrlPath(test.container, mockGetContainerPids) + mockResctrlMonData(containerPath) + actual, err := getStats(containerPath) + checkError(t, err, test.err) + assert.Equal(t, test.expected.CMTStats, actual.CMTStats) + assert.Equal(t, test.expected.MBMStats, actual.MBMStats) + } +} From 3452cf3f1887f1e9361b3dcf57ef4e4e9cf11ea1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Mon, 15 Mar 2021 14:41:28 +0100 Subject: [PATCH 02/49] Do not try to setup the root container. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/collector.go | 51 +++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/resctrl/collector.go b/resctrl/collector.go index 9ca2fe77b4..7d5039f40f 100644 --- a/resctrl/collector.go +++ b/resctrl/collector.go @@ -40,34 +40,37 @@ func newCollector(id string, getContainerPids func() ([]string, error), interval } func (c *collector) setup() error { - err := c.prepareMonGroup() + if c.id != rootContainer { + // There is no need to prepare or update "/" container. + err := c.prepareMonGroup() - if c.interval != 0 && c.id != rootContainer { - if err != nil { - klog.Errorf("Failed to setup container %q resctrl collector: %v \n Trying again in next intervals!", c.id, err) - } - go func() { - for { - if c.running { - err = c.prepareMonGroup() - if err != nil { - klog.Errorf("checking %q resctrl collector but: %v", c.id, err) - } - } else { - err = c.clear() - if err != nil { - klog.Errorf("trying to end %q resctrl collector interval but: %v", c.id, err) + if c.interval != 0 { + if err != nil { + klog.Errorf("Failed to setup container %q resctrl collector: %w \n Trying again in next intervals!", c.id, err) + } + go func() { + for { + time.Sleep(c.interval) + if c.running { + err = c.prepareMonGroup() + if err != nil { + klog.Errorf("checking %q resctrl collector but: %w", c.id, err) + } + } else { + err = c.clear() + if err != nil { + klog.Errorf("trying to end %q resctrl collector interval but: %w", c.id, err) + } + break } - break } - time.Sleep(c.interval) + }() + } else { + // There is no interval set, if setup fail, stop. + if err != nil { + c.running = false + return err } - }() - } else { - // There is no interval set, if setup fail, stop. - if err != nil { - c.running = false - return err } } From 18d297d0e7c6eae37b35878ac319050ef738159e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Mon, 15 Mar 2021 15:39:15 +0100 Subject: [PATCH 03/49] Update klog version to avoid errors from golangci-lint. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- go.mod | 2 +- go.sum | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 9f5cc9aa55..1adf468a8f 100644 --- a/go.mod +++ b/go.mod @@ -46,6 +46,6 @@ require ( google.golang.org/grpc v1.27.1 google.golang.org/protobuf v1.25.0 // indirect gotest.tools/v3 v3.0.3 // indirect - k8s.io/klog/v2 v2.2.0 + k8s.io/klog/v2 v2.8.0 k8s.io/utils v0.0.0-20201110183641-67b214c5f920 ) diff --git a/go.sum b/go.sum index 0509c28ec5..52a24fa38d 100644 --- a/go.sum +++ b/go.sum @@ -91,6 +91,8 @@ github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= github.com/go-logr/logr v0.2.0 h1:QvGt2nLcHH0WK9orKa+ppBPAxREcH364nPUedEpK0TY= github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= +github.com/go-logr/logr v0.4.0 h1:K7/B1jt6fIBQVd4Owv2MqGQClcgf0R266+7C/QjRcLc= +github.com/go-logr/logr v0.4.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/godbus/dbus/v5 v5.0.3 h1:ZqHaoEF7TBzh4jzPmqVhE/5A1z9of6orkAe5uHoAeME= github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= @@ -491,6 +493,8 @@ honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9 k8s.io/klog/v2 v2.0.0/go.mod h1:PBfzABfn139FHAV07az/IF9Wp1bkk3vpT2XSJ76fSDE= k8s.io/klog/v2 v2.2.0 h1:XRvcwJozkgZ1UQJmfMGpvRthQHOvihEhYtDfAaxMz/A= k8s.io/klog/v2 v2.2.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= +k8s.io/klog/v2 v2.8.0 h1:Q3gmuM9hKEjefWFFYF0Mat+YyFJvsUyYuwyNNJ5C9Ts= +k8s.io/klog/v2 v2.8.0/go.mod h1:hy9LJ/NvuK+iVyP4Ehqva4HxZG/oXyIS3n3Jmire4Ec= k8s.io/utils v0.0.0-20201110183641-67b214c5f920 h1:CbnUZsM497iRC5QMVkHwyl8s2tB3g7yaSHkYPkpgelw= k8s.io/utils v0.0.0-20201110183641-67b214c5f920/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= From 3feb264be68ddb444a000c5491627f5f008590d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Mon, 15 Mar 2021 15:42:04 +0100 Subject: [PATCH 04/49] Update klog version in cmd to avoid errors from golangci-lint. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- cmd/go.mod | 2 +- cmd/go.sum | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/cmd/go.mod b/cmd/go.mod index c907e1c89b..2a23e1fe99 100644 --- a/cmd/go.mod +++ b/cmd/go.mod @@ -27,6 +27,6 @@ require ( golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43 google.golang.org/api v0.34.0 gopkg.in/olivere/elastic.v2 v2.0.12 - k8s.io/klog/v2 v2.2.0 + k8s.io/klog/v2 v2.8.0 k8s.io/utils v0.0.0-20201110183641-67b214c5f920 ) diff --git a/cmd/go.sum b/cmd/go.sum index cf0ee4f543..b589c23a28 100644 --- a/cmd/go.sum +++ b/cmd/go.sum @@ -158,6 +158,8 @@ github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= github.com/go-logr/logr v0.2.0 h1:QvGt2nLcHH0WK9orKa+ppBPAxREcH364nPUedEpK0TY= github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= +github.com/go-logr/logr v0.4.0 h1:K7/B1jt6fIBQVd4Owv2MqGQClcgf0R266+7C/QjRcLc= +github.com/go-logr/logr v0.4.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/godbus/dbus/v5 v5.0.3 h1:ZqHaoEF7TBzh4jzPmqVhE/5A1z9of6orkAe5uHoAeME= @@ -824,6 +826,8 @@ honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9 k8s.io/klog/v2 v2.0.0/go.mod h1:PBfzABfn139FHAV07az/IF9Wp1bkk3vpT2XSJ76fSDE= k8s.io/klog/v2 v2.2.0 h1:XRvcwJozkgZ1UQJmfMGpvRthQHOvihEhYtDfAaxMz/A= k8s.io/klog/v2 v2.2.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= +k8s.io/klog/v2 v2.8.0 h1:Q3gmuM9hKEjefWFFYF0Mat+YyFJvsUyYuwyNNJ5C9Ts= +k8s.io/klog/v2 v2.8.0/go.mod h1:hy9LJ/NvuK+iVyP4Ehqva4HxZG/oXyIS3n3Jmire4Ec= k8s.io/utils v0.0.0-20201110183641-67b214c5f920 h1:CbnUZsM497iRC5QMVkHwyl8s2tB3g7yaSHkYPkpgelw= k8s.io/utils v0.0.0-20201110183641-67b214c5f920/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= From 7aaaff7800aa1487dd7923f2f61dec14e5a7cc32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Mon, 15 Mar 2021 15:52:41 +0100 Subject: [PATCH 05/49] Fix go.sum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- cmd/go.sum | 4 ---- go.sum | 4 ---- 2 files changed, 8 deletions(-) diff --git a/cmd/go.sum b/cmd/go.sum index b589c23a28..0a0e1d9d26 100644 --- a/cmd/go.sum +++ b/cmd/go.sum @@ -156,8 +156,6 @@ github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9 github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= -github.com/go-logr/logr v0.2.0 h1:QvGt2nLcHH0WK9orKa+ppBPAxREcH364nPUedEpK0TY= -github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= github.com/go-logr/logr v0.4.0 h1:K7/B1jt6fIBQVd4Owv2MqGQClcgf0R266+7C/QjRcLc= github.com/go-logr/logr v0.4.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= @@ -824,8 +822,6 @@ honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= k8s.io/klog/v2 v2.0.0/go.mod h1:PBfzABfn139FHAV07az/IF9Wp1bkk3vpT2XSJ76fSDE= -k8s.io/klog/v2 v2.2.0 h1:XRvcwJozkgZ1UQJmfMGpvRthQHOvihEhYtDfAaxMz/A= -k8s.io/klog/v2 v2.2.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= k8s.io/klog/v2 v2.8.0 h1:Q3gmuM9hKEjefWFFYF0Mat+YyFJvsUyYuwyNNJ5C9Ts= k8s.io/klog/v2 v2.8.0/go.mod h1:hy9LJ/NvuK+iVyP4Ehqva4HxZG/oXyIS3n3Jmire4Ec= k8s.io/utils v0.0.0-20201110183641-67b214c5f920 h1:CbnUZsM497iRC5QMVkHwyl8s2tB3g7yaSHkYPkpgelw= diff --git a/go.sum b/go.sum index 52a24fa38d..4b0e16a97e 100644 --- a/go.sum +++ b/go.sum @@ -89,8 +89,6 @@ github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2 github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= -github.com/go-logr/logr v0.2.0 h1:QvGt2nLcHH0WK9orKa+ppBPAxREcH364nPUedEpK0TY= -github.com/go-logr/logr v0.2.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= github.com/go-logr/logr v0.4.0 h1:K7/B1jt6fIBQVd4Owv2MqGQClcgf0R266+7C/QjRcLc= github.com/go-logr/logr v0.4.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= @@ -491,8 +489,6 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= k8s.io/klog/v2 v2.0.0/go.mod h1:PBfzABfn139FHAV07az/IF9Wp1bkk3vpT2XSJ76fSDE= -k8s.io/klog/v2 v2.2.0 h1:XRvcwJozkgZ1UQJmfMGpvRthQHOvihEhYtDfAaxMz/A= -k8s.io/klog/v2 v2.2.0/go.mod h1:Od+F08eJP+W3HUb4pSrPpgp9DGU4GzlpG/TmITuYh/Y= k8s.io/klog/v2 v2.8.0 h1:Q3gmuM9hKEjefWFFYF0Mat+YyFJvsUyYuwyNNJ5C9Ts= k8s.io/klog/v2 v2.8.0/go.mod h1:hy9LJ/NvuK+iVyP4Ehqva4HxZG/oXyIS3n3Jmire4Ec= k8s.io/utils v0.0.0-20201110183641-67b214c5f920 h1:CbnUZsM497iRC5QMVkHwyl8s2tB3g7yaSHkYPkpgelw= From 58a2e0103b9901d870cfee5e3ba218ab1c150b2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Mon, 15 Mar 2021 16:27:22 +0100 Subject: [PATCH 06/49] Check if container moved between control groups only if its running. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/collector.go | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/resctrl/collector.go b/resctrl/collector.go index 7d5039f40f..edf125c58d 100644 --- a/resctrl/collector.go +++ b/resctrl/collector.go @@ -84,13 +84,15 @@ func (c *collector) prepareMonGroup() error { } // Check if container moved between control groups. - if newPath != c.resctrlPath { - err = c.clear() - if err != nil { - c.running = false - return fmt.Errorf("couldn't clear previous mon group: %v", err) + if c.running { + if newPath != c.resctrlPath { + err = c.clear() + if err != nil { + c.running = false + return fmt.Errorf("couldn't clear previous mon group: %v", err) + } + c.resctrlPath = newPath } - c.resctrlPath = newPath } // Mon group prepared, the collector is running correctly. From 5561bd971b4904e75550d46d38f056cfe88ed97c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Tue, 16 Mar 2021 09:09:45 +0100 Subject: [PATCH 07/49] Get NUMA nodes from MachineInfo.Topology. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- manager/manager.go | 2 +- resctrl/collector.go | 28 +++++++++++++++------------- resctrl/collector_test.go | 6 +++--- resctrl/manager.go | 8 ++++---- resctrl/manager_test.go | 2 +- 5 files changed, 24 insertions(+), 22 deletions(-) diff --git a/manager/manager.go b/manager/manager.go index 9c6b972238..5154425c34 100644 --- a/manager/manager.go +++ b/manager/manager.go @@ -959,7 +959,7 @@ func (m *manager) createContainerLocked(containerName string, watchSource watche if m.includedMetrics.Has(container.ResctrlMetrics) { cont.resctrlCollector, err = m.resctrlManager.GetCollector(containerName, func() ([]string, error) { return cont.getContainerPids(true) - }) + }, len(m.machineInfo.Topology)) if err != nil { klog.V(4).Infof("resctrl metrics will not be available for container %s: %s", cont.info.Name, err) } diff --git a/resctrl/collector.go b/resctrl/collector.go index edf125c58d..3bdf25754b 100644 --- a/resctrl/collector.go +++ b/resctrl/collector.go @@ -28,15 +28,16 @@ import ( ) type collector struct { - id string - interval time.Duration - getContainerPids func() ([]string, error) - resctrlPath string - running bool + id string + interval time.Duration + getContainerPids func() ([]string, error) + resctrlPath string + running bool + numberOfNUMANodes int } -func newCollector(id string, getContainerPids func() ([]string, error), interval time.Duration) *collector { - return &collector{id: id, interval: interval, getContainerPids: getContainerPids} +func newCollector(id string, getContainerPids func() ([]string, error), interval time.Duration, numberOfNUMANodes int) *collector { + return &collector{id: id, interval: interval, getContainerPids: getContainerPids, numberOfNUMANodes: numberOfNUMANodes} } func (c *collector) setup() error { @@ -83,8 +84,8 @@ func (c *collector) prepareMonGroup() error { return fmt.Errorf("couldn't obtain mon_group path: %v", err) } - // Check if container moved between control groups. if c.running { + // Check if container moved between control groups. if newPath != c.resctrlPath { err = c.clear() if err != nil { @@ -93,10 +94,12 @@ func (c *collector) prepareMonGroup() error { } c.resctrlPath = newPath } + } else { + // Mon group prepared, the collector is running correctly. + c.resctrlPath = newPath + c.running = true } - // Mon group prepared, the collector is running correctly. - c.running = true return nil } @@ -108,10 +111,9 @@ func (c *collector) UpdateStats(stats *info.ContainerStats) error { if err != nil { return err } - numberOfNUMANodes := len(*resctrlStats.MBMStats) - stats.Resctrl.MemoryBandwidth = make([]info.MemoryBandwidthStats, 0, numberOfNUMANodes) - stats.Resctrl.Cache = make([]info.CacheStats, 0, numberOfNUMANodes) + stats.Resctrl.MemoryBandwidth = make([]info.MemoryBandwidthStats, 0, c.numberOfNUMANodes) + stats.Resctrl.Cache = make([]info.CacheStats, 0, c.numberOfNUMANodes) for _, numaNodeStats := range *resctrlStats.MBMStats { stats.Resctrl.MemoryBandwidth = append(stats.Resctrl.MemoryBandwidth, diff --git a/resctrl/collector_test.go b/resctrl/collector_test.go index d352e94d1c..1a95d13b91 100644 --- a/resctrl/collector_test.go +++ b/resctrl/collector_test.go @@ -40,7 +40,7 @@ func TestNewCollectorWithSetup(t *testing.T) { expectedID := "container" expectedResctrlPath := filepath.Join(rootResctrl, monGroupsDirName, expectedID) - collector := newCollector(expectedID, mockGetContainerPids, 0) + collector := newCollector(expectedID, mockGetContainerPids, 0, 2) err := collector.setup() assert.NoError(t, err) @@ -58,7 +58,7 @@ func TestUpdateStats(t *testing.T) { processPath = mockProcFs() defer os.RemoveAll(processPath) - collector := newCollector("container", mockGetContainerPids, 0) + collector := newCollector("container", mockGetContainerPids, 0, 2) err := collector.setup() assert.NoError(t, err) @@ -96,7 +96,7 @@ func TestDestroy(t *testing.T) { processPath = mockProcFs() defer os.RemoveAll(processPath) - collector := newCollector("container", mockGetContainerPids, 0) + collector := newCollector("container", mockGetContainerPids, 0, 2) err := collector.setup() if err != nil { t.Fail() diff --git a/resctrl/manager.go b/resctrl/manager.go index b3034b5687..86e7fbc5aa 100644 --- a/resctrl/manager.go +++ b/resctrl/manager.go @@ -26,7 +26,7 @@ import ( type Manager interface { Destroy() - GetCollector(containerName string, getContainerPids func() ([]string, error)) (stats.Collector, error) + GetCollector(containerName string, getContainerPids func() ([]string, error), numberOfNUMANodes int) (stats.Collector, error) } type manager struct { @@ -34,8 +34,8 @@ type manager struct { interval time.Duration } -func (m *manager) GetCollector(containerName string, getContainerPids func() ([]string, error)) (stats.Collector, error) { - collector := newCollector(containerName, getContainerPids, m.interval) +func (m *manager) GetCollector(containerName string, getContainerPids func() ([]string, error), numberOfNUMANodes int) (stats.Collector, error) { + collector := newCollector(containerName, getContainerPids, m.interval, numberOfNUMANodes) err := collector.setup() if err != nil { return &stats.NoopCollector{}, err @@ -64,6 +64,6 @@ type NoopManager struct { stats.NoopDestroy } -func (np *NoopManager) GetCollector(_ string, _ func() ([]string, error)) (stats.Collector, error) { +func (np *NoopManager) GetCollector(_ string, _ func() ([]string, error), _ int) (stats.Collector, error) { return &stats.NoopCollector{}, nil } diff --git a/resctrl/manager_test.go b/resctrl/manager_test.go index 3786319bf7..809ddb0cba 100644 --- a/resctrl/manager_test.go +++ b/resctrl/manager_test.go @@ -124,6 +124,6 @@ func TestGetCollector(t *testing.T) { manager, err := NewManager(0, setup) assert.NoError(t, err) - _, err = manager.GetCollector(expectedID, mockGetContainerPids) + _, err = manager.GetCollector(expectedID, mockGetContainerPids, 2) assert.NoError(t, err) } From 9a1e6cb7c328ec535e976da8cbe302120383b34c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Tue, 16 Mar 2021 11:53:32 +0100 Subject: [PATCH 08/49] Make code thread safe again. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/collector.go | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/resctrl/collector.go b/resctrl/collector.go index 3bdf25754b..a18715b222 100644 --- a/resctrl/collector.go +++ b/resctrl/collector.go @@ -20,6 +20,7 @@ package resctrl import ( "fmt" "os" + "sync" "time" "k8s.io/klog/v2" @@ -34,10 +35,12 @@ type collector struct { resctrlPath string running bool numberOfNUMANodes int + mu sync.Mutex } func newCollector(id string, getContainerPids func() ([]string, error), interval time.Duration, numberOfNUMANodes int) *collector { - return &collector{id: id, interval: interval, getContainerPids: getContainerPids, numberOfNUMANodes: numberOfNUMANodes} + return &collector{id: id, interval: interval, getContainerPids: getContainerPids, numberOfNUMANodes: numberOfNUMANodes, + mu: sync.Mutex{}} } func (c *collector) setup() error { @@ -52,18 +55,18 @@ func (c *collector) setup() error { go func() { for { time.Sleep(c.interval) + c.mu.Lock() + klog.V(5).Infof("Checking %q containers control group.", c.id) if c.running { err = c.prepareMonGroup() if err != nil { klog.Errorf("checking %q resctrl collector but: %w", c.id, err) } } else { - err = c.clear() - if err != nil { - klog.Errorf("trying to end %q resctrl collector interval but: %w", c.id, err) - } - break + c.mu.Unlock() + return } + c.mu.Unlock() } }() } else { @@ -104,6 +107,7 @@ func (c *collector) prepareMonGroup() error { } func (c *collector) UpdateStats(stats *info.ContainerStats) error { + c.mu.Lock() if c.running { stats.Resctrl = info.ResctrlStats{} @@ -128,16 +132,19 @@ func (c *collector) UpdateStats(stats *info.ContainerStats) error { info.CacheStats{LLCOccupancy: numaNodeStats.LLCOccupancy}) } } + c.mu.Unlock() return nil } func (c *collector) Destroy() { + c.mu.Lock() c.running = false err := c.clear() if err != nil { klog.Errorf("trying to destroy %q resctrl collector but: %v", c.id, err) } + c.mu.Unlock() } func (c *collector) clear() error { From 873cc47e91b26de8ff065eabef002d4c214d110e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Tue, 16 Mar 2021 12:30:55 +0100 Subject: [PATCH 09/49] Fix typo. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/collector.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resctrl/collector.go b/resctrl/collector.go index a18715b222..d5d51abc41 100644 --- a/resctrl/collector.go +++ b/resctrl/collector.go @@ -148,7 +148,7 @@ func (c *collector) Destroy() { } func (c *collector) clear() error { - // Couldn't remove root resctrl directory. + // Not allowed to remove root resctrl directory. if c.id != rootContainer && c.resctrlPath != "" { err := os.RemoveAll(c.resctrlPath) if err != nil { From d07084cb9a1b30ba3b52b373230dd63b331654fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Wed, 17 Mar 2021 12:26:31 +0100 Subject: [PATCH 10/49] Refactor resctrl collector setup. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/collector.go | 86 +++++++++++++++++++++---------------------- resctrl/utils.go | 14 +++---- resctrl/utils_test.go | 6 +-- 3 files changed, 52 insertions(+), 54 deletions(-) diff --git a/resctrl/collector.go b/resctrl/collector.go index d5d51abc41..ae6bb87292 100644 --- a/resctrl/collector.go +++ b/resctrl/collector.go @@ -44,63 +44,63 @@ func newCollector(id string, getContainerPids func() ([]string, error), interval } func (c *collector) setup() error { - if c.id != rootContainer { - // There is no need to prepare or update "/" container. - err := c.prepareMonGroup() + var err error + c.resctrlPath, err = prepareMonitoringGroup(c.id, c.getContainerPids) - if c.interval != 0 { - if err != nil { - klog.Errorf("Failed to setup container %q resctrl collector: %w \n Trying again in next intervals!", c.id, err) - } - go func() { - for { - time.Sleep(c.interval) - c.mu.Lock() - klog.V(5).Infof("Checking %q containers control group.", c.id) - if c.running { - err = c.prepareMonGroup() - if err != nil { - klog.Errorf("checking %q resctrl collector but: %w", c.id, err) - } + if c.interval != 0 { + if err != nil { + c.running = false + klog.Errorf("Failed to setup container %q resctrl collector: %w \n Trying again in next intervals.", c.id, err) + } else { + c.running = true + } + go func() { + for { + time.Sleep(c.interval) + c.mu.Lock() + klog.V(5).Infof("Trying to check %q containers control group.", c.id) + if c.running { + err = c.checkMonitoringGroup() + if err != nil { + c.running = false + klog.Errorf("Failed to check %q resctrl collector control group: %w \n Trying again in next intervals.", c.id, err) + } + } else { + c.resctrlPath, err = prepareMonitoringGroup(c.id, c.getContainerPids) + if err != nil { + c.running = false + klog.Errorf("Failed to setup container %q resctrl collector: %w \n Trying again in next intervals.", c.id, err) } else { - c.mu.Unlock() - return + c.running = true } - c.mu.Unlock() } - }() - } else { - // There is no interval set, if setup fail, stop. - if err != nil { - c.running = false - return err + c.mu.Unlock() } + }() + } else { + // There is no interval set, if setup fail, stop. + if err != nil { + return fmt.Errorf("failed to setup container %q resctrl collector: %w", c.id, err) } + c.running = true } return nil } -func (c *collector) prepareMonGroup() error { - newPath, err := getResctrlPath(c.id, c.getContainerPids) +func (c *collector) checkMonitoringGroup() error { + newPath, err := prepareMonitoringGroup(c.id, c.getContainerPids) if err != nil { return fmt.Errorf("couldn't obtain mon_group path: %v", err) } - if c.running { - // Check if container moved between control groups. - if newPath != c.resctrlPath { - err = c.clear() - if err != nil { - c.running = false - return fmt.Errorf("couldn't clear previous mon group: %v", err) - } - c.resctrlPath = newPath + // Check if container moved between control groups. + if newPath != c.resctrlPath { + err = c.clear() + if err != nil { + return fmt.Errorf("couldn't clear previous monitoring group: %w", err) } - } else { - // Mon group prepared, the collector is running correctly. c.resctrlPath = newPath - c.running = true } return nil @@ -108,6 +108,7 @@ func (c *collector) prepareMonGroup() error { func (c *collector) UpdateStats(stats *info.ContainerStats) error { c.mu.Lock() + defer c.mu.Unlock() if c.running { stats.Resctrl = info.ResctrlStats{} @@ -132,23 +133,22 @@ func (c *collector) UpdateStats(stats *info.ContainerStats) error { info.CacheStats{LLCOccupancy: numaNodeStats.LLCOccupancy}) } } - c.mu.Unlock() return nil } func (c *collector) Destroy() { c.mu.Lock() + defer c.mu.Unlock() c.running = false err := c.clear() if err != nil { klog.Errorf("trying to destroy %q resctrl collector but: %v", c.id, err) } - c.mu.Unlock() } func (c *collector) clear() error { - // Not allowed to remove root resctrl directory. + // Not allowed to remove root or undefined resctrl directory. if c.id != rootContainer && c.resctrlPath != "" { err := os.RemoveAll(c.resctrlPath) if err != nil { diff --git a/resctrl/utils.go b/resctrl/utils.go index 6835d7dfbd..cccfb18919 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -81,7 +81,7 @@ func Setup() error { return nil } -func getResctrlPath(containerName string, getContainerPids func() ([]string, error)) (string, error) { +func prepareMonitoringGroup(containerName string, getContainerPids func() ([]string, error)) (string, error) { if containerName == rootContainer { return rootResctrl, nil } @@ -92,15 +92,15 @@ func getResctrlPath(containerName string, getContainerPids func() ([]string, err } if len(pids) == 0 { - return "", fmt.Errorf("couldn't determine resctrl for %q: there is no pids in cGroup", containerName) + return "", fmt.Errorf("couldn't obtain %q container pids, there is no pids in cgroup", containerName) } path, err := findControlGroup(pids) if err != nil { - return "", err + return "", fmt.Errorf("couldn't find control group matching %q container: %w", containerName, err) } - if containerName[0] == '/' { + if containerName[0] == '/' && (len(containerName) > 1) { containerName = containerName[1:] } @@ -108,11 +108,9 @@ func getResctrlPath(containerName string, getContainerPids func() ([]string, err monGroupPath := filepath.Join(path, monitoringGroupDir, properContainerName) // Create new mon_group if not exists. - if stat, _ := os.Stat(monGroupPath); stat == nil { - err = os.Mkdir(monGroupPath, os.ModePerm) - } + err = os.MkdirAll(monGroupPath, os.ModePerm) if err != nil { - return "", err + return "", fmt.Errorf("couldn't create monitoring group directory for %q container: %w", containerName, err) } for _, pid := range pids { diff --git a/resctrl/utils_test.go b/resctrl/utils_test.go index 464b985550..3eebd3bae0 100644 --- a/resctrl/utils_test.go +++ b/resctrl/utils_test.go @@ -279,7 +279,7 @@ func checkError(t *testing.T, err error, expected string) { } } -func TestGetResctrlPath(t *testing.T) { +func TestPrepareMonitoringGroup(t *testing.T) { rootResctrl = mockResctrl() defer os.RemoveAll(rootResctrl) @@ -316,7 +316,7 @@ func TestGetResctrlPath(t *testing.T) { } for _, test := range testCases { - actual, err := getResctrlPath(test.container, test.getContainerPids) + actual, err := prepareMonitoringGroup(test.container, test.getContainerPids) assert.Equal(t, test.expected, actual) checkError(t, err, test.err) } @@ -566,7 +566,7 @@ func TestGetStats(t *testing.T) { } for _, test := range testCases { - containerPath, _ := getResctrlPath(test.container, mockGetContainerPids) + containerPath, _ := prepareMonitoringGroup(test.container, mockGetContainerPids) mockResctrlMonData(containerPath) actual, err := getStats(containerPath) checkError(t, err, test.err) From 801e883ef6b121ec607205688bc04646d9d22133 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Wed, 17 Mar 2021 18:19:34 +0100 Subject: [PATCH 11/49] Refactor resctrl utilies. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/collector.go | 2 +- resctrl/utils.go | 57 ++++++++++++++++++++++++------------------- resctrl/utils_test.go | 14 +++++------ 3 files changed, 40 insertions(+), 33 deletions(-) diff --git a/resctrl/collector.go b/resctrl/collector.go index ae6bb87292..fcce05f9fe 100644 --- a/resctrl/collector.go +++ b/resctrl/collector.go @@ -112,7 +112,7 @@ func (c *collector) UpdateStats(stats *info.ContainerStats) error { if c.running { stats.Resctrl = info.ResctrlStats{} - resctrlStats, err := getStats(c.resctrlPath) + resctrlStats, err := getIntelRDTStatsFrom(c.resctrlPath) if err != nil { return err } diff --git a/resctrl/utils.go b/resctrl/utils.go index cccfb18919..ec5d3df396 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -44,8 +44,8 @@ const ( infoDirName = "info" monDataDirName = "mon_data" monGroupsDirName = "mon_groups" - noPidsError = "there are no pids passed" - noContainerError = "there are no container name passed" + noPidsPassedError = "there are no pids passed" + noContainerNameError = "there are no container name passed" llcOccupancyFileName = "llc_occupancy" mbmLocalBytesFileName = "mbm_local_bytes" mbmTotalBytesFileName = "mbm_total_bytes" @@ -128,9 +128,11 @@ func prepareMonitoringGroup(containerName string, getContainerPids func() ([]str if err != nil { secondError := os.Remove(monGroupPath) if secondError != nil { - return "", fmt.Errorf("%v : %v", err, secondError) + return "", fmt.Errorf( + "coudn't assign pids to %q container monitoring group: %w \n couldn't clear %q monitoring group: %v", + containerName, err, containerName, secondError) } - return "", err + return "", fmt.Errorf("coudn't assign pids to %q container monitoring group: %w", containerName, err) } } } @@ -139,12 +141,13 @@ func prepareMonitoringGroup(containerName string, getContainerPids func() ([]str } func getPids(containerName string) ([]int, error) { - if containerName == "" { - return nil, fmt.Errorf(noContainerError) + if len(containerName) == 0 { + // No container name passed. + return nil, fmt.Errorf(noContainerNameError) } pids, err := cgroups.GetAllPids(filepath.Join(pidsPath, containerName)) if err != nil { - return nil, fmt.Errorf("couldn't obtain pids: %v", err) + return nil, fmt.Errorf("couldn't obtain pids for %q container: %v", containerName, err) } return pids, nil } @@ -164,7 +167,7 @@ func getAllProcessThreads(threadFiles []os.FileInfo) ([]int, error) { func findControlGroup(pids []string) (string, error) { if len(pids) == 0 { - return "", fmt.Errorf(noPidsError) + return "", fmt.Errorf(noPidsPassedError) } availablePaths, err := filepath.Glob(filepath.Join(rootResctrl, "*")) if err != nil { @@ -186,7 +189,7 @@ func findControlGroup(pids []string) (string, error) { case filepath.Join(rootResctrl, schemataFileName): continue case filepath.Join(rootResctrl, tasksFileName): - inGroup, err := checkControlGroup(rootResctrl, pids) + inGroup, err := arePIDsInControlGroup(rootResctrl, pids) if err != nil { return "", err } @@ -194,7 +197,7 @@ func findControlGroup(pids []string) (string, error) { return rootResctrl, nil } default: - inGroup, err := checkControlGroup(path, pids) + inGroup, err := arePIDsInControlGroup(path, pids) if err != nil { return "", err } @@ -207,14 +210,15 @@ func findControlGroup(pids []string) (string, error) { return "", fmt.Errorf("there is no available control group") } -func checkControlGroup(path string, pids []string) (bool, error) { +func arePIDsInControlGroup(path string, pids []string) (bool, error) { if len(pids) == 0 { - return false, fmt.Errorf(noPidsError) + // No container pids passed. + return false, fmt.Errorf(noPidsPassedError) } taskFile, err := fscommon.ReadFile(path, "tasks") if err != nil { - return false, fmt.Errorf("couldn't obtain pids: %v", err) + return false, fmt.Errorf("couldn't obtain pids from %q path: %w", path, err) } for _, pid := range pids { @@ -226,7 +230,7 @@ func checkControlGroup(path string, pids []string) (bool, error) { return true, nil } -func readStat(path string) (uint64, error) { +func readStatFrom(path string) (uint64, error) { context, err := ioutil.ReadFile(path) if err != nil { return 0, err @@ -240,7 +244,7 @@ func readStat(path string) (uint64, error) { return stat, nil } -func getStats(path string) (intelrdt.Stats, error) { +func getIntelRDTStatsFrom(path string) (intelrdt.Stats, error) { stats := intelrdt.Stats{} numaDirs, err := filepath.Glob(filepath.Join(path, monDataDirName, "*")) @@ -252,32 +256,35 @@ func getStats(path string) (intelrdt.Stats, error) { return stats, fmt.Errorf("there is no mon_data NUMA dirs: %q", path) } - stats.CMTStats = &[]intelrdt.CMTNumaNodeStats{} - stats.MBMStats = &[]intelrdt.MBMNumaNodeStats{} + var cmtStats []intelrdt.CMTNumaNodeStats + var mbmStats []intelrdt.MBMNumaNodeStats for _, dir := range numaDirs { if enabledCMT { - cmtStats := intelrdt.CMTNumaNodeStats{} - cmtStats.LLCOccupancy, err = readStat(filepath.Join(dir, llcOccupancyFileName)) + llcOccupancy, err := readStatFrom(filepath.Join(dir, llcOccupancyFileName)) if err != nil { return stats, err } - *stats.CMTStats = append(*stats.CMTStats, cmtStats) - + cmtStats = append(cmtStats, intelrdt.CMTNumaNodeStats{LLCOccupancy: llcOccupancy}) } if enabledMBM { - mbmStats := intelrdt.MBMNumaNodeStats{} - mbmStats.MBMTotalBytes, err = readStat(filepath.Join(dir, mbmTotalBytesFileName)) + mbmTotalBytes, err := readStatFrom(filepath.Join(dir, mbmTotalBytesFileName)) if err != nil { return stats, err } - mbmStats.MBMLocalBytes, err = readStat(filepath.Join(dir, mbmLocalBytesFileName)) + mbmLocalBytes, err := readStatFrom(filepath.Join(dir, mbmLocalBytesFileName)) if err != nil { return stats, err } - *stats.MBMStats = append(*stats.MBMStats, mbmStats) + mbmStats = append(mbmStats, intelrdt.MBMNumaNodeStats{ + MBMTotalBytes: mbmTotalBytes, + MBMLocalBytes: mbmLocalBytes, + }) } } + stats.CMTStats = &cmtStats + stats.MBMStats = &mbmStats + return stats, nil } diff --git a/resctrl/utils_test.go b/resctrl/utils_test.go index 3eebd3bae0..2de8343bc6 100644 --- a/resctrl/utils_test.go +++ b/resctrl/utils_test.go @@ -334,7 +334,7 @@ func TestGetPids(t *testing.T) { { "", nil, - noContainerError, + noContainerNameError, }, { "container", @@ -344,7 +344,7 @@ func TestGetPids(t *testing.T) { { "no_container", nil, - fmt.Sprintf("couldn't obtain pids: lstat %v: no such file or directory", filepath.Join(pidsPath, "no_container")), + fmt.Sprintf("couldn't obtain pids for \"no_container\" container: lstat %v: no such file or directory", filepath.Join(pidsPath, "no_container")), }, } @@ -425,7 +425,7 @@ func TestFindControlGroup(t *testing.T) { } } -func TestCheckControlGroup(t *testing.T) { +func TestArePIDsInControlGroup(t *testing.T) { rootResctrl = mockResctrl() defer os.RemoveAll(rootResctrl) @@ -455,20 +455,20 @@ func TestCheckControlGroup(t *testing.T) { }, { false, - fmt.Sprintf("couldn't obtain pids: open %s: no such file or directory", filepath.Join(rootResctrl, monitoringGroupDir, tasksFileName)), + fmt.Sprintf("couldn't obtain pids from %q path: open %s: no such file or directory", filepath.Join(rootResctrl, monitoringGroupDir), filepath.Join(rootResctrl, monitoringGroupDir, tasksFileName)), filepath.Join(rootResctrl, monitoringGroupDir), []string{"1", "2"}, }, { false, - noPidsError, + noPidsPassedError, rootResctrl, nil, }, } for _, test := range testCases { - actual, err := checkControlGroup(test.path, test.pids) + actual, err := arePIDsInControlGroup(test.path, test.pids) assert.Equal(t, test.expected, actual) checkError(t, err, test.err) } @@ -568,7 +568,7 @@ func TestGetStats(t *testing.T) { for _, test := range testCases { containerPath, _ := prepareMonitoringGroup(test.container, mockGetContainerPids) mockResctrlMonData(containerPath) - actual, err := getStats(containerPath) + actual, err := getIntelRDTStatsFrom(containerPath) checkError(t, err, test.err) assert.Equal(t, test.expected.CMTStats, actual.CMTStats) assert.Equal(t, test.expected.MBMStats, actual.MBMStats) From 75fabd701cea7f2e5ce0c7703bdd7c3e3d7b73ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Wed, 17 Mar 2021 18:37:05 +0100 Subject: [PATCH 12/49] Better name vars. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/utils.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/resctrl/utils.go b/resctrl/utils.go index ec5d3df396..8d75fc00a5 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -247,19 +247,19 @@ func readStatFrom(path string) (uint64, error) { func getIntelRDTStatsFrom(path string) (intelrdt.Stats, error) { stats := intelrdt.Stats{} - numaDirs, err := filepath.Glob(filepath.Join(path, monDataDirName, "*")) + statsDirectories, err := filepath.Glob(filepath.Join(path, monDataDirName, "*")) if err != nil { return stats, err } - if len(numaDirs) == 0 { - return stats, fmt.Errorf("there is no mon_data NUMA dirs: %q", path) + if len(statsDirectories) == 0 { + return stats, fmt.Errorf("there is no mon_data stats directories: %q", path) } var cmtStats []intelrdt.CMTNumaNodeStats var mbmStats []intelrdt.MBMNumaNodeStats - for _, dir := range numaDirs { + for _, dir := range statsDirectories { if enabledCMT { llcOccupancy, err := readStatFrom(filepath.Join(dir, llcOccupancyFileName)) if err != nil { From a1619a76ff96d2ae482cae1bcd7d9c7afb5784ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Thu, 18 Mar 2021 09:02:42 +0100 Subject: [PATCH 13/49] Add missing python3 in Dockerfile. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- deploy/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/Dockerfile b/deploy/Dockerfile index e7346fa780..4163ec4303 100644 --- a/deploy/Dockerfile +++ b/deploy/Dockerfile @@ -1,6 +1,6 @@ FROM alpine:3.12 AS build -RUN apk --no-cache add libc6-compat device-mapper findutils zfs build-base linux-headers go bash git wget cmake pkgconfig ndctl-dev && \ +RUN apk --no-cache add libc6-compat device-mapper findutils zfs build-base linux-headers go python3 bash git wget cmake pkgconfig ndctl-dev && \ apk --no-cache add thin-provisioning-tools --repository http://dl-3.alpinelinux.org/alpine/edge/main/ && \ echo 'hosts: files mdns4_minimal [NOTFOUND=return] dns mdns4' >> /etc/nsswitch.conf && \ rm -rf /var/cache/apk/* From 0f274e4b3818f27f146b680c2f0c3c8339d04a95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Thu, 18 Mar 2021 17:18:31 +0100 Subject: [PATCH 14/49] Add missing procps in Dockerfile. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- deploy/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/Dockerfile b/deploy/Dockerfile index 4163ec4303..2af193bee1 100644 --- a/deploy/Dockerfile +++ b/deploy/Dockerfile @@ -34,7 +34,7 @@ RUN ./build/build.sh FROM alpine:3.12 MAINTAINER dengnan@google.com vmarmol@google.com vishnuk@google.com jimmidyson@gmail.com stclair@google.com -RUN apk --no-cache add libc6-compat device-mapper findutils zfs ndctl && \ +RUN apk --no-cache add libc6-compat device-mapper findutils zfs ndctl procps && \ apk --no-cache add thin-provisioning-tools --repository http://dl-3.alpinelinux.org/alpine/edge/main/ && \ echo 'hosts: files mdns4_minimal [NOTFOUND=return] dns mdns4' >> /etc/nsswitch.conf && \ rm -rf /var/cache/apk/* From 03f8571b6e0b91488a51faa9ae0c27ba5a30c831 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Mon, 10 May 2021 19:13:10 +0200 Subject: [PATCH 15/49] Use const instead of magic value. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/collector.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/resctrl/collector.go b/resctrl/collector.go index fcce05f9fe..6b7eea687d 100644 --- a/resctrl/collector.go +++ b/resctrl/collector.go @@ -28,6 +28,8 @@ import ( info "github.com/google/cadvisor/info/v1" ) +const noInterval = 0 + type collector struct { id string interval time.Duration @@ -47,7 +49,7 @@ func (c *collector) setup() error { var err error c.resctrlPath, err = prepareMonitoringGroup(c.id, c.getContainerPids) - if c.interval != 0 { + if c.interval != noInterval { if err != nil { c.running = false klog.Errorf("Failed to setup container %q resctrl collector: %w \n Trying again in next intervals.", c.id, err) From 29e1e196b0cfb0c2f9fbe97828521e9a54a65095 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Mon, 10 May 2021 19:19:48 +0200 Subject: [PATCH 16/49] Delete an unnecessary setting of c.running to false. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/collector.go | 1 - 1 file changed, 1 deletion(-) diff --git a/resctrl/collector.go b/resctrl/collector.go index 6b7eea687d..6fe3298c98 100644 --- a/resctrl/collector.go +++ b/resctrl/collector.go @@ -51,7 +51,6 @@ func (c *collector) setup() error { if c.interval != noInterval { if err != nil { - c.running = false klog.Errorf("Failed to setup container %q resctrl collector: %w \n Trying again in next intervals.", c.id, err) } else { c.running = true From fc3b8cee1db74589d3e621621c40006277d0b180 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Mon, 10 May 2021 19:22:48 +0200 Subject: [PATCH 17/49] Do not wrap the error from cAdvisor. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/utils.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resctrl/utils.go b/resctrl/utils.go index 8d75fc00a5..b1ff54cfeb 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -97,7 +97,7 @@ func prepareMonitoringGroup(containerName string, getContainerPids func() ([]str path, err := findControlGroup(pids) if err != nil { - return "", fmt.Errorf("couldn't find control group matching %q container: %w", containerName, err) + return "", fmt.Errorf("couldn't find control group matching %q container: %v", containerName, err) } if containerName[0] == '/' && (len(containerName) > 1) { From ecb0156f6ce5512ff66f85aae7dc80460b77b814 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Mon, 10 May 2021 19:25:23 +0200 Subject: [PATCH 18/49] Use path in error message. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/utils.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resctrl/utils.go b/resctrl/utils.go index b1ff54cfeb..da07fbe8a8 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -213,7 +213,7 @@ func findControlGroup(pids []string) (string, error) { func arePIDsInControlGroup(path string, pids []string) (bool, error) { if len(pids) == 0 { // No container pids passed. - return false, fmt.Errorf(noPidsPassedError) + return false, fmt.Errorf("couldn't obtain pids from %q path: %v", path, noPidsPassedError) } taskFile, err := fscommon.ReadFile(path, "tasks") From e1f5e9b71c7e8ed7a04c49f2c38bd729bc4a95f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Tue, 11 May 2021 21:45:40 +0200 Subject: [PATCH 19/49] Avoid goroutine looping. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/collector.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/resctrl/collector.go b/resctrl/collector.go index 6fe3298c98..b46a352a9e 100644 --- a/resctrl/collector.go +++ b/resctrl/collector.go @@ -36,6 +36,7 @@ type collector struct { getContainerPids func() ([]string, error) resctrlPath string running bool + destroyed bool numberOfNUMANodes int mu sync.Mutex } @@ -59,6 +60,9 @@ func (c *collector) setup() error { for { time.Sleep(c.interval) c.mu.Lock() + if c.destroyed { + break + } klog.V(5).Infof("Trying to check %q containers control group.", c.id) if c.running { err = c.checkMonitoringGroup() @@ -71,8 +75,6 @@ func (c *collector) setup() error { if err != nil { c.running = false klog.Errorf("Failed to setup container %q resctrl collector: %w \n Trying again in next intervals.", c.id, err) - } else { - c.running = true } } c.mu.Unlock() @@ -146,6 +148,7 @@ func (c *collector) Destroy() { if err != nil { klog.Errorf("trying to destroy %q resctrl collector but: %v", c.id, err) } + c.destroyed = true } func (c *collector) clear() error { From 3951953df4dadf402fc24361d6dfb2989ed4dfb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Tue, 11 May 2021 22:07:47 +0200 Subject: [PATCH 20/49] Do not use fscommon package from runc/libcontainer. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/utils.go | 40 +++++++++++++++++++++++++++++++++++++--- resctrl/utils_test.go | 30 +++++++++++++++++++++++++++++- 2 files changed, 66 insertions(+), 4 deletions(-) diff --git a/resctrl/utils.go b/resctrl/utils.go index da07fbe8a8..33b553a7a3 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -28,7 +28,6 @@ import ( "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fs2" - "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" "github.com/opencontainers/runc/libcontainer/intelrdt" ) @@ -216,13 +215,20 @@ func arePIDsInControlGroup(path string, pids []string) (bool, error) { return false, fmt.Errorf("couldn't obtain pids from %q path: %v", path, noPidsPassedError) } - taskFile, err := fscommon.ReadFile(path, "tasks") + tasksFile, err := ioutil.ReadFile(filepath.Join(path, "tasks")) if err != nil { return false, fmt.Errorf("couldn't obtain pids from %q path: %w", path, err) } + if len(tasksFile) == 0 { + return false, nil + } + + tasks := readTasksFile(tasksFile) + for _, pid := range pids { - if !strings.Contains(taskFile, pid) { + _, ok := tasks[pid] + if !ok { return false, nil } } @@ -230,6 +236,34 @@ func arePIDsInControlGroup(path string, pids []string) (bool, error) { return true, nil } +func readTasksFile(tasksFile []byte) map[string]bool { + const ( + newLineAsciiCode = 0xA + zeroAsciiCode = 0x30 + nineAsciiCode = 0x39 + ) + tasks := make(map[string]bool) + var task []byte + for _, b := range tasksFile { + if b == newLineAsciiCode { + if len(task) != 0 { + tasks[string(task)] = true + task = []byte{} + } + continue + } + if b >= zeroAsciiCode && b <= nineAsciiCode { + task = append(task, b) + } + } + + if len(task) > 0 { + tasks[string(task)] = true + } + + return tasks +} + func readStatFrom(path string) (uint64, error) { context, err := ioutil.ReadFile(path) if err != nil { diff --git a/resctrl/utils_test.go b/resctrl/utils_test.go index 2de8343bc6..c30ade6ae9 100644 --- a/resctrl/utils_test.go +++ b/resctrl/utils_test.go @@ -461,7 +461,7 @@ func TestArePIDsInControlGroup(t *testing.T) { }, { false, - noPidsPassedError, + fmt.Sprintf("couldn't obtain pids from %q path: %v", rootResctrl, noPidsPassedError), rootResctrl, nil, }, @@ -574,3 +574,31 @@ func TestGetStats(t *testing.T) { assert.Equal(t, test.expected.MBMStats, actual.MBMStats) } } + +func TestReadTasksFile(t *testing.T) { + var testCases = []struct { + tasksFile []byte + expected map[string]bool + }{ + {[]byte{0x31, 0x32, 0xA, 0x37, 0x37}, + map[string]bool{ + "12": true, + "77": true}, + }, + {[]byte{0xA, 0x32, 0xA}, + map[string]bool{ + "2": true}, + }, + {[]byte{0x0, 0x2A, 0xA}, + map[string]bool{}, + }, + {[]byte{}, + map[string]bool{}, + }, + } + + for _, test := range testCases { + actual := readTasksFile(test.tasksFile) + assert.Equal(t, test.expected, actual) + } +} From 7d363059fef4eeb4720339c382ab339eb3363d39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Tue, 11 May 2021 22:15:29 +0200 Subject: [PATCH 21/49] Fix const ASCII names. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/utils.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/resctrl/utils.go b/resctrl/utils.go index 33b553a7a3..e641c041b7 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -238,21 +238,21 @@ func arePIDsInControlGroup(path string, pids []string) (bool, error) { func readTasksFile(tasksFile []byte) map[string]bool { const ( - newLineAsciiCode = 0xA - zeroAsciiCode = 0x30 - nineAsciiCode = 0x39 + newLineASCIICode = 0xA + zeroASCIICode = 0x30 + nineASCIICode = 0x39 ) tasks := make(map[string]bool) var task []byte for _, b := range tasksFile { - if b == newLineAsciiCode { + if b == newLineASCIICode { if len(task) != 0 { tasks[string(task)] = true task = []byte{} } continue } - if b >= zeroAsciiCode && b <= nineAsciiCode { + if b >= zeroASCIICode && b <= nineASCIICode { task = append(task, b) } } From ee42de26c9d4b7a6142df356c7fc81798ca5c873 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Wed, 12 May 2021 09:05:48 +0200 Subject: [PATCH 22/49] Use same operator in func. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/utils.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resctrl/utils.go b/resctrl/utils.go index e641c041b7..d41f0def52 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -246,7 +246,7 @@ func readTasksFile(tasksFile []byte) map[string]bool { var task []byte for _, b := range tasksFile { if b == newLineASCIICode { - if len(task) != 0 { + if len(task) > 0 { tasks[string(task)] = true task = []byte{} } From 7c1eeb2625765ee2d3da35cf8b0786082707de41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Wed, 12 May 2021 10:12:31 +0200 Subject: [PATCH 23/49] Introduce const variables. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/utils.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/resctrl/utils.go b/resctrl/utils.go index d41f0def52..440af002ae 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -48,6 +48,8 @@ const ( llcOccupancyFileName = "llc_occupancy" mbmLocalBytesFileName = "mbm_local_bytes" mbmTotalBytesFileName = "mbm_total_bytes" + containerPrefix = '/' + minContainerNameLen = 2 // "/" e.g. "/a" ) var ( @@ -99,7 +101,7 @@ func prepareMonitoringGroup(containerName string, getContainerPids func() ([]str return "", fmt.Errorf("couldn't find control group matching %q container: %v", containerName, err) } - if containerName[0] == '/' && (len(containerName) > 1) { + if len(containerName) >= minContainerNameLen && containerName[0] == containerPrefix { containerName = containerName[1:] } From 3ffe4221f89291f33202c0a8937efdd6bc2a4560 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Fri, 18 Jun 2021 14:44:15 +0200 Subject: [PATCH 24/49] Introduce vendor_id in MachineInfo. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- info/v1/machine.go | 4 ++++ machine/info.go | 1 + machine/machine.go | 16 ++++++++++++++++ machine/topology_test.go | 11 +++++++++++ 4 files changed, 32 insertions(+) diff --git a/info/v1/machine.go b/info/v1/machine.go index 68a81adb3e..fca9d1eb7d 100644 --- a/info/v1/machine.go +++ b/info/v1/machine.go @@ -178,6 +178,9 @@ type MachineInfo struct { // The time of this information point. Timestamp time.Time `json:"timestamp"` + // Vendor id of CPU. + VendorID string `json:"vendor_id"` + // The number of cores in this machine. NumCores int `json:"num_cores"` @@ -249,6 +252,7 @@ func (m *MachineInfo) Clone() *MachineInfo { } } copy := MachineInfo{ + VendorID: m.VendorID, Timestamp: m.Timestamp, NumCores: m.NumCores, NumPhysicalCores: m.NumPhysicalCores, diff --git a/machine/info.go b/machine/info.go index 8db8a7e76d..50f6da2072 100644 --- a/machine/info.go +++ b/machine/info.go @@ -121,6 +121,7 @@ func Info(sysFs sysfs.SysFs, fsInfo fs.FsInfo, inHostNamespace bool) (*info.Mach machineInfo := &info.MachineInfo{ Timestamp: time.Now(), + VendorID: GetVendorID(cpuinfo), NumCores: numCores, NumPhysicalCores: GetPhysicalCores(cpuinfo), NumSockets: GetSockets(cpuinfo), diff --git a/machine/machine.go b/machine/machine.go index 706bba6378..47a4e6fddc 100644 --- a/machine/machine.go +++ b/machine/machine.go @@ -43,6 +43,7 @@ var ( cpuClockSpeedMHz = regexp.MustCompile(`(?:cpu MHz|CPU MHz|clock)\s*:\s*([0-9]+\.[0-9]+)(?:MHz)?`) memoryCapacityRegexp = regexp.MustCompile(`MemTotal:\s*([0-9]+) kB`) swapCapacityRegexp = regexp.MustCompile(`SwapTotal:\s*([0-9]+) kB`) + vendorIDRegexp = regexp.MustCompile(`vendor_id\s*:\s*(\w+)`) cpuBusPath = "/sys/bus/cpu/devices/" isMemoryController = regexp.MustCompile("mc[0-9]+") @@ -54,6 +55,21 @@ var ( const memTypeFileName = "dimm_mem_type" const sizeFileName = "size" +// GetVendorID returns "vendor_id" reading /proc/cpuinfo file. +func GetVendorID(procInfo []byte) string { + vendorID := "" + + matches := vendorIDRegexp.FindSubmatch(procInfo) + if len(matches) != 2 { + klog.Error("Cannot read vendor id correctly, set empty") + return vendorID + } + + vendorID = string(matches[1]) + + return vendorID +} + // GetPhysicalCores returns number of CPU cores reading /proc/cpuinfo file or if needed information from sysfs cpu path func GetPhysicalCores(procInfo []byte) int { numCores := getUniqueMatchesCount(string(procInfo), coreRegExp) diff --git a/machine/topology_test.go b/machine/topology_test.go index c0f2cc826f..3c80b72dec 100644 --- a/machine/topology_test.go +++ b/machine/topology_test.go @@ -462,3 +462,14 @@ func TestClockSpeedOnCpuLowerCase(t *testing.T) { assert.NotNil(t, clockSpeed) assert.Equal(t, uint64(1450*1000), clockSpeed) } + +func TestGetVendorID(t *testing.T) { + testfile := "./testdata/cpuinfo_onesocket_many_NUMAs" + + testcpuinfo, err := ioutil.ReadFile(testfile) + assert.Nil(t, err) + assert.NotNil(t, testcpuinfo) + + vendorID := GetVendorID(testcpuinfo) + assert.Equal(t, "GenuineIntel", vendorID) +} From ab3ee30154cda38a93a5ff36a6612679f29bc408 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Fri, 18 Jun 2021 14:47:04 +0200 Subject: [PATCH 25/49] Extend files which should be omitted when searching control group. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/utils.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/resctrl/utils.go b/resctrl/utils.go index 440af002ae..d098b1e3b8 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -40,6 +40,8 @@ const ( cpusListFileName = "cpus_list" schemataFileName = "schemata" tasksFileName = "tasks" + modeFileName = "mode" + sizeFileName = "size" infoDirName = "info" monDataDirName = "mon_data" monGroupsDirName = "mon_groups" @@ -181,6 +183,10 @@ func findControlGroup(pids []string) (string, error) { continue case filepath.Join(rootResctrl, cpusListFileName): continue + case filepath.Join(rootResctrl, modeFileName): + continue + case filepath.Join(rootResctrl, sizeFileName): + continue case filepath.Join(rootResctrl, infoDirName): continue case filepath.Join(rootResctrl, monDataDirName): From a9869999d30473c906cad3cd709c98b4eee4d0f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Fri, 18 Jun 2021 14:48:09 +0200 Subject: [PATCH 26/49] Add info about possible bug when reading resctrl values on AMD. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- manager/manager.go | 2 +- resctrl/collector.go | 7 ++++--- resctrl/collector_test.go | 6 +++--- resctrl/manager.go | 7 ++++--- resctrl/manager_test.go | 4 ++-- resctrl/utils.go | 24 ++++++++++++++++++------ resctrl/utils_test.go | 2 +- 7 files changed, 33 insertions(+), 19 deletions(-) diff --git a/manager/manager.go b/manager/manager.go index 5154425c34..38bcc7641f 100644 --- a/manager/manager.go +++ b/manager/manager.go @@ -217,7 +217,7 @@ func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, houskeepingConfig return nil, err } - newManager.resctrlManager, err = resctrl.NewManager(resctrlInterval, resctrl.Setup) + newManager.resctrlManager, err = resctrl.NewManager(resctrlInterval, resctrl.Setup, machineInfo.VendorID) if err != nil { klog.V(4).Infof("Cannot gather resctrl metrics: %v", err) } diff --git a/resctrl/collector.go b/resctrl/collector.go index b46a352a9e..3363f88a4f 100644 --- a/resctrl/collector.go +++ b/resctrl/collector.go @@ -38,12 +38,13 @@ type collector struct { running bool destroyed bool numberOfNUMANodes int + vendorID string mu sync.Mutex } -func newCollector(id string, getContainerPids func() ([]string, error), interval time.Duration, numberOfNUMANodes int) *collector { +func newCollector(id string, getContainerPids func() ([]string, error), interval time.Duration, numberOfNUMANodes int, vendorID string) *collector { return &collector{id: id, interval: interval, getContainerPids: getContainerPids, numberOfNUMANodes: numberOfNUMANodes, - mu: sync.Mutex{}} + vendorID: vendorID, mu: sync.Mutex{}} } func (c *collector) setup() error { @@ -115,7 +116,7 @@ func (c *collector) UpdateStats(stats *info.ContainerStats) error { if c.running { stats.Resctrl = info.ResctrlStats{} - resctrlStats, err := getIntelRDTStatsFrom(c.resctrlPath) + resctrlStats, err := getIntelRDTStatsFrom(c.resctrlPath, c.vendorID) if err != nil { return err } diff --git a/resctrl/collector_test.go b/resctrl/collector_test.go index 1a95d13b91..e3d281a8ed 100644 --- a/resctrl/collector_test.go +++ b/resctrl/collector_test.go @@ -40,7 +40,7 @@ func TestNewCollectorWithSetup(t *testing.T) { expectedID := "container" expectedResctrlPath := filepath.Join(rootResctrl, monGroupsDirName, expectedID) - collector := newCollector(expectedID, mockGetContainerPids, 0, 2) + collector := newCollector(expectedID, mockGetContainerPids, 0, 2, "") err := collector.setup() assert.NoError(t, err) @@ -58,7 +58,7 @@ func TestUpdateStats(t *testing.T) { processPath = mockProcFs() defer os.RemoveAll(processPath) - collector := newCollector("container", mockGetContainerPids, 0, 2) + collector := newCollector("container", mockGetContainerPids, 0, 2, "") err := collector.setup() assert.NoError(t, err) @@ -96,7 +96,7 @@ func TestDestroy(t *testing.T) { processPath = mockProcFs() defer os.RemoveAll(processPath) - collector := newCollector("container", mockGetContainerPids, 0, 2) + collector := newCollector("container", mockGetContainerPids, 0, 2, "") err := collector.setup() if err != nil { t.Fail() diff --git a/resctrl/manager.go b/resctrl/manager.go index 86e7fbc5aa..032daa2f43 100644 --- a/resctrl/manager.go +++ b/resctrl/manager.go @@ -32,10 +32,11 @@ type Manager interface { type manager struct { stats.NoopDestroy interval time.Duration + vendorID string } func (m *manager) GetCollector(containerName string, getContainerPids func() ([]string, error), numberOfNUMANodes int) (stats.Collector, error) { - collector := newCollector(containerName, getContainerPids, m.interval, numberOfNUMANodes) + collector := newCollector(containerName, getContainerPids, m.interval, numberOfNUMANodes, m.vendorID) err := collector.setup() if err != nil { return &stats.NoopCollector{}, err @@ -44,7 +45,7 @@ func (m *manager) GetCollector(containerName string, getContainerPids func() ([] return collector, nil } -func NewManager(interval time.Duration, setup func() error) (Manager, error) { +func NewManager(interval time.Duration, setup func() error, vendorID string) (Manager, error) { err := setup() if err != nil { return &NoopManager{}, err @@ -57,7 +58,7 @@ func NewManager(interval time.Duration, setup func() error) (Manager, error) { return &NoopManager{}, errors.New("there are no monitoring features available") } - return &manager{interval: interval}, nil + return &manager{interval: interval, vendorID: vendorID}, nil } type NoopManager struct { diff --git a/resctrl/manager_test.go b/resctrl/manager_test.go index 809ddb0cba..644285facf 100644 --- a/resctrl/manager_test.go +++ b/resctrl/manager_test.go @@ -97,7 +97,7 @@ func TestNewManager(t *testing.T) { enabledMBM = test.enabledMBM return nil } - got, err := NewManager(0, setup) + got, err := NewManager(0, setup, "") assert.Equal(t, got, test.expected) checkError(t, err, test.err) } @@ -121,7 +121,7 @@ func TestGetCollector(t *testing.T) { enabledMBM = true return nil } - manager, err := NewManager(0, setup) + manager, err := NewManager(0, setup, "") assert.NoError(t, err) _, err = manager.GetCollector(expectedID, mockGetContainerPids, 2) diff --git a/resctrl/utils.go b/resctrl/utils.go index d098b1e3b8..511c3e3f45 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -52,6 +52,7 @@ const ( mbmTotalBytesFileName = "mbm_total_bytes" containerPrefix = '/' minContainerNameLen = 2 // "/" e.g. "/a" + unavailable = "Unavailable" ) var ( @@ -272,13 +273,24 @@ func readTasksFile(tasksFile []byte) map[string]bool { return tasks } -func readStatFrom(path string) (uint64, error) { +func readStatFrom(path string, vendorID string) (uint64, error) { context, err := ioutil.ReadFile(path) if err != nil { return 0, err } - stat, err := strconv.ParseUint(string(bytes.TrimSpace(context)), 10, 64) + contextString := string(bytes.TrimSpace(context)) + + if contextString == unavailable { + err := fmt.Errorf("\"Unavailable\" value from file %q", path) + if vendorID == "AuthenticAMD" { + kernelBugzillaLink := "https://bugzilla.kernel.org/show_bug.cgi?id=213311" + err = fmt.Errorf("%v, possible bug: %q", err, kernelBugzillaLink) + } + return 0, err + } + + stat, err := strconv.ParseUint(contextString, 10, 64) if err != nil { return stat, fmt.Errorf("unable to parse %q as a uint from file %q", string(context), path) } @@ -286,7 +298,7 @@ func readStatFrom(path string) (uint64, error) { return stat, nil } -func getIntelRDTStatsFrom(path string) (intelrdt.Stats, error) { +func getIntelRDTStatsFrom(path string, vendorID string) (intelrdt.Stats, error) { stats := intelrdt.Stats{} statsDirectories, err := filepath.Glob(filepath.Join(path, monDataDirName, "*")) @@ -303,18 +315,18 @@ func getIntelRDTStatsFrom(path string) (intelrdt.Stats, error) { for _, dir := range statsDirectories { if enabledCMT { - llcOccupancy, err := readStatFrom(filepath.Join(dir, llcOccupancyFileName)) + llcOccupancy, err := readStatFrom(filepath.Join(dir, llcOccupancyFileName), vendorID) if err != nil { return stats, err } cmtStats = append(cmtStats, intelrdt.CMTNumaNodeStats{LLCOccupancy: llcOccupancy}) } if enabledMBM { - mbmTotalBytes, err := readStatFrom(filepath.Join(dir, mbmTotalBytesFileName)) + mbmTotalBytes, err := readStatFrom(filepath.Join(dir, mbmTotalBytesFileName), vendorID) if err != nil { return stats, err } - mbmLocalBytes, err := readStatFrom(filepath.Join(dir, mbmLocalBytesFileName)) + mbmLocalBytes, err := readStatFrom(filepath.Join(dir, mbmLocalBytesFileName), vendorID) if err != nil { return stats, err } diff --git a/resctrl/utils_test.go b/resctrl/utils_test.go index c30ade6ae9..9ad93aab52 100644 --- a/resctrl/utils_test.go +++ b/resctrl/utils_test.go @@ -568,7 +568,7 @@ func TestGetStats(t *testing.T) { for _, test := range testCases { containerPath, _ := prepareMonitoringGroup(test.container, mockGetContainerPids) mockResctrlMonData(containerPath) - actual, err := getIntelRDTStatsFrom(containerPath) + actual, err := getIntelRDTStatsFrom(containerPath, "") checkError(t, err, test.err) assert.Equal(t, test.expected.CMTStats, actual.CMTStats) assert.Equal(t, test.expected.MBMStats, actual.MBMStats) From 88b6b7cb826441a0a230805187ec7208b968cbee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Mon, 16 Aug 2021 09:56:50 +0200 Subject: [PATCH 27/49] Use empty struct map instead of boolean. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/utils.go | 8 ++++---- resctrl/utils_test.go | 17 +++++++++-------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/resctrl/utils.go b/resctrl/utils.go index 511c3e3f45..3fa13ee336 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -245,18 +245,18 @@ func arePIDsInControlGroup(path string, pids []string) (bool, error) { return true, nil } -func readTasksFile(tasksFile []byte) map[string]bool { +func readTasksFile(tasksFile []byte) map[string]struct{} { const ( newLineASCIICode = 0xA zeroASCIICode = 0x30 nineASCIICode = 0x39 ) - tasks := make(map[string]bool) + tasks := make(map[string]struct{}) var task []byte for _, b := range tasksFile { if b == newLineASCIICode { if len(task) > 0 { - tasks[string(task)] = true + tasks[string(task)] = struct{}{} task = []byte{} } continue @@ -267,7 +267,7 @@ func readTasksFile(tasksFile []byte) map[string]bool { } if len(task) > 0 { - tasks[string(task)] = true + tasks[string(task)] = struct{}{} } return tasks diff --git a/resctrl/utils_test.go b/resctrl/utils_test.go index 9ad93aab52..69d588f522 100644 --- a/resctrl/utils_test.go +++ b/resctrl/utils_test.go @@ -578,22 +578,23 @@ func TestGetStats(t *testing.T) { func TestReadTasksFile(t *testing.T) { var testCases = []struct { tasksFile []byte - expected map[string]bool + expected map[string]struct{} }{ {[]byte{0x31, 0x32, 0xA, 0x37, 0x37}, - map[string]bool{ - "12": true, - "77": true}, + map[string]struct{}{ + "12": {}, + "77": {}, + }, }, {[]byte{0xA, 0x32, 0xA}, - map[string]bool{ - "2": true}, + map[string]struct{}{ + "2": {}}, }, {[]byte{0x0, 0x2A, 0xA}, - map[string]bool{}, + map[string]struct{}{}, }, {[]byte{}, - map[string]bool{}, + map[string]struct{}{}, }, } From 8bf947a6ff0b45a4e17b906733e61198eea03a2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Mon, 16 Aug 2021 11:05:22 +0200 Subject: [PATCH 28/49] Move reading file logic. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/testing/tasks_empty | 0 resctrl/testing/tasks_invalid | Bin 0 -> 3 bytes resctrl/testing/tasks_one | 1 + resctrl/testing/tasks_two | 2 ++ resctrl/utils.go | 21 +++++++++++---------- resctrl/utils_test.go | 15 ++++++++------- 6 files changed, 22 insertions(+), 17 deletions(-) create mode 100644 resctrl/testing/tasks_empty create mode 100644 resctrl/testing/tasks_invalid create mode 100644 resctrl/testing/tasks_one create mode 100644 resctrl/testing/tasks_two diff --git a/resctrl/testing/tasks_empty b/resctrl/testing/tasks_empty new file mode 100644 index 0000000000..e69de29bb2 diff --git a/resctrl/testing/tasks_invalid b/resctrl/testing/tasks_invalid new file mode 100644 index 0000000000000000000000000000000000000000..b86584b5167acaff3a7a81ccdcbb01aeaa4f2738 GIT binary patch literal 3 KcmZS3;sO8wVE{D% literal 0 HcmV?d00001 diff --git a/resctrl/testing/tasks_one b/resctrl/testing/tasks_one new file mode 100644 index 0000000000..0cfbf08886 --- /dev/null +++ b/resctrl/testing/tasks_one @@ -0,0 +1 @@ +2 diff --git a/resctrl/testing/tasks_two b/resctrl/testing/tasks_two new file mode 100644 index 0000000000..dc11a81052 --- /dev/null +++ b/resctrl/testing/tasks_two @@ -0,0 +1,2 @@ +12 +77 \ No newline at end of file diff --git a/resctrl/utils.go b/resctrl/utils.go index 3fa13ee336..e00af5ea56 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -224,17 +224,11 @@ func arePIDsInControlGroup(path string, pids []string) (bool, error) { return false, fmt.Errorf("couldn't obtain pids from %q path: %v", path, noPidsPassedError) } - tasksFile, err := ioutil.ReadFile(filepath.Join(path, "tasks")) + tasks, err := readTasksFile(filepath.Join(path, "tasks")) if err != nil { - return false, fmt.Errorf("couldn't obtain pids from %q path: %w", path, err) + return false, err } - if len(tasksFile) == 0 { - return false, nil - } - - tasks := readTasksFile(tasksFile) - for _, pid := range pids { _, ok := tasks[pid] if !ok { @@ -245,13 +239,20 @@ func arePIDsInControlGroup(path string, pids []string) (bool, error) { return true, nil } -func readTasksFile(tasksFile []byte) map[string]struct{} { +func readTasksFile(tasksPath string) (map[string]struct{}, error) { const ( newLineASCIICode = 0xA zeroASCIICode = 0x30 nineASCIICode = 0x39 ) + tasks := make(map[string]struct{}) + + tasksFile, err := ioutil.ReadFile(tasksPath) + if err != nil { + return tasks, fmt.Errorf("couldn't obtain pids from %q path: %w", tasksPath, err) + } + var task []byte for _, b := range tasksFile { if b == newLineASCIICode { @@ -270,7 +271,7 @@ func readTasksFile(tasksFile []byte) map[string]struct{} { tasks[string(task)] = struct{}{} } - return tasks + return tasks, nil } func readStatFrom(path string, vendorID string) (uint64, error) { diff --git a/resctrl/utils_test.go b/resctrl/utils_test.go index 69d588f522..400401a52d 100644 --- a/resctrl/utils_test.go +++ b/resctrl/utils_test.go @@ -455,7 +455,7 @@ func TestArePIDsInControlGroup(t *testing.T) { }, { false, - fmt.Sprintf("couldn't obtain pids from %q path: open %s: no such file or directory", filepath.Join(rootResctrl, monitoringGroupDir), filepath.Join(rootResctrl, monitoringGroupDir, tasksFileName)), + fmt.Sprintf("couldn't obtain pids from %q path: open %s: no such file or directory", filepath.Join(rootResctrl, monitoringGroupDir, tasksFileName), filepath.Join(rootResctrl, monitoringGroupDir, tasksFileName)), filepath.Join(rootResctrl, monitoringGroupDir), []string{"1", "2"}, }, @@ -577,29 +577,30 @@ func TestGetStats(t *testing.T) { func TestReadTasksFile(t *testing.T) { var testCases = []struct { - tasksFile []byte + tasksFile string expected map[string]struct{} }{ - {[]byte{0x31, 0x32, 0xA, 0x37, 0x37}, + {"testing/tasks_two", map[string]struct{}{ "12": {}, "77": {}, }, }, - {[]byte{0xA, 0x32, 0xA}, + {"testing/tasks_one", map[string]struct{}{ "2": {}}, }, - {[]byte{0x0, 0x2A, 0xA}, + {"testing/tasks_invalid", map[string]struct{}{}, }, - {[]byte{}, + {"testing/tasks_empty", map[string]struct{}{}, }, } for _, test := range testCases { - actual := readTasksFile(test.tasksFile) + actual, err := readTasksFile(test.tasksFile) assert.Equal(t, test.expected, actual) + assert.NoError(t, err) } } From c54e007c10fa505db8008239f1d781537aa5180b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Mon, 16 Aug 2021 12:36:32 +0200 Subject: [PATCH 29/49] Use scanner to read tasks file. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/utils.go | 43 +++++++++++++++++++------------------------ resctrl/utils_test.go | 26 ++++++++++++++++---------- 2 files changed, 35 insertions(+), 34 deletions(-) diff --git a/resctrl/utils.go b/resctrl/utils.go index e00af5ea56..9bdce1b48e 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -18,6 +18,7 @@ package resctrl import ( + "bufio" "bytes" "fmt" "io/ioutil" @@ -229,7 +230,11 @@ func arePIDsInControlGroup(path string, pids []string) (bool, error) { return false, err } - for _, pid := range pids { + for _, pidString := range pids { + pid, err := strconv.Atoi(pidString) + if err != nil { + return false, err + } _, ok := tasks[pid] if !ok { return false, nil @@ -239,36 +244,26 @@ func arePIDsInControlGroup(path string, pids []string) (bool, error) { return true, nil } -func readTasksFile(tasksPath string) (map[string]struct{}, error) { - const ( - newLineASCIICode = 0xA - zeroASCIICode = 0x30 - nineASCIICode = 0x39 - ) +func readTasksFile(tasksPath string) (map[int]struct{}, error) { + tasks := make(map[int]struct{}) - tasks := make(map[string]struct{}) - - tasksFile, err := ioutil.ReadFile(tasksPath) + tasksFile, err := os.Open(tasksPath) if err != nil { - return tasks, fmt.Errorf("couldn't obtain pids from %q path: %w", tasksPath, err) + return tasks, fmt.Errorf("couldn't read tasks file from %q path: %w", tasksPath, err) } + defer tasksFile.Close() - var task []byte - for _, b := range tasksFile { - if b == newLineASCIICode { - if len(task) > 0 { - tasks[string(task)] = struct{}{} - task = []byte{} - } - continue - } - if b >= zeroASCIICode && b <= nineASCIICode { - task = append(task, b) + scanner := bufio.NewScanner(tasksFile) + for scanner.Scan() { + id, err := strconv.Atoi(scanner.Text()) + if err != nil { + return tasks, fmt.Errorf("couldn't convert pids from %q path: %w", tasksPath, err) } + tasks[id] = struct{}{} } - if len(task) > 0 { - tasks[string(task)] = struct{}{} + if err := scanner.Err(); err != nil { + return tasks, fmt.Errorf("couldn't obtain pids from %q path: %w", tasksPath, err) } return tasks, nil diff --git a/resctrl/utils_test.go b/resctrl/utils_test.go index 400401a52d..40b39028e0 100644 --- a/resctrl/utils_test.go +++ b/resctrl/utils_test.go @@ -455,7 +455,7 @@ func TestArePIDsInControlGroup(t *testing.T) { }, { false, - fmt.Sprintf("couldn't obtain pids from %q path: open %s: no such file or directory", filepath.Join(rootResctrl, monitoringGroupDir, tasksFileName), filepath.Join(rootResctrl, monitoringGroupDir, tasksFileName)), + fmt.Sprintf("couldn't read tasks file from %q path: open %s: no such file or directory", filepath.Join(rootResctrl, monitoringGroupDir, tasksFileName), filepath.Join(rootResctrl, monitoringGroupDir, tasksFileName)), filepath.Join(rootResctrl, monitoringGroupDir), []string{"1", "2"}, }, @@ -578,29 +578,35 @@ func TestGetStats(t *testing.T) { func TestReadTasksFile(t *testing.T) { var testCases = []struct { tasksFile string - expected map[string]struct{} + expected map[int]struct{} + err string }{ {"testing/tasks_two", - map[string]struct{}{ - "12": {}, - "77": {}, + map[int]struct{}{ + 12: {}, + 77: {}, }, + "", }, {"testing/tasks_one", - map[string]struct{}{ - "2": {}}, + map[int]struct{}{ + 2: {}, + }, + "", }, {"testing/tasks_invalid", - map[string]struct{}{}, + map[int]struct{}{}, + "couldn't convert pids from \"testing/tasks_invalid\" path: strconv.Atoi: parsing \"\\x00*\": invalid syntax", }, {"testing/tasks_empty", - map[string]struct{}{}, + map[int]struct{}{}, + "", }, } for _, test := range testCases { actual, err := readTasksFile(test.tasksFile) assert.Equal(t, test.expected, actual) - assert.NoError(t, err) + checkError(t, err, test.err) } } From dbb54d2c7f354277161a3c16bae36bb1035b00bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Mon, 16 Aug 2021 15:49:01 +0200 Subject: [PATCH 30/49] Change the way of searching for the control group. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/utils.go | 73 ++++++++++++++++++++---------------------------- 1 file changed, 30 insertions(+), 43 deletions(-) diff --git a/resctrl/utils.go b/resctrl/utils.go index 9bdce1b48e..640e31f72a 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -41,8 +41,6 @@ const ( cpusListFileName = "cpus_list" schemataFileName = "schemata" tasksFileName = "tasks" - modeFileName = "mode" - sizeFileName = "size" infoDirName = "info" monDataDirName = "mon_data" monGroupsDirName = "mon_groups" @@ -57,12 +55,17 @@ const ( ) var ( - rootResctrl = "" - pidsPath = "" - processPath = "/proc" - enabledMBM = false - enabledCMT = false - isResctrlInitialized = false + rootResctrl = "" + pidsPath = "" + processPath = "/proc" + enabledMBM = false + enabledCMT = false + isResctrlInitialized = false + controlGroupDirectories = map[string]struct{}{ + infoDirName: {}, + monDataDirName: {}, + monGroupsDirName: {}, + } ) func Setup() error { @@ -170,49 +173,33 @@ func getAllProcessThreads(threadFiles []os.FileInfo) ([]int, error) { return processThreads, nil } +// findControlGroup returns the path of a control group in which the pids are. func findControlGroup(pids []string) (string, error) { if len(pids) == 0 { return "", fmt.Errorf(noPidsPassedError) } - availablePaths, err := filepath.Glob(filepath.Join(rootResctrl, "*")) + availablePaths := make([]string, 0) + err := filepath.Walk(rootResctrl, func(path string, info os.FileInfo, err error) error { + // Look only for directories. + if info.IsDir() { + // Avoid internal control group dirs. + if _, ok := controlGroupDirectories[filepath.Base(path)]; !ok { + availablePaths = append(availablePaths, path) + } + } + return nil + }) if err != nil { - return "", err + return "", fmt.Errorf("couldn't obtain control groups paths: %w", err) } for _, path := range availablePaths { - switch path { - case filepath.Join(rootResctrl, cpusFileName): - continue - case filepath.Join(rootResctrl, cpusListFileName): - continue - case filepath.Join(rootResctrl, modeFileName): - continue - case filepath.Join(rootResctrl, sizeFileName): - continue - case filepath.Join(rootResctrl, infoDirName): - continue - case filepath.Join(rootResctrl, monDataDirName): - continue - case filepath.Join(rootResctrl, monGroupsDirName): - continue - case filepath.Join(rootResctrl, schemataFileName): - continue - case filepath.Join(rootResctrl, tasksFileName): - inGroup, err := arePIDsInControlGroup(rootResctrl, pids) - if err != nil { - return "", err - } - if inGroup { - return rootResctrl, nil - } - default: - inGroup, err := arePIDsInControlGroup(path, pids) - if err != nil { - return "", err - } - if inGroup { - return path, nil - } + groupFound, err := arePIDsInControlGroup(path, pids) + if err != nil { + return "", err + } + if groupFound { + return path, nil } } From 731606df0dec103aaa622432c531c1710a63a6a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Mon, 16 Aug 2021 15:57:25 +0200 Subject: [PATCH 31/49] Add comments. Use const value. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/utils.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/resctrl/utils.go b/resctrl/utils.go index 640e31f72a..f690221b13 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -206,13 +206,13 @@ func findControlGroup(pids []string) (string, error) { return "", fmt.Errorf("there is no available control group") } +// arePIDsInControlGroup returns true if all of the pids are withing control group. func arePIDsInControlGroup(path string, pids []string) (bool, error) { if len(pids) == 0 { - // No container pids passed. return false, fmt.Errorf("couldn't obtain pids from %q path: %v", path, noPidsPassedError) } - tasks, err := readTasksFile(filepath.Join(path, "tasks")) + tasks, err := readTasksFile(filepath.Join(path, tasksFileName)) if err != nil { return false, err } @@ -224,6 +224,7 @@ func arePIDsInControlGroup(path string, pids []string) (bool, error) { } _, ok := tasks[pid] if !ok { + // There is any missing pid within group. return false, nil } } From 15be02e4e15a20abd3092797e5d15b375acc1d82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Mon, 16 Aug 2021 15:59:15 +0200 Subject: [PATCH 32/49] Comment function. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/utils.go | 1 + 1 file changed, 1 insertion(+) diff --git a/resctrl/utils.go b/resctrl/utils.go index f690221b13..42532e1638 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -232,6 +232,7 @@ func arePIDsInControlGroup(path string, pids []string) (bool, error) { return true, nil } +// readTasksFile returns pids map from given tasks path. func readTasksFile(tasksPath string) (map[int]struct{}, error) { tasks := make(map[int]struct{}) From d24ece6d46a14e2861bf8287d987e53b98318d46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Wed, 18 Aug 2021 08:02:23 +0200 Subject: [PATCH 33/49] Fix typo. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/utils.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resctrl/utils.go b/resctrl/utils.go index 42532e1638..a966d73a6d 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -206,7 +206,7 @@ func findControlGroup(pids []string) (string, error) { return "", fmt.Errorf("there is no available control group") } -// arePIDsInControlGroup returns true if all of the pids are withing control group. +// arePIDsInControlGroup returns true if all of the pids are within control group. func arePIDsInControlGroup(path string, pids []string) (bool, error) { if len(pids) == 0 { return false, fmt.Errorf("couldn't obtain pids from %q path: %v", path, noPidsPassedError) From c9da6fb66612f1a50600835de073d5a4736bba8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Wed, 18 Aug 2021 08:53:35 +0200 Subject: [PATCH 34/49] Refactor getAllProcessThreads. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/utils.go | 20 ++++++++----- resctrl/utils_test.go | 69 ++++++++++++++++++++++++++++++++----------- 2 files changed, 64 insertions(+), 25 deletions(-) diff --git a/resctrl/utils.go b/resctrl/utils.go index a966d73a6d..1461dfd1d3 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -122,12 +122,7 @@ func prepareMonitoringGroup(containerName string, getContainerPids func() ([]str } for _, pid := range pids { - threadFiles, err := ioutil.ReadDir(filepath.Join(processPath, pid, processTask)) - if err != nil { - return "", err - } - - processThreads, err := getAllProcessThreads(threadFiles) + processThreads, err := getAllProcessThreads(filepath.Join(processPath, pid, processTask)) if err != nil { return "", err } @@ -160,9 +155,18 @@ func getPids(containerName string) ([]int, error) { return pids, nil } -func getAllProcessThreads(threadFiles []os.FileInfo) ([]int, error) { +// getAllProcessThreads obtains all available processes from directory. +// e.g. ls /proc/4215/task/ -> 4215, 4216, 4217, 4218 +// func will return [4215, 4216, 4217, 4218]. +func getAllProcessThreads(path string) ([]int, error) { processThreads := make([]int, 0) - for _, file := range threadFiles { + + threadDirs, err := ioutil.ReadDir(path) + if err != nil { + return processThreads, err + } + + for _, file := range threadDirs { pid, err := strconv.Atoi(file.Name()) if err != nil { return nil, fmt.Errorf("couldn't parse %q file: %v", file.Name(), err) diff --git a/resctrl/utils_test.go b/resctrl/utils_test.go index 40b39028e0..130ba0384f 100644 --- a/resctrl/utils_test.go +++ b/resctrl/utils_test.go @@ -33,8 +33,6 @@ import ( "github.com/opencontainers/runc/libcontainer/intelrdt" "github.com/stretchr/testify/assert" - - "github.com/google/cadvisor/utils/sysfs/fakesysfs" ) func mockAllGetContainerPids() ([]string, error) { @@ -356,33 +354,70 @@ func TestGetPids(t *testing.T) { } func TestGetAllProcessThreads(t *testing.T) { + mockProcFs := func() string { + path, _ := ioutil.TempDir("", "proc") + + var files = []struct { + path string + touch func(string) error + }{ + // correct + { + filepath.Join(path, "4215", processTask, "4215"), + touchDir, + }, + { + filepath.Join(path, "4215", processTask, "4216"), + touchDir, + }, + { + filepath.Join(path, "4215", processTask, "4217"), + touchDir, + }, + { + filepath.Join(path, "4215", processTask, "4218"), + touchDir, + }, + // invalid + { + filepath.Join(path, "301", processTask, "301"), + touchDir, + }, + { + filepath.Join(path, "301", processTask, "incorrect"), + touchDir, + }, + } + + for _, file := range files { + _ = file.touch(file.path) + } + + return path + } + + mockedProcFs := mockProcFs() + defer os.RemoveAll(mockedProcFs) + var testCases = []struct { - filesInfo []os.FileInfo - expected []int - err string + path string + expected []int + err string }{ { - []os.FileInfo{ - &fakesysfs.FileInfo{EntryName: "1"}, - &fakesysfs.FileInfo{EntryName: "2"}, - &fakesysfs.FileInfo{EntryName: "3"}, - &fakesysfs.FileInfo{EntryName: "4"}, - }, - []int{1, 2, 3, 4}, + filepath.Join(mockedProcFs, "4215", processTask), + []int{4215, 4216, 4217, 4218}, "", }, { - []os.FileInfo{ - &fakesysfs.FileInfo{EntryName: "incorrect"}, - &fakesysfs.FileInfo{EntryName: "1"}, - }, + filepath.Join(mockedProcFs, "301", processTask), nil, "couldn't parse \"incorrect\" file: strconv.Atoi: parsing \"incorrect\": invalid syntax", }, } for _, test := range testCases { - actual, err := getAllProcessThreads(test.filesInfo) + actual, err := getAllProcessThreads(test.path) assert.Equal(t, test.expected, actual) checkError(t, err, test.err) } From 8162197cf7ec41658ca1e186019d26a4e5ffcf19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Wed, 18 Aug 2021 09:26:40 +0200 Subject: [PATCH 35/49] Refactor GetVendorID. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- machine/info.go | 2 +- machine/machine.go | 6 +++--- machine/topology_test.go | 29 +++++++++++++++++++++-------- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/machine/info.go b/machine/info.go index 50f6da2072..25bfd4e62b 100644 --- a/machine/info.go +++ b/machine/info.go @@ -121,7 +121,7 @@ func Info(sysFs sysfs.SysFs, fsInfo fs.FsInfo, inHostNamespace bool) (*info.Mach machineInfo := &info.MachineInfo{ Timestamp: time.Now(), - VendorID: GetVendorID(cpuinfo), + VendorID: GetCPUVendorID(cpuinfo), NumCores: numCores, NumPhysicalCores: GetPhysicalCores(cpuinfo), NumSockets: GetSockets(cpuinfo), diff --git a/machine/machine.go b/machine/machine.go index 47a4e6fddc..4e3c6b98bf 100644 --- a/machine/machine.go +++ b/machine/machine.go @@ -55,13 +55,13 @@ var ( const memTypeFileName = "dimm_mem_type" const sizeFileName = "size" -// GetVendorID returns "vendor_id" reading /proc/cpuinfo file. -func GetVendorID(procInfo []byte) string { +// GetCPUVendorID returns "vendor_id" reading /proc/cpuinfo file. +func GetCPUVendorID(procInfo []byte) string { vendorID := "" matches := vendorIDRegexp.FindSubmatch(procInfo) if len(matches) != 2 { - klog.Error("Cannot read vendor id correctly, set empty") + klog.Warning("Cannot read vendor id correctly, set empty.") return vendorID } diff --git a/machine/topology_test.go b/machine/topology_test.go index 3c80b72dec..0cc81b9225 100644 --- a/machine/topology_test.go +++ b/machine/topology_test.go @@ -463,13 +463,26 @@ func TestClockSpeedOnCpuLowerCase(t *testing.T) { assert.Equal(t, uint64(1450*1000), clockSpeed) } -func TestGetVendorID(t *testing.T) { - testfile := "./testdata/cpuinfo_onesocket_many_NUMAs" - - testcpuinfo, err := ioutil.ReadFile(testfile) - assert.Nil(t, err) - assert.NotNil(t, testcpuinfo) +func TestGetCPUVendorID(t *testing.T) { + var testCases = []struct { + file string + expected string + }{ + { + "./testdata/cpuinfo_onesocket_many_NUMAs", + "GenuineIntel", + }, + { + "./testdata/cpuinfo_arm", + "", + }, + } - vendorID := GetVendorID(testcpuinfo) - assert.Equal(t, "GenuineIntel", vendorID) + for _, test := range testCases { + testcpuinfo, err := ioutil.ReadFile(test.file) + assert.Nil(t, err) + assert.NotNil(t, testcpuinfo) + cpuVendorID := GetCPUVendorID(testcpuinfo) + assert.Equal(t, test.expected, cpuVendorID) + } } From 9c675e41bd1e57be33a6ba0295f58a58f3b9f3e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Wed, 18 Aug 2021 09:27:54 +0200 Subject: [PATCH 36/49] Rename VendorID. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- info/v1/machine.go | 4 ++-- machine/info.go | 2 +- manager/manager.go | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/info/v1/machine.go b/info/v1/machine.go index fca9d1eb7d..108089bc22 100644 --- a/info/v1/machine.go +++ b/info/v1/machine.go @@ -179,7 +179,7 @@ type MachineInfo struct { Timestamp time.Time `json:"timestamp"` // Vendor id of CPU. - VendorID string `json:"vendor_id"` + CPUVendorID string `json:"vendor_id"` // The number of cores in this machine. NumCores int `json:"num_cores"` @@ -252,7 +252,7 @@ func (m *MachineInfo) Clone() *MachineInfo { } } copy := MachineInfo{ - VendorID: m.VendorID, + CPUVendorID: m.CPUVendorID, Timestamp: m.Timestamp, NumCores: m.NumCores, NumPhysicalCores: m.NumPhysicalCores, diff --git a/machine/info.go b/machine/info.go index 25bfd4e62b..0550a5fa29 100644 --- a/machine/info.go +++ b/machine/info.go @@ -121,7 +121,7 @@ func Info(sysFs sysfs.SysFs, fsInfo fs.FsInfo, inHostNamespace bool) (*info.Mach machineInfo := &info.MachineInfo{ Timestamp: time.Now(), - VendorID: GetCPUVendorID(cpuinfo), + CPUVendorID: GetCPUVendorID(cpuinfo), NumCores: numCores, NumPhysicalCores: GetPhysicalCores(cpuinfo), NumSockets: GetSockets(cpuinfo), diff --git a/manager/manager.go b/manager/manager.go index 38bcc7641f..e7552047d3 100644 --- a/manager/manager.go +++ b/manager/manager.go @@ -217,7 +217,7 @@ func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, houskeepingConfig return nil, err } - newManager.resctrlManager, err = resctrl.NewManager(resctrlInterval, resctrl.Setup, machineInfo.VendorID) + newManager.resctrlManager, err = resctrl.NewManager(resctrlInterval, resctrl.Setup, machineInfo.CPUVendorID) if err != nil { klog.V(4).Infof("Cannot gather resctrl metrics: %v", err) } From a76478c77b4c6138ab43201901b8fe8e89798b3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Thu, 19 Aug 2021 15:12:21 +0200 Subject: [PATCH 37/49] Resctrl collector should be aware of existing mon groups. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- docs/runtime_options.md | 2 + resctrl/collector.go | 13 +++- resctrl/collector_test.go | 3 +- resctrl/utils.go | 140 +++++++++++++++++++++++--------------- resctrl/utils_test.go | 84 +++++++++++++++++++---- 5 files changed, 172 insertions(+), 70 deletions(-) diff --git a/docs/runtime_options.md b/docs/runtime_options.md index b5dfb1f927..1f640f5c88 100644 --- a/docs/runtime_options.md +++ b/docs/runtime_options.md @@ -419,6 +419,8 @@ should be a human readable string that will become a metric name. `uncore_imc` prefix. ## Resctrl +To gain metrics, cAdvisor creates own monitoring groups with `cadvisor` prefix. + ``` --resctrl_interval=0: Resctrl mon groups updating interval. Zero value disables updating mon groups. diff --git a/resctrl/collector.go b/resctrl/collector.go index 3363f88a4f..0686184f9d 100644 --- a/resctrl/collector.go +++ b/resctrl/collector.go @@ -20,6 +20,7 @@ package resctrl import ( "fmt" "os" + "path/filepath" "sync" "time" @@ -155,9 +156,15 @@ func (c *collector) Destroy() { func (c *collector) clear() error { // Not allowed to remove root or undefined resctrl directory. if c.id != rootContainer && c.resctrlPath != "" { - err := os.RemoveAll(c.resctrlPath) - if err != nil { - return fmt.Errorf("couldn't clear mon_group: %v", err) + // Remove only own prepared mon group. + base := filepath.Base(c.resctrlPath) + if len(base) > len(monGroupPrefix) { + if base[:len(monGroupPrefix)] == monGroupPrefix { + err := os.RemoveAll(c.resctrlPath) + if err != nil { + return fmt.Errorf("couldn't clear mon_group: %v", err) + } + } } } return nil diff --git a/resctrl/collector_test.go b/resctrl/collector_test.go index e3d281a8ed..95d8214ea7 100644 --- a/resctrl/collector_test.go +++ b/resctrl/collector_test.go @@ -18,6 +18,7 @@ package resctrl import ( + "fmt" "os" "path/filepath" "testing" @@ -38,7 +39,7 @@ func TestNewCollectorWithSetup(t *testing.T) { defer os.RemoveAll(processPath) expectedID := "container" - expectedResctrlPath := filepath.Join(rootResctrl, monGroupsDirName, expectedID) + expectedResctrlPath := filepath.Join(rootResctrl, monGroupsDirName, fmt.Sprintf("%s-%s", monGroupPrefix, expectedID)) collector := newCollector(expectedID, mockGetContainerPids, 0, 2, "") err := collector.setup() diff --git a/resctrl/utils.go b/resctrl/utils.go index 1461dfd1d3..ee3be14ad8 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -33,25 +33,27 @@ import ( ) const ( - cpuCgroup = "cpu" - rootContainer = "/" - monitoringGroupDir = "mon_groups" - processTask = "task" - cpusFileName = "cpus" - cpusListFileName = "cpus_list" - schemataFileName = "schemata" - tasksFileName = "tasks" - infoDirName = "info" - monDataDirName = "mon_data" - monGroupsDirName = "mon_groups" - noPidsPassedError = "there are no pids passed" - noContainerNameError = "there are no container name passed" - llcOccupancyFileName = "llc_occupancy" - mbmLocalBytesFileName = "mbm_local_bytes" - mbmTotalBytesFileName = "mbm_total_bytes" - containerPrefix = '/' - minContainerNameLen = 2 // "/" e.g. "/a" - unavailable = "Unavailable" + cpuCgroup = "cpu" + rootContainer = "/" + monitoringGroupDir = "mon_groups" + processTask = "task" + cpusFileName = "cpus" + cpusListFileName = "cpus_list" + schemataFileName = "schemata" + tasksFileName = "tasks" + infoDirName = "info" + monDataDirName = "mon_data" + monGroupsDirName = "mon_groups" + noPidsPassedError = "there are no pids passed" + noContainerNameError = "there are no container name passed" + noControlGroupFoundError = "couldn't find control group matching container" + llcOccupancyFileName = "llc_occupancy" + mbmLocalBytesFileName = "mbm_local_bytes" + mbmTotalBytesFileName = "mbm_total_bytes" + containerPrefix = '/' + minContainerNameLen = 2 // "/" e.g. "/a" + unavailable = "Unavailable" + monGroupPrefix = "cadvisor" ) var ( @@ -66,6 +68,9 @@ var ( monDataDirName: {}, monGroupsDirName: {}, } + monGroupDirectories = map[string]struct{}{ + monDataDirName: {}, + } ) func Setup() error { @@ -103,39 +108,50 @@ func prepareMonitoringGroup(containerName string, getContainerPids func() ([]str return "", fmt.Errorf("couldn't obtain %q container pids, there is no pids in cgroup", containerName) } - path, err := findControlGroup(pids) + controlGroupPath, err := findGroup(rootResctrl, pids, controlGroupDirectories, false) if err != nil { - return "", fmt.Errorf("couldn't find control group matching %q container: %v", containerName, err) + return "", fmt.Errorf("%q %q: %q", noControlGroupFoundError, containerName, err) } - - if len(containerName) >= minContainerNameLen && containerName[0] == containerPrefix { - containerName = containerName[1:] + if controlGroupPath == "" { + return "", fmt.Errorf("%q %q", noControlGroupFoundError, containerName) } - properContainerName := strings.Replace(containerName, "/", "-", -1) - monGroupPath := filepath.Join(path, monitoringGroupDir, properContainerName) - - // Create new mon_group if not exists. - err = os.MkdirAll(monGroupPath, os.ModePerm) + // Check if there is any monitoring group. + monGroupPath, err := findGroup(filepath.Join(controlGroupPath, monGroupsDirName), pids, monGroupDirectories, true) if err != nil { - return "", fmt.Errorf("couldn't create monitoring group directory for %q container: %w", containerName, err) + return "", fmt.Errorf("couldn't find monitoring group matching %q container: %v", containerName, err) } - for _, pid := range pids { - processThreads, err := getAllProcessThreads(filepath.Join(processPath, pid, processTask)) + // Prepare new one if not exists. + if monGroupPath == "" { + if len(containerName) >= minContainerNameLen && containerName[0] == containerPrefix { + containerName = containerName[1:] + } + + properContainerName := fmt.Sprintf("%s-%s", monGroupPrefix, strings.Replace(containerName, "/", "-", -1)) + monGroupPath = filepath.Join(controlGroupPath, monitoringGroupDir, properContainerName) + + err = os.MkdirAll(monGroupPath, os.ModePerm) if err != nil { - return "", err + return "", fmt.Errorf("couldn't create monitoring group directory for %q container: %w", containerName, err) } - for _, thread := range processThreads { - err = intelrdt.WriteIntelRdtTasks(monGroupPath, thread) + + for _, pid := range pids { + processThreads, err := getAllProcessThreads(filepath.Join(processPath, pid, processTask)) if err != nil { - secondError := os.Remove(monGroupPath) - if secondError != nil { - return "", fmt.Errorf( - "coudn't assign pids to %q container monitoring group: %w \n couldn't clear %q monitoring group: %v", - containerName, err, containerName, secondError) + return "", err + } + for _, thread := range processThreads { + err = intelrdt.WriteIntelRdtTasks(monGroupPath, thread) + if err != nil { + secondError := os.Remove(monGroupPath) + if secondError != nil { + return "", fmt.Errorf( + "coudn't assign pids to %q container monitoring group: %w \n couldn't clear %q monitoring group: %v", + containerName, err, containerName, secondError) + } + return "", fmt.Errorf("coudn't assign pids to %q container monitoring group: %w", containerName, err) } - return "", fmt.Errorf("coudn't assign pids to %q container monitoring group: %w", containerName, err) } } } @@ -177,28 +193,34 @@ func getAllProcessThreads(path string) ([]int, error) { return processThreads, nil } -// findControlGroup returns the path of a control group in which the pids are. -func findControlGroup(pids []string) (string, error) { +// findGroup returns the path of a control/monitoring group in which the pids are. +func findGroup(group string, pids []string, skip map[string]struct{}, monGroup bool) (string, error) { if len(pids) == 0 { return "", fmt.Errorf(noPidsPassedError) } availablePaths := make([]string, 0) - err := filepath.Walk(rootResctrl, func(path string, info os.FileInfo, err error) error { + err := filepath.Walk(group, func(path string, info os.FileInfo, err error) error { // Look only for directories. if info.IsDir() { - // Avoid internal control group dirs. - if _, ok := controlGroupDirectories[filepath.Base(path)]; !ok { - availablePaths = append(availablePaths, path) + // Monitoring path shouldn't be considered. + if path == group && monGroup { + return nil } + // Avoid internal group dirs. + if _, ok := skip[filepath.Base(path)]; ok { + return filepath.SkipDir + } + availablePaths = append(availablePaths, path) } return nil }) if err != nil { - return "", fmt.Errorf("couldn't obtain control groups paths: %w", err) + return "", fmt.Errorf("couldn't obtain groups paths: %w", err) } for _, path := range availablePaths { - groupFound, err := arePIDsInControlGroup(path, pids) + // Mon group pids should be exclusive. + groupFound, err := arePIDsInGroup(path, pids, monGroup) if err != nil { return "", err } @@ -207,11 +229,11 @@ func findControlGroup(pids []string) (string, error) { } } - return "", fmt.Errorf("there is no available control group") + return "", nil } -// arePIDsInControlGroup returns true if all of the pids are within control group. -func arePIDsInControlGroup(path string, pids []string) (bool, error) { +// arePIDsInGroup returns true if all of the pids are within control group. +func arePIDsInGroup(path string, pids []string, exclusive bool) (bool, error) { if len(pids) == 0 { return false, fmt.Errorf("couldn't obtain pids from %q path: %v", path, noPidsPassedError) } @@ -221,6 +243,7 @@ func arePIDsInControlGroup(path string, pids []string) (bool, error) { return false, err } + any := false for _, pidString := range pids { pid, err := strconv.Atoi(pidString) if err != nil { @@ -228,9 +251,20 @@ func arePIDsInControlGroup(path string, pids []string) (bool, error) { } _, ok := tasks[pid] if !ok { - // There is any missing pid within group. + // There are missing pids within group. + if any { + return false, fmt.Errorf("there should be all pids in group") + } return false, nil } + any = true + } + + // Check if there should be only passed pids in group. + if exclusive { + if len(tasks) != len(pids) { + return false, fmt.Errorf("group should have container pids only") + } } return true, nil diff --git a/resctrl/utils_test.go b/resctrl/utils_test.go index 130ba0384f..9ba7e42cc4 100644 --- a/resctrl/utils_test.go +++ b/resctrl/utils_test.go @@ -143,6 +143,14 @@ func mockResctrl() string { filepath.Join(path, "m1", tasksFileName), touch, }, + { + filepath.Join(path, "m1", monGroupsDirName, "test"), + touchDir, + }, + { + filepath.Join(path, "m1", monGroupsDirName, "test", tasksFileName), + touch, + }, } for _, file := range files { err := file.touch(file.path) @@ -158,7 +166,12 @@ func mockResctrl() string { } // Mock custom CLOSID "m1" task file. - err = fillPids(filepath.Join(path, "m1", tasksFileName), []int{5, 6}) + err = fillPids(filepath.Join(path, "m1", tasksFileName), []int{5, 6, 7, 8, 9, 10}) + if err != nil { + return "" + } + // Mock custom mon group "test" task file. + err = fillPids(filepath.Join(path, "m1", monGroupsDirName, "test", tasksFileName), []int{7, 8}) if err != nil { return "" } @@ -296,13 +309,13 @@ func TestPrepareMonitoringGroup(t *testing.T) { { "container", mockGetContainerPids, - filepath.Join(rootResctrl, monGroupsDirName, "container"), + filepath.Join(rootResctrl, monGroupsDirName, "cadvisor-container"), "", }, { "another", mockAnotherGetContainerPids, - filepath.Join(rootResctrl, "m1", monGroupsDirName, "another"), + filepath.Join(rootResctrl, "m1", monGroupsDirName, "cadvisor-another"), "", }, { @@ -423,87 +436,132 @@ func TestGetAllProcessThreads(t *testing.T) { } } -func TestFindControlGroup(t *testing.T) { +func TestFindGroup(t *testing.T) { rootResctrl = mockResctrl() defer os.RemoveAll(rootResctrl) var testCases = []struct { + path string pids []string + skip map[string]struct{} + monGroup bool expected string err string }{ { + rootResctrl, []string{"1", "2", "3", "4"}, + controlGroupDirectories, + false, rootResctrl, "", }, { + rootResctrl, []string{}, + controlGroupDirectories, + false, "", "there are no pids passed", }, { + rootResctrl, []string{"5", "6"}, + controlGroupDirectories, + false, filepath.Join(rootResctrl, "m1"), "", }, { + rootResctrl, + []string{"11", "12"}, + controlGroupDirectories, + false, + "", + "", + }, + { + filepath.Join(rootResctrl, "m1", monGroupsDirName), + []string{"5", "6"}, + monGroupDirectories, + true, + "", + "", + }, + { + filepath.Join(rootResctrl, "m1", monGroupsDirName), []string{"7", "8"}, + monGroupDirectories, + true, + filepath.Join(rootResctrl, "m1", monGroupsDirName, "test"), + "", + }, + { + filepath.Join(rootResctrl, "m1", monGroupsDirName), + []string{"7"}, + monGroupDirectories, + true, "", - "there is no available control group", + "group should have container pids only", }, } for _, test := range testCases { - actual, err := findControlGroup(test.pids) + actual, err := findGroup(test.path, test.pids, test.skip, test.monGroup) assert.Equal(t, test.expected, actual) checkError(t, err, test.err) } } -func TestArePIDsInControlGroup(t *testing.T) { +func TestArePIDsInGroup(t *testing.T) { rootResctrl = mockResctrl() defer os.RemoveAll(rootResctrl) var testCases = []struct { - expected bool - err string - path string - pids []string + expected bool + err string + path string + pids []string + exclusive bool }{ { true, "", rootResctrl, []string{"1", "2"}, + false, }, { false, - "", + "there should be all pids in group", rootResctrl, []string{"4", "5"}, + false, }, { false, "", filepath.Join(rootResctrl, "m1"), []string{"1"}, + false, }, { false, fmt.Sprintf("couldn't read tasks file from %q path: open %s: no such file or directory", filepath.Join(rootResctrl, monitoringGroupDir, tasksFileName), filepath.Join(rootResctrl, monitoringGroupDir, tasksFileName)), filepath.Join(rootResctrl, monitoringGroupDir), []string{"1", "2"}, + false, }, { false, fmt.Sprintf("couldn't obtain pids from %q path: %v", rootResctrl, noPidsPassedError), rootResctrl, nil, + false, }, } for _, test := range testCases { - actual, err := arePIDsInControlGroup(test.path, test.pids) + actual, err := arePIDsInGroup(test.path, test.pids, test.exclusive) assert.Equal(t, test.expected, actual) checkError(t, err, test.err) } From 852f7557b4e07e0e483fadd6e89267b5496d2127 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Fri, 20 Aug 2021 15:36:06 +0200 Subject: [PATCH 38/49] Optimization for finding control/monitoring group. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/utils.go | 56 ++++++++++++++++++++----------------------- resctrl/utils_test.go | 28 +++++++++++----------- 2 files changed, 40 insertions(+), 44 deletions(-) diff --git a/resctrl/utils.go b/resctrl/utils.go index ee3be14ad8..10773324db 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -57,19 +57,20 @@ const ( ) var ( - rootResctrl = "" - pidsPath = "" - processPath = "/proc" - enabledMBM = false - enabledCMT = false - isResctrlInitialized = false - controlGroupDirectories = map[string]struct{}{ + rootResctrl = "" + pidsPath = "" + processPath = "/proc" + enabledMBM = false + enabledCMT = false + isResctrlInitialized = false + groupDirectories = map[string]struct{}{ + cpusFileName: {}, + cpusListFileName: {}, infoDirName: {}, monDataDirName: {}, monGroupsDirName: {}, - } - monGroupDirectories = map[string]struct{}{ - monDataDirName: {}, + schemataFileName: {}, + tasksFileName: {}, } ) @@ -105,10 +106,10 @@ func prepareMonitoringGroup(containerName string, getContainerPids func() ([]str } if len(pids) == 0 { - return "", fmt.Errorf("couldn't obtain %q container pids, there is no pids in cgroup", containerName) + return "", fmt.Errorf("couldn't obtain %q container pids: there is no pids in cgroup", containerName) } - controlGroupPath, err := findGroup(rootResctrl, pids, controlGroupDirectories, false) + controlGroupPath, err := findGroup(rootResctrl, pids, true, false) if err != nil { return "", fmt.Errorf("%q %q: %q", noControlGroupFoundError, containerName, err) } @@ -117,7 +118,7 @@ func prepareMonitoringGroup(containerName string, getContainerPids func() ([]str } // Check if there is any monitoring group. - monGroupPath, err := findGroup(filepath.Join(controlGroupPath, monGroupsDirName), pids, monGroupDirectories, true) + monGroupPath, err := findGroup(filepath.Join(controlGroupPath, monGroupsDirName), pids, false, true) if err != nil { return "", fmt.Errorf("couldn't find monitoring group matching %q container: %v", containerName, err) } @@ -194,33 +195,28 @@ func getAllProcessThreads(path string) ([]int, error) { } // findGroup returns the path of a control/monitoring group in which the pids are. -func findGroup(group string, pids []string, skip map[string]struct{}, monGroup bool) (string, error) { +func findGroup(group string, pids []string, includeGroup bool, exclusive bool) (string, error) { if len(pids) == 0 { return "", fmt.Errorf(noPidsPassedError) } + availablePaths := make([]string, 0) - err := filepath.Walk(group, func(path string, info os.FileInfo, err error) error { - // Look only for directories. - if info.IsDir() { - // Monitoring path shouldn't be considered. - if path == group && monGroup { - return nil - } - // Avoid internal group dirs. - if _, ok := skip[filepath.Base(path)]; ok { - return filepath.SkipDir - } - availablePaths = append(availablePaths, path) + if includeGroup { + availablePaths = append(availablePaths, group) + } + + filenames, err := ioutil.ReadDir(group) + for _, file := range filenames { + if _, ok := groupDirectories[file.Name()]; !ok { + availablePaths = append(availablePaths, filepath.Join(group, file.Name())) } - return nil - }) + } if err != nil { return "", fmt.Errorf("couldn't obtain groups paths: %w", err) } for _, path := range availablePaths { - // Mon group pids should be exclusive. - groupFound, err := arePIDsInGroup(path, pids, monGroup) + groupFound, err := arePIDsInGroup(path, pids, exclusive) if err != nil { return "", err } diff --git a/resctrl/utils_test.go b/resctrl/utils_test.go index 9ba7e42cc4..6cb491b47c 100644 --- a/resctrl/utils_test.go +++ b/resctrl/utils_test.go @@ -441,17 +441,17 @@ func TestFindGroup(t *testing.T) { defer os.RemoveAll(rootResctrl) var testCases = []struct { - path string - pids []string - skip map[string]struct{} - monGroup bool - expected string - err string + path string + pids []string + includeGroup bool + exclusive bool + expected string + err string }{ { rootResctrl, []string{"1", "2", "3", "4"}, - controlGroupDirectories, + true, false, rootResctrl, "", @@ -459,7 +459,7 @@ func TestFindGroup(t *testing.T) { { rootResctrl, []string{}, - controlGroupDirectories, + true, false, "", "there are no pids passed", @@ -467,7 +467,7 @@ func TestFindGroup(t *testing.T) { { rootResctrl, []string{"5", "6"}, - controlGroupDirectories, + true, false, filepath.Join(rootResctrl, "m1"), "", @@ -475,7 +475,7 @@ func TestFindGroup(t *testing.T) { { rootResctrl, []string{"11", "12"}, - controlGroupDirectories, + true, false, "", "", @@ -483,7 +483,7 @@ func TestFindGroup(t *testing.T) { { filepath.Join(rootResctrl, "m1", monGroupsDirName), []string{"5", "6"}, - monGroupDirectories, + false, true, "", "", @@ -491,7 +491,7 @@ func TestFindGroup(t *testing.T) { { filepath.Join(rootResctrl, "m1", monGroupsDirName), []string{"7", "8"}, - monGroupDirectories, + false, true, filepath.Join(rootResctrl, "m1", monGroupsDirName, "test"), "", @@ -499,14 +499,14 @@ func TestFindGroup(t *testing.T) { { filepath.Join(rootResctrl, "m1", monGroupsDirName), []string{"7"}, - monGroupDirectories, + false, true, "", "group should have container pids only", }, } for _, test := range testCases { - actual, err := findGroup(test.path, test.pids, test.skip, test.monGroup) + actual, err := findGroup(test.path, test.pids, test.includeGroup, test.exclusive) assert.Equal(t, test.expected, actual) checkError(t, err, test.err) } From 97987d8ebdc71151c2ed599f4425b9b99f294a85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Fri, 20 Aug 2021 15:40:14 +0200 Subject: [PATCH 39/49] Avoid having ugly errors. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/collector.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/resctrl/collector.go b/resctrl/collector.go index 0686184f9d..2133e97bdb 100644 --- a/resctrl/collector.go +++ b/resctrl/collector.go @@ -54,7 +54,7 @@ func (c *collector) setup() error { if c.interval != noInterval { if err != nil { - klog.Errorf("Failed to setup container %q resctrl collector: %w \n Trying again in next intervals.", c.id, err) + klog.Errorf("Failed to setup container %q resctrl collector: %s \n Trying again in next intervals.", c.id, err) } else { c.running = true } @@ -70,13 +70,13 @@ func (c *collector) setup() error { err = c.checkMonitoringGroup() if err != nil { c.running = false - klog.Errorf("Failed to check %q resctrl collector control group: %w \n Trying again in next intervals.", c.id, err) + klog.Errorf("Failed to check %q resctrl collector control group: %s \n Trying again in next intervals.", c.id, err) } } else { c.resctrlPath, err = prepareMonitoringGroup(c.id, c.getContainerPids) if err != nil { c.running = false - klog.Errorf("Failed to setup container %q resctrl collector: %w \n Trying again in next intervals.", c.id, err) + klog.Errorf("Failed to setup container %q resctrl collector: %s \n Trying again in next intervals.", c.id, err) } } c.mu.Unlock() From e7628e7ae04d29825028e50c70ff87d0b201ea62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Wed, 25 Aug 2021 09:32:35 +0200 Subject: [PATCH 40/49] Use strings.HasPrefix(). MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/collector.go | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/resctrl/collector.go b/resctrl/collector.go index 2133e97bdb..eb4635450b 100644 --- a/resctrl/collector.go +++ b/resctrl/collector.go @@ -21,6 +21,7 @@ import ( "fmt" "os" "path/filepath" + "strings" "sync" "time" @@ -157,13 +158,10 @@ func (c *collector) clear() error { // Not allowed to remove root or undefined resctrl directory. if c.id != rootContainer && c.resctrlPath != "" { // Remove only own prepared mon group. - base := filepath.Base(c.resctrlPath) - if len(base) > len(monGroupPrefix) { - if base[:len(monGroupPrefix)] == monGroupPrefix { - err := os.RemoveAll(c.resctrlPath) - if err != nil { - return fmt.Errorf("couldn't clear mon_group: %v", err) - } + if strings.HasPrefix(filepath.Base(c.resctrlPath), monGroupPrefix) { + err := os.RemoveAll(c.resctrlPath) + if err != nil { + return fmt.Errorf("couldn't clear mon_group: %v", err) } } } From 1db6ca26e7c82f6f7a65158a75b825436248415d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Wed, 25 Aug 2021 09:40:20 +0200 Subject: [PATCH 41/49] Add comments. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/utils.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/resctrl/utils.go b/resctrl/utils.go index 10773324db..df151f0622 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -109,6 +109,8 @@ func prepareMonitoringGroup(containerName string, getContainerPids func() ([]str return "", fmt.Errorf("couldn't obtain %q container pids: there is no pids in cgroup", containerName) } + // Firstly, find the control group to which the container belongs. + // Consider the root group. controlGroupPath, err := findGroup(rootResctrl, pids, true, false) if err != nil { return "", fmt.Errorf("%q %q: %q", noControlGroupFoundError, containerName, err) @@ -125,10 +127,14 @@ func prepareMonitoringGroup(containerName string, getContainerPids func() ([]str // Prepare new one if not exists. if monGroupPath == "" { + // Remove leading prefix. + // e.g. /my/container -> my/container if len(containerName) >= minContainerNameLen && containerName[0] == containerPrefix { containerName = containerName[1:] } + // Add own prefix and use `-` instead `/`. + // e.g. my/container -> cadvisor-my-container properContainerName := fmt.Sprintf("%s-%s", monGroupPrefix, strings.Replace(containerName, "/", "-", -1)) monGroupPath = filepath.Join(controlGroupPath, monitoringGroupDir, properContainerName) From fa9b5db97ccc7440b78bc62d5a7ba287ce95b7df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Wed, 25 Aug 2021 09:42:22 +0200 Subject: [PATCH 42/49] Rename variables. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/utils.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/resctrl/utils.go b/resctrl/utils.go index df151f0622..a441f7c209 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -189,10 +189,10 @@ func getAllProcessThreads(path string) ([]int, error) { return processThreads, err } - for _, file := range threadDirs { - pid, err := strconv.Atoi(file.Name()) + for _, dir := range threadDirs { + pid, err := strconv.Atoi(dir.Name()) if err != nil { - return nil, fmt.Errorf("couldn't parse %q file: %v", file.Name(), err) + return nil, fmt.Errorf("couldn't parse %q dir: %v", dir.Name(), err) } processThreads = append(processThreads, pid) } @@ -211,8 +211,8 @@ func findGroup(group string, pids []string, includeGroup bool, exclusive bool) ( availablePaths = append(availablePaths, group) } - filenames, err := ioutil.ReadDir(group) - for _, file := range filenames { + files, err := ioutil.ReadDir(group) + for _, file := range files { if _, ok := groupDirectories[file.Name()]; !ok { availablePaths = append(availablePaths, filepath.Join(group, file.Name())) } From b3f311bd56d9b2ffe858344d40eb91436977ed7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Wed, 25 Aug 2021 09:49:10 +0200 Subject: [PATCH 43/49] Fix test. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/utils_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resctrl/utils_test.go b/resctrl/utils_test.go index 6cb491b47c..6fa381f49f 100644 --- a/resctrl/utils_test.go +++ b/resctrl/utils_test.go @@ -425,7 +425,7 @@ func TestGetAllProcessThreads(t *testing.T) { { filepath.Join(mockedProcFs, "301", processTask), nil, - "couldn't parse \"incorrect\" file: strconv.Atoi: parsing \"incorrect\": invalid syntax", + "couldn't parse \"incorrect\" dir: strconv.Atoi: parsing \"incorrect\": invalid syntax", }, } From bbde6363dde55a9187e6c8382cb3f0b10e923108 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Wed, 25 Aug 2021 15:42:48 +0200 Subject: [PATCH 44/49] Use string map instead of int. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/utils.go | 16 ++++------------ resctrl/utils_test.go | 18 +++++++----------- 2 files changed, 11 insertions(+), 23 deletions(-) diff --git a/resctrl/utils.go b/resctrl/utils.go index a441f7c209..23f03a3087 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -246,11 +246,7 @@ func arePIDsInGroup(path string, pids []string, exclusive bool) (bool, error) { } any := false - for _, pidString := range pids { - pid, err := strconv.Atoi(pidString) - if err != nil { - return false, err - } + for _, pid := range pids { _, ok := tasks[pid] if !ok { // There are missing pids within group. @@ -273,8 +269,8 @@ func arePIDsInGroup(path string, pids []string, exclusive bool) (bool, error) { } // readTasksFile returns pids map from given tasks path. -func readTasksFile(tasksPath string) (map[int]struct{}, error) { - tasks := make(map[int]struct{}) +func readTasksFile(tasksPath string) (map[string]struct{}, error) { + tasks := make(map[string]struct{}) tasksFile, err := os.Open(tasksPath) if err != nil { @@ -284,11 +280,7 @@ func readTasksFile(tasksPath string) (map[int]struct{}, error) { scanner := bufio.NewScanner(tasksFile) for scanner.Scan() { - id, err := strconv.Atoi(scanner.Text()) - if err != nil { - return tasks, fmt.Errorf("couldn't convert pids from %q path: %w", tasksPath, err) - } - tasks[id] = struct{}{} + tasks[scanner.Text()] = struct{}{} } if err := scanner.Err(); err != nil { diff --git a/resctrl/utils_test.go b/resctrl/utils_test.go index 6fa381f49f..5c9fb142b6 100644 --- a/resctrl/utils_test.go +++ b/resctrl/utils_test.go @@ -671,28 +671,24 @@ func TestGetStats(t *testing.T) { func TestReadTasksFile(t *testing.T) { var testCases = []struct { tasksFile string - expected map[int]struct{} + expected map[string]struct{} err string }{ {"testing/tasks_two", - map[int]struct{}{ - 12: {}, - 77: {}, + map[string]struct{}{ + "12": {}, + "77": {}, }, "", }, {"testing/tasks_one", - map[int]struct{}{ - 2: {}, + map[string]struct{}{ + "2": {}, }, "", }, - {"testing/tasks_invalid", - map[int]struct{}{}, - "couldn't convert pids from \"testing/tasks_invalid\" path: strconv.Atoi: parsing \"\\x00*\": invalid syntax", - }, {"testing/tasks_empty", - map[int]struct{}{}, + map[string]struct{}{}, "", }, } From ea39458287226de725334d11f4c3cd04bd834fb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Thu, 9 Sep 2021 13:29:40 +0200 Subject: [PATCH 45/49] Now there is no need to use procps in Dockerfile. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- deploy/Dockerfile | 2 +- manager/manager.go | 4 ++-- resctrl/collector.go | 11 ++++++----- resctrl/collector_test.go | 6 +++--- resctrl/manager.go | 11 ++++++----- resctrl/manager_test.go | 19 ++++++++++++++----- resctrl/utils.go | 6 +++++- resctrl/utils_test.go | 4 ++-- 8 files changed, 39 insertions(+), 24 deletions(-) diff --git a/deploy/Dockerfile b/deploy/Dockerfile index 2af193bee1..4163ec4303 100644 --- a/deploy/Dockerfile +++ b/deploy/Dockerfile @@ -34,7 +34,7 @@ RUN ./build/build.sh FROM alpine:3.12 MAINTAINER dengnan@google.com vmarmol@google.com vishnuk@google.com jimmidyson@gmail.com stclair@google.com -RUN apk --no-cache add libc6-compat device-mapper findutils zfs ndctl procps && \ +RUN apk --no-cache add libc6-compat device-mapper findutils zfs ndctl && \ apk --no-cache add thin-provisioning-tools --repository http://dl-3.alpinelinux.org/alpine/edge/main/ && \ echo 'hosts: files mdns4_minimal [NOTFOUND=return] dns mdns4' >> /etc/nsswitch.conf && \ rm -rf /var/cache/apk/* diff --git a/manager/manager.go b/manager/manager.go index 5176ea1e33..27d38950d9 100644 --- a/manager/manager.go +++ b/manager/manager.go @@ -222,7 +222,7 @@ func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, houskeepingConfig return nil, err } - newManager.resctrlManager, err = resctrl.NewManager(resctrlInterval, resctrl.Setup, machineInfo.CPUVendorID) + newManager.resctrlManager, err = resctrl.NewManager(resctrlInterval, resctrl.Setup, machineInfo.CPUVendorID, inHostNamespace) if err != nil { klog.V(4).Infof("Cannot gather resctrl metrics: %v", err) } @@ -971,7 +971,7 @@ func (m *manager) createContainerLocked(containerName string, watchSource watche if m.includedMetrics.Has(container.ResctrlMetrics) { cont.resctrlCollector, err = m.resctrlManager.GetCollector(containerName, func() ([]string, error) { - return cont.getContainerPids(true) + return cont.getContainerPids(m.inHostNamespace) }, len(m.machineInfo.Topology)) if err != nil { klog.V(4).Infof("resctrl metrics will not be available for container %s: %s", cont.info.Name, err) diff --git a/resctrl/collector.go b/resctrl/collector.go index eb4635450b..960cd0a564 100644 --- a/resctrl/collector.go +++ b/resctrl/collector.go @@ -42,16 +42,17 @@ type collector struct { numberOfNUMANodes int vendorID string mu sync.Mutex + inHostNamespace bool } -func newCollector(id string, getContainerPids func() ([]string, error), interval time.Duration, numberOfNUMANodes int, vendorID string) *collector { +func newCollector(id string, getContainerPids func() ([]string, error), interval time.Duration, numberOfNUMANodes int, vendorID string, inHostNamespace bool) *collector { return &collector{id: id, interval: interval, getContainerPids: getContainerPids, numberOfNUMANodes: numberOfNUMANodes, - vendorID: vendorID, mu: sync.Mutex{}} + vendorID: vendorID, mu: sync.Mutex{}, inHostNamespace: inHostNamespace} } func (c *collector) setup() error { var err error - c.resctrlPath, err = prepareMonitoringGroup(c.id, c.getContainerPids) + c.resctrlPath, err = prepareMonitoringGroup(c.id, c.getContainerPids, c.inHostNamespace) if c.interval != noInterval { if err != nil { @@ -74,7 +75,7 @@ func (c *collector) setup() error { klog.Errorf("Failed to check %q resctrl collector control group: %s \n Trying again in next intervals.", c.id, err) } } else { - c.resctrlPath, err = prepareMonitoringGroup(c.id, c.getContainerPids) + c.resctrlPath, err = prepareMonitoringGroup(c.id, c.getContainerPids, c.inHostNamespace) if err != nil { c.running = false klog.Errorf("Failed to setup container %q resctrl collector: %s \n Trying again in next intervals.", c.id, err) @@ -95,7 +96,7 @@ func (c *collector) setup() error { } func (c *collector) checkMonitoringGroup() error { - newPath, err := prepareMonitoringGroup(c.id, c.getContainerPids) + newPath, err := prepareMonitoringGroup(c.id, c.getContainerPids, c.inHostNamespace) if err != nil { return fmt.Errorf("couldn't obtain mon_group path: %v", err) } diff --git a/resctrl/collector_test.go b/resctrl/collector_test.go index 95d8214ea7..55f7334881 100644 --- a/resctrl/collector_test.go +++ b/resctrl/collector_test.go @@ -41,7 +41,7 @@ func TestNewCollectorWithSetup(t *testing.T) { expectedID := "container" expectedResctrlPath := filepath.Join(rootResctrl, monGroupsDirName, fmt.Sprintf("%s-%s", monGroupPrefix, expectedID)) - collector := newCollector(expectedID, mockGetContainerPids, 0, 2, "") + collector := newCollector(expectedID, mockGetContainerPids, 0, 2, "", true) err := collector.setup() assert.NoError(t, err) @@ -59,7 +59,7 @@ func TestUpdateStats(t *testing.T) { processPath = mockProcFs() defer os.RemoveAll(processPath) - collector := newCollector("container", mockGetContainerPids, 0, 2, "") + collector := newCollector("container", mockGetContainerPids, 0, 2, "", true) err := collector.setup() assert.NoError(t, err) @@ -97,7 +97,7 @@ func TestDestroy(t *testing.T) { processPath = mockProcFs() defer os.RemoveAll(processPath) - collector := newCollector("container", mockGetContainerPids, 0, 2, "") + collector := newCollector("container", mockGetContainerPids, 0, 2, "", true) err := collector.setup() if err != nil { t.Fail() diff --git a/resctrl/manager.go b/resctrl/manager.go index 032daa2f43..8f9852e270 100644 --- a/resctrl/manager.go +++ b/resctrl/manager.go @@ -31,12 +31,13 @@ type Manager interface { type manager struct { stats.NoopDestroy - interval time.Duration - vendorID string + interval time.Duration + vendorID string + inHostNamespace bool } func (m *manager) GetCollector(containerName string, getContainerPids func() ([]string, error), numberOfNUMANodes int) (stats.Collector, error) { - collector := newCollector(containerName, getContainerPids, m.interval, numberOfNUMANodes, m.vendorID) + collector := newCollector(containerName, getContainerPids, m.interval, numberOfNUMANodes, m.vendorID, m.inHostNamespace) err := collector.setup() if err != nil { return &stats.NoopCollector{}, err @@ -45,7 +46,7 @@ func (m *manager) GetCollector(containerName string, getContainerPids func() ([] return collector, nil } -func NewManager(interval time.Duration, setup func() error, vendorID string) (Manager, error) { +func NewManager(interval time.Duration, setup func() error, vendorID string, inHostNamespace bool) (Manager, error) { err := setup() if err != nil { return &NoopManager{}, err @@ -58,7 +59,7 @@ func NewManager(interval time.Duration, setup func() error, vendorID string) (Ma return &NoopManager{}, errors.New("there are no monitoring features available") } - return &manager{interval: interval, vendorID: vendorID}, nil + return &manager{interval: interval, vendorID: vendorID, inHostNamespace: inHostNamespace}, nil } type NoopManager struct { diff --git a/resctrl/manager_test.go b/resctrl/manager_test.go index 644285facf..42f58bbd48 100644 --- a/resctrl/manager_test.go +++ b/resctrl/manager_test.go @@ -29,6 +29,7 @@ func TestNewManager(t *testing.T) { isResctrlInitialized bool enabledCMT bool enabledMBM bool + inHostNamespace bool err string expected Manager }{ @@ -36,27 +37,31 @@ func TestNewManager(t *testing.T) { true, true, false, + true, "", - &manager{interval: 0}, + &manager{interval: 0, inHostNamespace: true}, }, { true, false, true, + true, "", - &manager{interval: 0}, + &manager{interval: 0, inHostNamespace: true}, }, { true, true, true, + false, "", - &manager{interval: 0}, + &manager{interval: 0, inHostNamespace: false}, }, { false, true, false, + false, "the resctrl isn't initialized", &NoopManager{}, }, @@ -64,6 +69,7 @@ func TestNewManager(t *testing.T) { false, false, true, + false, "the resctrl isn't initialized", &NoopManager{}, }, @@ -71,6 +77,7 @@ func TestNewManager(t *testing.T) { false, true, true, + true, "the resctrl isn't initialized", &NoopManager{}, }, @@ -78,6 +85,7 @@ func TestNewManager(t *testing.T) { false, false, false, + true, "the resctrl isn't initialized", &NoopManager{}, }, @@ -85,6 +93,7 @@ func TestNewManager(t *testing.T) { true, false, false, + true, "there are no monitoring features available", &NoopManager{}, }, @@ -97,7 +106,7 @@ func TestNewManager(t *testing.T) { enabledMBM = test.enabledMBM return nil } - got, err := NewManager(0, setup, "") + got, err := NewManager(0, setup, "", test.inHostNamespace) assert.Equal(t, got, test.expected) checkError(t, err, test.err) } @@ -121,7 +130,7 @@ func TestGetCollector(t *testing.T) { enabledMBM = true return nil } - manager, err := NewManager(0, setup, "") + manager, err := NewManager(0, setup, "", true) assert.NoError(t, err) _, err = manager.GetCollector(expectedID, mockGetContainerPids, 2) diff --git a/resctrl/utils.go b/resctrl/utils.go index 23f03a3087..d305228ade 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -95,7 +95,7 @@ func Setup() error { return nil } -func prepareMonitoringGroup(containerName string, getContainerPids func() ([]string, error)) (string, error) { +func prepareMonitoringGroup(containerName string, getContainerPids func() ([]string, error), inHostNamespace bool) (string, error) { if containerName == rootContainer { return rootResctrl, nil } @@ -143,6 +143,10 @@ func prepareMonitoringGroup(containerName string, getContainerPids func() ([]str return "", fmt.Errorf("couldn't create monitoring group directory for %q container: %w", containerName, err) } + if !inHostNamespace { + processPath = "/rootfs/proc" + } + for _, pid := range pids { processThreads, err := getAllProcessThreads(filepath.Join(processPath, pid, processTask)) if err != nil { diff --git a/resctrl/utils_test.go b/resctrl/utils_test.go index 5c9fb142b6..184f716cdf 100644 --- a/resctrl/utils_test.go +++ b/resctrl/utils_test.go @@ -327,7 +327,7 @@ func TestPrepareMonitoringGroup(t *testing.T) { } for _, test := range testCases { - actual, err := prepareMonitoringGroup(test.container, test.getContainerPids) + actual, err := prepareMonitoringGroup(test.container, test.getContainerPids, true) assert.Equal(t, test.expected, actual) checkError(t, err, test.err) } @@ -659,7 +659,7 @@ func TestGetStats(t *testing.T) { } for _, test := range testCases { - containerPath, _ := prepareMonitoringGroup(test.container, mockGetContainerPids) + containerPath, _ := prepareMonitoringGroup(test.container, mockGetContainerPids, true) mockResctrlMonData(containerPath) actual, err := getIntelRDTStatsFrom(containerPath, "") checkError(t, err, test.err) From b76ee155244d855b6c0ede3864fcd0f98d856d56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Thu, 9 Sep 2021 15:26:51 +0200 Subject: [PATCH 46/49] Update to go 1.17. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- resctrl/collector_test.go | 1 + resctrl/manager_test.go | 1 + resctrl/utils.go | 1 + resctrl/utils_test.go | 1 + 4 files changed, 4 insertions(+) diff --git a/resctrl/collector_test.go b/resctrl/collector_test.go index 55f7334881..e8b665eb5e 100644 --- a/resctrl/collector_test.go +++ b/resctrl/collector_test.go @@ -1,3 +1,4 @@ +//go:build linux // +build linux // Copyright 2021 Google Inc. All Rights Reserved. diff --git a/resctrl/manager_test.go b/resctrl/manager_test.go index 42f58bbd48..854a89ce84 100644 --- a/resctrl/manager_test.go +++ b/resctrl/manager_test.go @@ -1,3 +1,4 @@ +//go:build linux // +build linux // Copyright 2021 Google Inc. All Rights Reserved. diff --git a/resctrl/utils.go b/resctrl/utils.go index d305228ade..f7931d22eb 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -1,3 +1,4 @@ +//go:build linux // +build linux // Copyright 2021 Google Inc. All Rights Reserved. diff --git a/resctrl/utils_test.go b/resctrl/utils_test.go index 184f716cdf..ad97f65715 100644 --- a/resctrl/utils_test.go +++ b/resctrl/utils_test.go @@ -1,3 +1,4 @@ +//go:build linux // +build linux // Copyright 2021 Google Inc. All Rights Reserved. From 8c734a03c1bc0e803b5b5cbbca03882ea0135bb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Thu, 9 Sep 2021 17:12:16 +0200 Subject: [PATCH 47/49] Add information about possible race condition. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- docs/runtime_options.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/runtime_options.md b/docs/runtime_options.md index 10bef08f52..74aa845303 100644 --- a/docs/runtime_options.md +++ b/docs/runtime_options.md @@ -425,6 +425,7 @@ should be a human readable string that will become a metric name. ## Resctrl To gain metrics, cAdvisor creates own monitoring groups with `cadvisor` prefix. +Resctrl file system is not hierarchical like cgroups, so it's recommended to set `--docker_only` flag to avoid race conditions and unexpected behaviours. ``` --resctrl_interval=0: Resctrl mon groups updating interval. Zero value disables updating mon groups. From fb4b9c8faea012a823bed006749f8c974dee4ec7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Fri, 10 Sep 2021 13:07:48 +0200 Subject: [PATCH 48/49] Add warning when docker_only is not set. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- container/raw/factory.go | 4 ++-- resctrl/manager.go | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/container/raw/factory.go b/container/raw/factory.go index ca0cc14376..177d43a37d 100644 --- a/container/raw/factory.go +++ b/container/raw/factory.go @@ -29,7 +29,7 @@ import ( "k8s.io/klog/v2" ) -var dockerOnly = flag.Bool("docker_only", false, "Only report docker containers in addition to root stats") +var DockerOnly = flag.Bool("docker_only", false, "Only report docker containers in addition to root stats") var disableRootCgroupStats = flag.Bool("disable_root_cgroup_stats", false, "Disable collecting root Cgroup stats") type rawFactory struct { @@ -69,7 +69,7 @@ func (f *rawFactory) CanHandleAndAccept(name string) (bool, bool, error) { if name == "/" { return true, true, nil } - if *dockerOnly && f.rawPrefixWhiteList[0] == "" { + if *DockerOnly && f.rawPrefixWhiteList[0] == "" { return true, false, nil } for _, prefix := range f.rawPrefixWhiteList { diff --git a/resctrl/manager.go b/resctrl/manager.go index 96082283b2..672e0c74e0 100644 --- a/resctrl/manager.go +++ b/resctrl/manager.go @@ -22,6 +22,9 @@ import ( "errors" "time" + "k8s.io/klog/v2" + + "github.com/google/cadvisor/container/raw" "github.com/google/cadvisor/stats" ) @@ -60,6 +63,10 @@ func NewManager(interval time.Duration, setup func() error, vendorID string, inH return &NoopManager{}, errors.New("there are no monitoring features available") } + if !*raw.DockerOnly { + klog.Warning("--docker_only should be set when collecting Resctrl metrics! See the runtime docs.") + } + return &manager{interval: interval, vendorID: vendorID, inHostNamespace: inHostNamespace}, nil } From 00b42cfdbfa7c6652aec3fd0e311ea155703b6ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Szulik?= Date: Fri, 10 Sep 2021 13:08:26 +0200 Subject: [PATCH 49/49] Fix typo. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Paweł Szulik --- docs/runtime_options.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/runtime_options.md b/docs/runtime_options.md index 74aa845303..c0b4f89e0c 100644 --- a/docs/runtime_options.md +++ b/docs/runtime_options.md @@ -425,7 +425,7 @@ should be a human readable string that will become a metric name. ## Resctrl To gain metrics, cAdvisor creates own monitoring groups with `cadvisor` prefix. -Resctrl file system is not hierarchical like cgroups, so it's recommended to set `--docker_only` flag to avoid race conditions and unexpected behaviours. +Resctrl file system is not hierarchical like cgroups, so users should set `--docker_only` flag to avoid race conditions and unexpected behaviours. ``` --resctrl_interval=0: Resctrl mon groups updating interval. Zero value disables updating mon groups.