Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(qrm): fix printing of numa mem migrate container stats #710

Merged
merged 2 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -570,10 +570,10 @@ func (p *DynamicPolicy) handleNumaMemoryBalance(_ *config.Configuration,
}

type containerMigrateStat struct {
containerID string
cgroupPath string
rssBefore uint64
rssAfter uint64
ContainerID string
CgroupPath string
RssBefore uint64
RssAfter uint64
}

func (p *DynamicPolicy) doNumaMemoryBalance(ctx context.Context, advice types.NumaMemoryBalanceAdvice) error {
Expand Down Expand Up @@ -618,8 +618,8 @@ func (p *DynamicPolicy) doNumaMemoryBalance(ctx context.Context, advice types.Nu

key := keyFunc(containerInfo.PodUID, containerInfo.ContainerName)
containerStats[key] = &containerMigrateStat{
containerID: containerID,
cgroupPath: memoryAbsCGPath,
ContainerID: containerID,
CgroupPath: memoryAbsCGPath,
}
}

Expand All @@ -633,16 +633,16 @@ func (p *DynamicPolicy) doNumaMemoryBalance(ctx context.Context, advice types.Nu
}

stats := containerStats[containerKey]
rssBefore, err := getAnonOnNumaFunc(stats.cgroupPath, advice.SourceNuma)
rssBefore, err := getAnonOnNumaFunc(stats.CgroupPath, advice.SourceNuma)
if err != nil {
general.Errorf("getAnonOnNumaFunc failed for container[%v/%v] numa [%v],err: %v",
containerInfo.PodUID, containerInfo.ContainerName, advice.SourceNuma, err)
}
stats.rssBefore = rssBefore
stats.RssBefore = rssBefore

containerNumaSet := machine.NewCPUSet(containerInfo.DestNumaList...)
if containerNumaSet.Contains(destNuma) {
err = MigratePagesForContainer(ctx, containerInfo.PodUID, stats.containerID, p.topology.NumNUMANodes,
err = MigratePagesForContainer(ctx, containerInfo.PodUID, stats.ContainerID, p.topology.NumNUMANodes,
machine.NewCPUSet(advice.SourceNuma), machine.NewCPUSet(destNuma))
if err != nil {
general.Errorf("MigratePagesForContainer failed for container[%v/%v] source_numa [%v],dest_numa [%v],err: %v",
Expand All @@ -652,12 +652,12 @@ func (p *DynamicPolicy) doNumaMemoryBalance(ctx context.Context, advice types.Nu
general.Infof("skip migrate container %v/%v memory from %v to %v", containerInfo.PodUID, containerInfo.ContainerName, advice.SourceNuma, advice.DestNumaList)
}

rssAfter, err := getAnonOnNumaFunc(stats.cgroupPath, advice.SourceNuma)
rssAfter, err := getAnonOnNumaFunc(stats.CgroupPath, advice.SourceNuma)
if err != nil {
general.Errorf("getAnonOnNumaFunc failed for container[%v/%v] numa [%v],err: %v",
containerInfo.PodUID, containerInfo.ContainerName, advice.SourceNuma, err)
}
stats.rssAfter = rssAfter
stats.RssAfter = rssAfter

if rssAfter > rssBefore {
general.Infof("rssAfter: %d greater than rssBefore: %d", rssAfter, rssBefore)
Expand All @@ -673,8 +673,8 @@ func (p *DynamicPolicy) doNumaMemoryBalance(ctx context.Context, advice types.Nu
containerStatJson, _ := json.Marshal(containerStats)
general.Infof("containerStats: %+v", string(containerStatJson))
for _, stat := range containerStats {
totalRSSAfter += stat.rssAfter
totalRSSBefore += stat.rssBefore
totalRSSAfter += stat.RssAfter
totalRSSBefore += stat.RssBefore
}
general.Infof("numa memory balance migration succeed, advice total rss: %v, threshold: %v, sourceNuma:%v, targetNuma:%v, total rss before:%v, total rss after: %v",
advice.TotalRSS, advice.Threshold, advice.SourceNuma, destNuma, totalRSSBefore, totalRSSAfter)
Expand Down
23 changes: 15 additions & 8 deletions pkg/util/cgroup/common/cgroup_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,23 +80,30 @@ func GetCgroupParamInt(cgroupPath, cgroupFile string) (int64, error) {
return res, nil
}

// ParseCgroupNumaValue parse cgroup numa stat files like memory.numa_stat, the format is like "anon N0=1686843392 N1=1069957120 N2=316747776 N3=163962880"
func ParseCgroupNumaValue(cgroupPath, cgroupFile string) (map[string]map[int]uint64, error) {
fileName := filepath.Join(cgroupPath, cgroupFile)
content, err := ioutil.ReadFile(fileName)
if err != nil {
return nil, err
}
/*
ParseCgroupNumaValue parse cgroup numa stat files like `memory.numa_stat`.

cgroup v1 format:
<counter>=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
hierarchical_<counter>=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...

cgroup v2 format:
<counter> N0=<bytes in node 0> N1=<bytes in node 1> ...
*/
func ParseCgroupNumaValue(content string) (map[string]map[int]uint64, error) {
result := make(map[string]map[int]uint64)
lines := strings.Split(string(content), "\n")
lines := strings.Split(content, "\n")
for _, line := range lines {
cols := strings.Fields(line)
if len(cols) <= 1 {
continue
}

key := cols[0]
if index := strings.Index(key, "="); index != -1 {
// For v1 format, remove the suffix after "="
key = key[:index]
}
numaInfo := make(map[int]uint64)
for _, pair := range cols[1:] {
parts := strings.Split(pair, "=")
Expand Down
124 changes: 124 additions & 0 deletions pkg/util/cgroup/common/cgroup_linux_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
//go:build linux
// +build linux

/*
Copyright 2022 The Katalyst Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package common

import (
"reflect"
"testing"
)

func TestParseCgroupNumaValue(t *testing.T) {
t.Parallel()

type args struct {
content string
}
tests := []struct {
name string
args args
want map[string]map[int]uint64
wantErr bool
}{
{
name: "cgroupv1 format",
args: args{
content: `total=7587426 N0=92184 N1=21339 N2=104047 N3=7374122
file=70686 N0=5353 N1=3096 N2=12817 N3=51844
anon=7516740 N0=86831 N1=18243 N2=91230 N3=7322278
unevictable=0 N0=0 N1=0 N2=0 N3=0`,
},
want: map[string]map[int]uint64{
"total": {
0: 92184,
1: 21339,
2: 104047,
3: 7374122,
},
"file": {
0: 5353,
1: 3096,
2: 12817,
3: 51844,
},
"anon": {
0: 86831,
1: 18243,
2: 91230,
3: 7322278,
},
"unevictable": {
0: 0,
1: 0,
2: 0,
3: 0,
},
},
wantErr: false,
},
{
name: "cgroupv2 format",
args: args{
content: `anon N0=1629990912 N1=65225723904
file N0=1892352 N1=37441536
unevictable N0=0 N1=0`,
},
want: map[string]map[int]uint64{
"anon": {
0: 1629990912,
1: 65225723904,
},
"file": {
0: 1892352,
1: 37441536,
},
"unevictable": {
0: 0,
1: 0,
},
},
wantErr: false,
},
{
name: "wrong separator",
args: args{
content: `anon N0:1629990912 N1:65225723904
file N0:1892352 N1:37441536
unevictable N0:0 N1:0`,
},
wantErr: true,
},
}
for _, tt := range tests {
tt := tt
t.Run(tt.name, func(t *testing.T) {
t.Parallel()

got, err := ParseCgroupNumaValue(tt.args.content)
if (err != nil) != tt.wantErr {
t.Errorf("ParseCgroupNumaValue() error = %v, wantErr %v", err, tt.wantErr)
return
}
if !reflect.DeepEqual(got, tt.want) {
t.Errorf("ParseCgroupNumaValue() = %v, want %v", got, tt.want)
return
}
})
}
}
10 changes: 8 additions & 2 deletions pkg/util/cgroup/manager/v1/fs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,9 +239,15 @@ func (m *manager) GetMemory(absCgroupPath string) (*common.MemoryStats, error) {
}

func (m *manager) GetNumaMemory(absCgroupPath string) (map[int]*common.MemoryNumaMetrics, error) {
numaStat, err := common.ParseCgroupNumaValue(absCgroupPath, "memory.numa_stat")
const fileName = "memory.numa_stat"
content, err := libcgroups.ReadFile(absCgroupPath, fileName)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to read %s: %w", fileName, err)
}

numaStat, err := common.ParseCgroupNumaValue(content)
if err != nil {
return nil, fmt.Errorf("failed to parse numa stat: %w", err)
}

pageSize := uint64(syscall.Getpagesize())
Expand Down
11 changes: 8 additions & 3 deletions pkg/util/cgroup/manager/v2/fs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -282,10 +282,15 @@ func (m *manager) GetMemory(absCgroupPath string) (*common.MemoryStats, error) {
}

func (m *manager) GetNumaMemory(absCgroupPath string) (map[int]*common.MemoryNumaMetrics, error) {
numaStat, err := common.ParseCgroupNumaValue(absCgroupPath, "memory.numa_stat")
general.Infof("get cgroup %+v numa stat %+v", absCgroupPath, numaStat)
const fileName = "memory.numa_stat"
content, err := libcgroups.ReadFile(absCgroupPath, fileName)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to read %s: %w", fileName, err)
}

numaStat, err := common.ParseCgroupNumaValue(content)
if err != nil {
return nil, fmt.Errorf("failed to parse numa stat: %w", err)
}

result := make(map[int]*common.MemoryNumaMetrics)
Expand Down
Loading