Skip to content

Commit

Permalink
Merge pull request #710 from gary-lgy/gary/fix-numa-balancer-cgroupv1
Browse files Browse the repository at this point in the history
fix(qrm): fix printing of numa mem migrate container stats
luomingmeng authored Nov 20, 2024

Verified

This commit was signed with the committer’s verified signature.
ozline ozlinex
2 parents bb80416 + e2eee22 commit 32a3253
Showing 5 changed files with 168 additions and 26 deletions.
Original file line number Diff line number Diff line change
@@ -570,10 +570,10 @@ func (p *DynamicPolicy) handleNumaMemoryBalance(_ *config.Configuration,
}

type containerMigrateStat struct {
containerID string
cgroupPath string
rssBefore uint64
rssAfter uint64
ContainerID string
CgroupPath string
RssBefore uint64
RssAfter uint64
}

func (p *DynamicPolicy) doNumaMemoryBalance(ctx context.Context, advice types.NumaMemoryBalanceAdvice) error {
@@ -618,8 +618,8 @@ func (p *DynamicPolicy) doNumaMemoryBalance(ctx context.Context, advice types.Nu

key := keyFunc(containerInfo.PodUID, containerInfo.ContainerName)
containerStats[key] = &containerMigrateStat{
containerID: containerID,
cgroupPath: memoryAbsCGPath,
ContainerID: containerID,
CgroupPath: memoryAbsCGPath,
}
}

@@ -633,16 +633,16 @@ func (p *DynamicPolicy) doNumaMemoryBalance(ctx context.Context, advice types.Nu
}

stats := containerStats[containerKey]
rssBefore, err := getAnonOnNumaFunc(stats.cgroupPath, advice.SourceNuma)
rssBefore, err := getAnonOnNumaFunc(stats.CgroupPath, advice.SourceNuma)
if err != nil {
general.Errorf("getAnonOnNumaFunc failed for container[%v/%v] numa [%v],err: %v",
containerInfo.PodUID, containerInfo.ContainerName, advice.SourceNuma, err)
}
stats.rssBefore = rssBefore
stats.RssBefore = rssBefore

containerNumaSet := machine.NewCPUSet(containerInfo.DestNumaList...)
if containerNumaSet.Contains(destNuma) {
err = MigratePagesForContainer(ctx, containerInfo.PodUID, stats.containerID, p.topology.NumNUMANodes,
err = MigratePagesForContainer(ctx, containerInfo.PodUID, stats.ContainerID, p.topology.NumNUMANodes,
machine.NewCPUSet(advice.SourceNuma), machine.NewCPUSet(destNuma))
if err != nil {
general.Errorf("MigratePagesForContainer failed for container[%v/%v] source_numa [%v],dest_numa [%v],err: %v",
@@ -652,12 +652,12 @@ func (p *DynamicPolicy) doNumaMemoryBalance(ctx context.Context, advice types.Nu
general.Infof("skip migrate container %v/%v memory from %v to %v", containerInfo.PodUID, containerInfo.ContainerName, advice.SourceNuma, advice.DestNumaList)
}

rssAfter, err := getAnonOnNumaFunc(stats.cgroupPath, advice.SourceNuma)
rssAfter, err := getAnonOnNumaFunc(stats.CgroupPath, advice.SourceNuma)
if err != nil {
general.Errorf("getAnonOnNumaFunc failed for container[%v/%v] numa [%v],err: %v",
containerInfo.PodUID, containerInfo.ContainerName, advice.SourceNuma, err)
}
stats.rssAfter = rssAfter
stats.RssAfter = rssAfter

if rssAfter > rssBefore {
general.Infof("rssAfter: %d greater than rssBefore: %d", rssAfter, rssBefore)
@@ -673,8 +673,8 @@ func (p *DynamicPolicy) doNumaMemoryBalance(ctx context.Context, advice types.Nu
containerStatJson, _ := json.Marshal(containerStats)
general.Infof("containerStats: %+v", string(containerStatJson))
for _, stat := range containerStats {
totalRSSAfter += stat.rssAfter
totalRSSBefore += stat.rssBefore
totalRSSAfter += stat.RssAfter
totalRSSBefore += stat.RssBefore
}
general.Infof("numa memory balance migration succeed, advice total rss: %v, threshold: %v, sourceNuma:%v, targetNuma:%v, total rss before:%v, total rss after: %v",
advice.TotalRSS, advice.Threshold, advice.SourceNuma, destNuma, totalRSSBefore, totalRSSAfter)
23 changes: 15 additions & 8 deletions pkg/util/cgroup/common/cgroup_linux.go
Original file line number Diff line number Diff line change
@@ -80,23 +80,30 @@ func GetCgroupParamInt(cgroupPath, cgroupFile string) (int64, error) {
return res, nil
}

// ParseCgroupNumaValue parse cgroup numa stat files like memory.numa_stat, the format is like "anon N0=1686843392 N1=1069957120 N2=316747776 N3=163962880"
func ParseCgroupNumaValue(cgroupPath, cgroupFile string) (map[string]map[int]uint64, error) {
fileName := filepath.Join(cgroupPath, cgroupFile)
content, err := ioutil.ReadFile(fileName)
if err != nil {
return nil, err
}
/*
ParseCgroupNumaValue parse cgroup numa stat files like `memory.numa_stat`.
cgroup v1 format:
<counter>=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
hierarchical_<counter>=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
cgroup v2 format:
<counter> N0=<bytes in node 0> N1=<bytes in node 1> ...
*/
func ParseCgroupNumaValue(content string) (map[string]map[int]uint64, error) {
result := make(map[string]map[int]uint64)
lines := strings.Split(string(content), "\n")
lines := strings.Split(content, "\n")
for _, line := range lines {
cols := strings.Fields(line)
if len(cols) <= 1 {
continue
}

key := cols[0]
if index := strings.Index(key, "="); index != -1 {
// For v1 format, remove the suffix after "="
key = key[:index]
}
numaInfo := make(map[int]uint64)
for _, pair := range cols[1:] {
parts := strings.Split(pair, "=")
124 changes: 124 additions & 0 deletions pkg/util/cgroup/common/cgroup_linux_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
//go:build linux
// +build linux

/*
Copyright 2022 The Katalyst Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package common

import (
"reflect"
"testing"
)

func TestParseCgroupNumaValue(t *testing.T) {
t.Parallel()

type args struct {
content string
}
tests := []struct {
name string
args args
want map[string]map[int]uint64
wantErr bool
}{
{
name: "cgroupv1 format",
args: args{
content: `total=7587426 N0=92184 N1=21339 N2=104047 N3=7374122
file=70686 N0=5353 N1=3096 N2=12817 N3=51844
anon=7516740 N0=86831 N1=18243 N2=91230 N3=7322278
unevictable=0 N0=0 N1=0 N2=0 N3=0`,
},
want: map[string]map[int]uint64{
"total": {
0: 92184,
1: 21339,
2: 104047,
3: 7374122,
},
"file": {
0: 5353,
1: 3096,
2: 12817,
3: 51844,
},
"anon": {
0: 86831,
1: 18243,
2: 91230,
3: 7322278,
},
"unevictable": {
0: 0,
1: 0,
2: 0,
3: 0,
},
},
wantErr: false,
},
{
name: "cgroupv2 format",
args: args{
content: `anon N0=1629990912 N1=65225723904
file N0=1892352 N1=37441536
unevictable N0=0 N1=0`,
},
want: map[string]map[int]uint64{
"anon": {
0: 1629990912,
1: 65225723904,
},
"file": {
0: 1892352,
1: 37441536,
},
"unevictable": {
0: 0,
1: 0,
},
},
wantErr: false,
},
{
name: "wrong separator",
args: args{
content: `anon N0:1629990912 N1:65225723904
file N0:1892352 N1:37441536
unevictable N0:0 N1:0`,
},
wantErr: true,
},
}
for _, tt := range tests {
tt := tt
t.Run(tt.name, func(t *testing.T) {
t.Parallel()

got, err := ParseCgroupNumaValue(tt.args.content)
if (err != nil) != tt.wantErr {
t.Errorf("ParseCgroupNumaValue() error = %v, wantErr %v", err, tt.wantErr)
return
}
if !reflect.DeepEqual(got, tt.want) {
t.Errorf("ParseCgroupNumaValue() = %v, want %v", got, tt.want)
return
}
})
}
}
10 changes: 8 additions & 2 deletions pkg/util/cgroup/manager/v1/fs_linux.go
Original file line number Diff line number Diff line change
@@ -239,9 +239,15 @@ func (m *manager) GetMemory(absCgroupPath string) (*common.MemoryStats, error) {
}

func (m *manager) GetNumaMemory(absCgroupPath string) (map[int]*common.MemoryNumaMetrics, error) {
numaStat, err := common.ParseCgroupNumaValue(absCgroupPath, "memory.numa_stat")
const fileName = "memory.numa_stat"
content, err := libcgroups.ReadFile(absCgroupPath, fileName)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to read %s: %w", fileName, err)
}

numaStat, err := common.ParseCgroupNumaValue(content)
if err != nil {
return nil, fmt.Errorf("failed to parse numa stat: %w", err)
}

pageSize := uint64(syscall.Getpagesize())
11 changes: 8 additions & 3 deletions pkg/util/cgroup/manager/v2/fs_linux.go
Original file line number Diff line number Diff line change
@@ -282,10 +282,15 @@ func (m *manager) GetMemory(absCgroupPath string) (*common.MemoryStats, error) {
}

func (m *manager) GetNumaMemory(absCgroupPath string) (map[int]*common.MemoryNumaMetrics, error) {
numaStat, err := common.ParseCgroupNumaValue(absCgroupPath, "memory.numa_stat")
general.Infof("get cgroup %+v numa stat %+v", absCgroupPath, numaStat)
const fileName = "memory.numa_stat"
content, err := libcgroups.ReadFile(absCgroupPath, fileName)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to read %s: %w", fileName, err)
}

numaStat, err := common.ParseCgroupNumaValue(content)
if err != nil {
return nil, fmt.Errorf("failed to parse numa stat: %w", err)
}

result := make(map[int]*common.MemoryNumaMetrics)

0 comments on commit 32a3253

Please sign in to comment.