Skip to content

Commit

Permalink
Modify numa-aware memory headroom policy
Browse files Browse the repository at this point in the history
Numa-aware memory headroom was calculated by
    reclaimable numa free - 2 * watermark_reserved - config_reserved + reclaimed-core requests
The double watermark reservation held back too much memory. In addition, a portion of the inactive file pages should be
considered cold pages on the system, which can be counted toward memory headroom because they are easy
for the kernel's reclaim mechanism to reclaim.

Therefore, NUMA-aware memory headroom is now calculated as
    reclaimable numa free - watermark_reserved - config_reserved + cacheBasedRatio * numaInactiveFile + reclaimed-core requests
  • Loading branch information
LuyaoZhong committed Nov 30, 2023
1 parent 578c02f commit 91b6bc8
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ func (p *PolicyNUMAAware) Update() (err error) {
reclaimableMemory float64
data metric.MetricData
)
dynamicConfig := p.conf.GetDynamicConfiguration()

availNUMAs := p.metaServer.CPUDetails.NUMANodes()

Expand Down Expand Up @@ -121,6 +122,13 @@ func (p *PolicyNUMAAware) Update() (err error) {
reclaimableMemory += data.Value
general.InfoS("reclaimable numa memory free", "numaID", numaID, "numaFree", general.FormatMemoryQuantity(data.Value))

data, err = p.metaServer.GetNumaMetric(numaID, consts.MetricMemInactiveFileNuma)
if err != nil {
return err
}
reclaimableMemory += data.Value * dynamicConfig.CacheBasedRatio
general.InfoS("reclaimable numa inactive file", "numaID", numaID, "numaInactiveFile", general.FormatMemoryQuantity(data.Value))

data, err = p.metaServer.GetNumaMetric(numaID, consts.MetricMemTotalNuma)
if err != nil {
return err
Expand All @@ -139,12 +147,13 @@ func (p *PolicyNUMAAware) Update() (err error) {
general.InfoS("Can not get system watermark scale factor")
return err
}
// calculate system factor with double scale_factor to make kswapd less happened
systemWatermarkReserved := availNUMATotal * 2 * watermarkScaleFactor.Value / 10000
// reserve memory for watermark_scale_factor to make kswapd less happened
systemWatermarkReserved := availNUMATotal * watermarkScaleFactor.Value / 10000

general.InfoS("total memory reclaimable",
"reclaimableMemory", general.FormatMemoryQuantity(reclaimableMemory),
"ReservedForAllocate", general.FormatMemoryQuantity(p.essentials.ReservedForAllocate),
"ReservedForWatermark", general.FormatMemoryQuantity(systemWatermarkReserved),
"ResourceUpperBound", general.FormatMemoryQuantity(p.essentials.ResourceUpperBound))
p.memoryHeadroom = general.Clamp(reclaimableMemory-p.essentials.ReservedForAllocate-systemWatermarkReserved, 0, p.essentials.ResourceUpperBound)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"github.com/kubewharf/katalyst-api/pkg/consts"
"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/metacache"
"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/types"
"github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/adminqos/reclaimedresource/memoryheadroom"
pkgconsts "github.com/kubewharf/katalyst-core/pkg/consts"
"github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric"
"github.com/kubewharf/katalyst-core/pkg/metrics"
Expand All @@ -44,10 +45,11 @@ func TestPolicyNUMAAware(t *testing.T) {
now := time.Now()

type fields struct {
podList []*v1.Pod
containers []*types.ContainerInfo
essentials types.ResourceEssentials
setFakeMetric func(store *metric.FakeMetricsFetcher)
podList []*v1.Pod
containers []*types.ContainerInfo
memoryHeadroomConfiguration *memoryheadroom.MemoryHeadroomConfiguration
essentials types.ResourceEssentials
setFakeMetric func(store *metric.FakeMetricsFetcher)
}
tests := []struct {
name string
Expand Down Expand Up @@ -101,16 +103,23 @@ func TestPolicyNUMAAware(t *testing.T) {
ResourceUpperBound: 400 << 30,
ReservedForAllocate: 4 << 30,
},
memoryHeadroomConfiguration: &memoryheadroom.MemoryHeadroomConfiguration{
MemoryUtilBasedConfiguration: &memoryheadroom.MemoryUtilBasedConfiguration{
CacheBasedRatio: 0.5,
},
},
setFakeMetric: func(store *metric.FakeMetricsFetcher) {
store.SetNodeMetric(pkgconsts.MetricMemScaleFactorSystem, utilmetric.MetricData{Value: 500, Time: &now})
store.SetNumaMetric(0, pkgconsts.MetricMemTotalNuma, utilmetric.MetricData{Value: 250 << 30, Time: &now})
store.SetNumaMetric(1, pkgconsts.MetricMemTotalNuma, utilmetric.MetricData{Value: 250 << 30, Time: &now})
store.SetNumaMetric(0, pkgconsts.MetricMemFreeNuma, utilmetric.MetricData{Value: 100 << 30, Time: &now})
store.SetNumaMetric(1, pkgconsts.MetricMemFreeNuma, utilmetric.MetricData{Value: 100 << 30, Time: &now})
store.SetNumaMetric(0, pkgconsts.MetricMemInactiveFileNuma, utilmetric.MetricData{Value: 50 << 30, Time: &now})
store.SetNumaMetric(1, pkgconsts.MetricMemInactiveFileNuma, utilmetric.MetricData{Value: 50 << 30, Time: &now})
},
},
wantErr: false,
want: resource.MustParse("146Gi"),
want: resource.MustParse("221Gi"),
},
{
name: "normal: reclaimed_cores containers only",
Expand All @@ -122,6 +131,11 @@ func TestPolicyNUMAAware(t *testing.T) {
consts.PodAnnotationQoSLevelReclaimedCores, nil,
nil, 20<<30),
},
memoryHeadroomConfiguration: &memoryheadroom.MemoryHeadroomConfiguration{
MemoryUtilBasedConfiguration: &memoryheadroom.MemoryUtilBasedConfiguration{
CacheBasedRatio: 0.5,
},
},
essentials: types.ResourceEssentials{
EnableReclaim: true,
ResourceUpperBound: 400 << 30,
Expand All @@ -133,10 +147,12 @@ func TestPolicyNUMAAware(t *testing.T) {
store.SetNumaMetric(1, pkgconsts.MetricMemTotalNuma, utilmetric.MetricData{Value: 250 << 30, Time: &now})
store.SetNumaMetric(0, pkgconsts.MetricMemFreeNuma, utilmetric.MetricData{Value: 100 << 30, Time: &now})
store.SetNumaMetric(1, pkgconsts.MetricMemFreeNuma, utilmetric.MetricData{Value: 100 << 30, Time: &now})
store.SetNumaMetric(0, pkgconsts.MetricMemInactiveFileNuma, utilmetric.MetricData{Value: 50 << 30, Time: &now})
store.SetNumaMetric(1, pkgconsts.MetricMemInactiveFileNuma, utilmetric.MetricData{Value: 50 << 30, Time: &now})
},
},
wantErr: false,
want: resource.MustParse("166Gi"),
want: resource.MustParse("241Gi"),
},
{
name: "normal: reclaimed_cores containers with numa-exclusive containers",
Expand All @@ -156,6 +172,11 @@ func TestPolicyNUMAAware(t *testing.T) {
types.TopologyAwareAssignment{
0: machine.NewCPUSet(0),
}, 30<<30)},
memoryHeadroomConfiguration: &memoryheadroom.MemoryHeadroomConfiguration{
MemoryUtilBasedConfiguration: &memoryheadroom.MemoryUtilBasedConfiguration{
CacheBasedRatio: 0.5,
},
},
essentials: types.ResourceEssentials{
EnableReclaim: true,
ResourceUpperBound: 400 << 30,
Expand All @@ -167,10 +188,12 @@ func TestPolicyNUMAAware(t *testing.T) {
store.SetNumaMetric(1, pkgconsts.MetricMemTotalNuma, utilmetric.MetricData{Value: 250 << 30, Time: &now})
store.SetNumaMetric(0, pkgconsts.MetricMemFreeNuma, utilmetric.MetricData{Value: 100 << 30, Time: &now})
store.SetNumaMetric(1, pkgconsts.MetricMemFreeNuma, utilmetric.MetricData{Value: 100 << 30, Time: &now})
store.SetNumaMetric(0, pkgconsts.MetricMemInactiveFileNuma, utilmetric.MetricData{Value: 50 << 30, Time: &now})
store.SetNumaMetric(1, pkgconsts.MetricMemInactiveFileNuma, utilmetric.MetricData{Value: 50 << 30, Time: &now})
},
},
wantErr: false,
want: resource.MustParse("91Gi"),
want: resource.MustParse("128.5Gi"),
},
}
for _, tt := range tests {
Expand All @@ -184,6 +207,7 @@ func TestPolicyNUMAAware(t *testing.T) {
defer os.RemoveAll(sfDir)

conf := generateTestConfiguration(t, ckDir, sfDir)
conf.GetDynamicConfiguration().MemoryHeadroomConfiguration = tt.fields.memoryHeadroomConfiguration

metricsFetcher := metric.NewFakeMetricsFetcher(metrics.DummyMetrics{})
metaCache, err := metacache.NewMetaCacheImp(conf, metricspool.DummyMetricsEmitterPool{}, metricsFetcher)
Expand Down
13 changes: 7 additions & 6 deletions pkg/consts/metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,13 @@ const (

// System numa metrics
const (
MetricMemTotalNuma = "mem.total.numa"
MetricMemUsedNuma = "mem.used.numa"
MetricMemFreeNuma = "mem.free.numa"
MetricMemShmemNuma = "mem.shmem.numa"
MetricMemAvailableNuma = "mem.available.numa"
MetricMemFilepageNuma = "mem.filepage.numa"
MetricMemTotalNuma = "mem.total.numa"
MetricMemUsedNuma = "mem.used.numa"
MetricMemFreeNuma = "mem.free.numa"
MetricMemShmemNuma = "mem.shmem.numa"
MetricMemAvailableNuma = "mem.available.numa"
MetricMemFilepageNuma = "mem.filepage.numa"
MetricMemInactiveFileNuma = "mem.inactivefile.numa"

MetricMemBandwidthNuma = "mem.bandwidth.numa"
MetricMemBandwidthMaxNuma = "mem.bandwidth.max.numa"
Expand Down
2 changes: 2 additions & 0 deletions pkg/metaserver/agent/metric/malachite/fetcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,8 @@ func (m *MalachiteMetricsFetcher) processSystemNumaData(systemMemoryData *types.
utilmetric.MetricData{Value: float64(numa.MemAvailable << 10), Time: &updateTime})
m.metricStore.SetNumaMetric(numa.ID, consts.MetricMemFilepageNuma,
utilmetric.MetricData{Value: float64(numa.MemFilePages << 10), Time: &updateTime})
m.metricStore.SetNumaMetric(numa.ID, consts.MetricMemInactiveFileNuma,
utilmetric.MetricData{Value: float64(numa.MemInactiveFile << 10), Time: &updateTime})

m.metricStore.SetNumaMetric(numa.ID, consts.MetricMemBandwidthNuma,
utilmetric.MetricData{Value: numa.MemReadBandwidthMB/1024.0 + numa.MemWriteBandwidthMB/1024.0, Time: &updateTime})
Expand Down
1 change: 1 addition & 0 deletions pkg/metaserver/agent/metric/malachite/types/system.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ type Numa struct {
MemShmem uint64 `json:"mem_shmem"`
MemAvailable uint64 `json:"mem_available"`
MemFilePages uint64 `json:"mem_file_pages"`
MemInactiveFile uint64 `json:"mem_inactive_file"`
MemMaxBandwidthMB float64 `json:"mem_mx_bandwidth_mb"`
MemReadBandwidthMB float64 `json:"mem_read_bandwidth_mb"`
MemReadLatency float64 `json:"mem_read_latency"`
Expand Down

0 comments on commit 91b6bc8

Please sign in to comment.