Skip to content

Commit

Permalink
chore(qrm): prefer binding reclaimed_cores as RNB, prioritizing memor…
Browse files Browse the repository at this point in the history
…y availability over CPU.
  • Loading branch information
luomingmeng committed Dec 16, 2024
1 parent 3831122 commit 1f1df86
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 109 deletions.
58 changes: 19 additions & 39 deletions pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_hint_handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -410,22 +410,17 @@ func (p *DynamicPolicy) calculateHintsForNUMABindingReclaimedCores(reqFloat floa
// Identify candidate NUMA nodes for RNB (Reclaimed NUMA Binding) cores
// This includes both RNB NUMA nodes and NUMA nodes that can shrink from the non-RNB set
candidateNUMANodes := p.filterNUMANodesByNonBindingReclaimedRequestedQuantity(nonActualBindingReclaimedRequestedQuantity,
nonActualBindingReclaimedNUMAHeadroom, nonActualBindingNUMAs, numaHeadroomState)
nonActualBindingReclaimedNUMAHeadroom, nonActualBindingNUMAs, machineState, numaHeadroomState)

// Sort them based on the other qos numa binding pods and their headroom
p.sortCandidateNUMANodesForReclaimed(candidateNUMANodes, machineState, numaHeadroomState)

candidateLeft, maxCPULeft := p.calculateNUMANodesLeft(candidateNUMANodes, machineState, numaHeadroomState, reqFloat)
candidateLeft := p.calculateNUMANodesLeft(candidateNUMANodes, machineState, numaHeadroomState, reqFloat)

hints := &pluginapi.ListOfTopologyHints{}

nonBindingReclaimedLeft := nonActualBindingReclaimedNUMAHeadroom - nonActualBindingReclaimedRequestedQuantity - reqFloat
if maxCPULeft >= 0 {
p.populateBestEffortHintsByAvailableNUMANodes(hints, candidateNUMANodes, candidateLeft,
0)
} else if nonBindingReclaimedLeft <= 0 {
p.populateBestEffortHintsByAvailableNUMANodes(hints, candidateNUMANodes, candidateLeft,
nonBindingReclaimedLeft)
p.populateBestEffortHintsByAvailableNUMANodes(hints, candidateLeft)

// If no valid hints are generated and this is not a single-NUMA scenario, return an error
if len(hints.Hints) == 0 && !(p.metaServer.NumNUMANodes == 1 && nonActualBindingNUMAs.Size() > 0) {
return nil, errNoAvailableCPUHints
}

general.InfoS("calculate numa hints for reclaimed cores success",
Expand All @@ -434,14 +429,7 @@ func (p *DynamicPolicy) calculateHintsForNUMABindingReclaimedCores(reqFloat floa
"nonActualBindingReclaimedNUMAHeadroom", nonActualBindingReclaimedNUMAHeadroom,
"numaHeadroomState", numaHeadroomState,
"candidateNUMANodes", candidateNUMANodes,
"nonBindingReclaimedLeft", nonBindingReclaimedLeft,
"candidateLeft", candidateLeft,
"maxCPULeft", maxCPULeft)

// Finally, add non-RNB NUMA nodes as preferred hints, but these will only be selected if no RNB NUMA nodes meet the requirements
if nonActualBindingNUMAs.Size() > 0 {
util.PopulatePreferHintsByNUMANodes(hints, nonActualBindingNUMAs.ToSliceInt())
}
"candidateLeft", candidateLeft)

return map[string]*pluginapi.ListOfTopologyHints{
string(v1.ResourceCPU): hints,
Expand Down Expand Up @@ -972,6 +960,7 @@ func (p *DynamicPolicy) populateHintsByAvailableNUMANodes(numaNodes []int,
func (p *DynamicPolicy) filterNUMANodesByNonBindingReclaimedRequestedQuantity(nonBindingReclaimedRequestedQuantity,
nonBindingNUMAsCPUQuantity float64,
nonBindingNUMAs machine.CPUSet,
machineState state.NUMANodeMap,
numaHeadroomState map[int]float64,
) []int {
candidateNUMANodes := make([]int, 0, len(numaHeadroomState))
Expand All @@ -981,14 +970,19 @@ func (p *DynamicPolicy) filterNUMANodesByNonBindingReclaimedRequestedQuantity(no
}
}

// Sort candidate NUMA nodes based on the other qos numa binding pods and their headroom
p.sortCandidateNUMANodesForReclaimed(candidateNUMANodes, machineState, numaHeadroomState)

nonBindingNUMAs = nonBindingNUMAs.Clone()
filteredNUMANodes := make([]int, 0, len(candidateNUMANodes))
for _, nodeID := range candidateNUMANodes {
if nonBindingNUMAs.Contains(nodeID) {
allocatableCPUQuantity := numaHeadroomState[nodeID]
// taking this non-binding NUMA as a candidate for reclaimed_cores with numa_binding
// must not leave non-actual numa binding reclaimed_cores in short supply
if cpuutil.CPUIsSufficient(nonBindingReclaimedRequestedQuantity, nonBindingNUMAsCPUQuantity-allocatableCPUQuantity) {
if cpuutil.CPUIsSufficient(nonBindingReclaimedRequestedQuantity, nonBindingNUMAsCPUQuantity-allocatableCPUQuantity) || nonBindingNUMAs.Size() > 1 {
filteredNUMANodes = append(filteredNUMANodes, nodeID)
nonBindingNUMAs = nonBindingNUMAs.Difference(machine.NewCPUSet(nodeID))
} else {
general.Infof("filter out NUMA: %d since taking it will cause normal reclaimed_cores in short supply;"+
" nonBindingNUMAsCPUQuantity: %.3f, targetNUMAAllocatableCPUQuantity: %.3f, nonBindingReclaimedRequestedQuantity: %.3f",
Expand Down Expand Up @@ -1025,27 +1019,21 @@ func (p *DynamicPolicy) sortCandidateNUMANodesForReclaimed(numaNodes []int,
func (p *DynamicPolicy) calculateNUMANodesLeft(numaNodes []int,
machineState state.NUMANodeMap,
numaHeadroomState map[int]float64, reqFloat float64,
) (map[int]float64, float64) {
) map[int]float64 {
numaNodesCPULeft := make(map[int]float64, len(numaNodes))
maxLeft := -math.MaxFloat64
for _, nodeID := range numaNodes {
allocatedQuantity := state.GetRequestedQuantityFromPodEntries(machineState[nodeID].PodEntries,
state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedActualNUMABinding),
p.getContainerRequestedCores)
availableCPUQuantity := numaHeadroomState[nodeID] - allocatedQuantity
numaNodesCPULeft[nodeID] = availableCPUQuantity - reqFloat
if availableCPUQuantity > maxLeft {
maxLeft = availableCPUQuantity
}
}
return numaNodesCPULeft, maxLeft
return numaNodesCPULeft
}

func (p *DynamicPolicy) populateBestEffortHintsByAvailableNUMANodes(
hints *pluginapi.ListOfTopologyHints,
numaNodes []int,
candidateLeft map[int]float64,
minLeft float64,
) {
type nodeHint struct {
nodeID int
Expand All @@ -1054,17 +1042,9 @@ func (p *DynamicPolicy) populateBestEffortHintsByAvailableNUMANodes(

var nodeHints []nodeHint
// Collect nodes that meet the requirement
for _, nodeID := range numaNodes {
curLeft := candidateLeft[nodeID]

// Skip this NUMA node if its left CPU quantity is less than the required minimum (minLeft)
if curLeft < minLeft {
general.Warningf("Skipping NUMA: %d, insufficient left CPUs: %.3f", nodeID, curLeft)
continue
}

for nodeID := range candidateLeft {
// Collect node and its available left CPU
nodeHints = append(nodeHints, nodeHint{nodeID: nodeID, curLeft: curLeft})
nodeHints = append(nodeHints, nodeHint{nodeID: nodeID, curLeft: candidateLeft[nodeID]})
}

// Sort nodes by available resources (curLeft) in descending order
Expand Down
26 changes: 17 additions & 9 deletions pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2749,7 +2749,7 @@ func TestGetTopologyHints(t *testing.T) {
numaHeadroom: map[int]float64{
0: 2,
1: 1,
2: 2,
2: 2.5,
3: 3,
},
req: &pluginapi.ResourceRequest{
Expand Down Expand Up @@ -2986,10 +2986,6 @@ func TestGetTopologyHints(t *testing.T) {
Nodes: []uint64{1},
Preferred: true,
},
{
Nodes: []uint64{0, 1, 2, 3},
Preferred: true,
},
},
},
},
Expand All @@ -3008,7 +3004,7 @@ func TestGetTopologyHints(t *testing.T) {
numaHeadroom: map[int]float64{
0: 2,
1: 1,
2: 2,
2: 2.5,
3: 3,
},
req: &pluginapi.ResourceRequest{
Expand Down Expand Up @@ -3230,7 +3226,19 @@ func TestGetTopologyHints(t *testing.T) {
string(v1.ResourceCPU): {
Hints: []*pluginapi.TopologyHint{
{
Nodes: []uint64{0, 1, 2, 3},
Nodes: []uint64{3},
Preferred: true,
},
{
Nodes: []uint64{2},
Preferred: true,
},
{
Nodes: []uint64{0},
Preferred: true,
},
{
Nodes: []uint64{1},
Preferred: true,
},
},
Expand All @@ -3251,7 +3259,7 @@ func TestGetTopologyHints(t *testing.T) {
numaHeadroom: map[int]float64{
0: 2,
1: 1,
2: 2,
2: 2.5,
3: 3,
},
req: &pluginapi.ResourceRequest{
Expand Down Expand Up @@ -3481,7 +3489,7 @@ func TestGetTopologyHints(t *testing.T) {
Preferred: true,
},
{
Nodes: []uint64{1, 2, 3},
Nodes: []uint64{0},
Preferred: true,
},
},
Expand Down
76 changes: 31 additions & 45 deletions pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_hint_handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package dynamicpolicy
import (
"context"
"fmt"
"math"
"sort"

v1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -481,22 +480,17 @@ func (p *DynamicPolicy) calculateHintsForNUMABindingReclaimedCores(reqInt int64,
// Identify candidate NUMA nodes for RNB (Reclaimed NUMA Binding) cores
// This includes both RNB NUMA nodes and NUMA nodes that can shrink from the non-RNB set
candidateNUMANodes := p.filterNUMANodesByNonBindingReclaimedRequestedQuantity(nonActualBindingReclaimedRequestedQuantity,
nonActualBindingReclaimedNUMAHeadroom, nonActualBindingNUMAs, numaHeadroomState)
nonActualBindingReclaimedNUMAHeadroom, nonActualBindingNUMAs, machineState, numaHeadroomState)

// Sort candidate NUMA nodes based on the other qos numa binding pods and their headroom
p.sortCandidateNUMANodesForReclaimed(candidateNUMANodes, machineState, numaHeadroomState)

candidateLeft, maxMemoryLeft := p.calculateNUMANodesLeft(candidateNUMANodes, machineState, numaHeadroomState, reqInt)
candidateLeft := p.calculateNUMANodesLeft(candidateNUMANodes, machineState, numaHeadroomState, reqInt)

hints := &pluginapi.ListOfTopologyHints{}

nonBindingReclaimedLeft := nonActualBindingReclaimedNUMAHeadroom - nonActualBindingReclaimedRequestedQuantity - reqInt
if maxMemoryLeft >= 0 {
p.populateBestEffortHintsByAvailableNUMANodes(hints, candidateNUMANodes, candidateLeft,
0)
} else if nonBindingReclaimedLeft <= 0 {
p.populateBestEffortHintsByAvailableNUMANodes(hints, candidateNUMANodes, candidateLeft,
nonBindingReclaimedLeft)
p.populateBestEffortHintsByAvailableNUMANodes(hints, candidateLeft)

// If no valid hints are generated and this is not a single-NUMA scenario, return an error
if len(hints.Hints) == 0 && !(p.metaServer.NumNUMANodes == 1 && nonActualBindingNUMAs.Size() > 0) {
return nil, errNoAvailableMemoryHints
}

general.InfoS("calculate numa hints for reclaimed cores success",
Expand All @@ -505,14 +499,7 @@ func (p *DynamicPolicy) calculateHintsForNUMABindingReclaimedCores(reqInt int64,
"nonActualNUMABindingReclaimedNUMAHeadroom", nonActualBindingReclaimedNUMAHeadroom,
"numaHeadroomState", numaHeadroomState,
"candidateNUMANodes", candidateNUMANodes,
"nonBindingReclaimedLeft", nonBindingReclaimedLeft,
"candidateLeft", candidateLeft,
"maxMemoryLeft", maxMemoryLeft)

// Finally, add non-RNB NUMA nodes as preferred hints, but these will only be selected if no RNB NUMA nodes meet the requirements
if nonActualBindingNUMAs.Size() > 0 {
util.PopulatePreferHintsByNUMANodes(hints, nonActualBindingNUMAs.ToSliceInt())
}
"candidateLeft", candidateLeft)

return map[string]*pluginapi.ListOfTopologyHints{
string(v1.ResourceMemory): hints,
Expand All @@ -522,26 +509,20 @@ func (p *DynamicPolicy) calculateHintsForNUMABindingReclaimedCores(reqInt int64,
func (p *DynamicPolicy) calculateNUMANodesLeft(numaNodes []int,
machineState state.NUMANodeMap,
numaHeadroomState map[int]int64, req int64,
) (map[int]int64, int64) {
) map[int]int64 {
numaNodesCPULeft := make(map[int]int64, len(numaNodes))
maxLeft := int64(math.MinInt64)
for _, nodeID := range numaNodes {
allocatedQuantity := state.GetRequestedQuantityFromPodEntries(machineState[nodeID].PodEntries,
state.WrapAllocationMetaFilter((*commonstate.AllocationMeta).CheckReclaimedActualNUMABinding))
availableCPUQuantity := numaHeadroomState[nodeID] - allocatedQuantity
numaNodesCPULeft[nodeID] = availableCPUQuantity - req
if availableCPUQuantity > maxLeft {
maxLeft = availableCPUQuantity
}
}
return numaNodesCPULeft, maxLeft
return numaNodesCPULeft
}

func (p *DynamicPolicy) populateBestEffortHintsByAvailableNUMANodes(
hints *pluginapi.ListOfTopologyHints,
numaNodes []int,
candidateLeft map[int]int64,
minLeft int64,
) {
type nodeHint struct {
nodeID int
Expand All @@ -550,17 +531,9 @@ func (p *DynamicPolicy) populateBestEffortHintsByAvailableNUMANodes(

var nodeHints []nodeHint
// Collect nodes that meet the requirement
for _, nodeID := range numaNodes {
curLeft := candidateLeft[nodeID]

// Skip this NUMA node if its left quantity is less than the required minimum (minLeft)
if curLeft < minLeft {
general.Warningf("Skipping NUMA: %d, insufficient left CPUs: %d", nodeID, curLeft)
continue
}

// Collect node and its available left cpu
nodeHints = append(nodeHints, nodeHint{nodeID: nodeID, curLeft: curLeft})
for nodeID := range candidateLeft {
// Collect node and its available left memory
nodeHints = append(nodeHints, nodeHint{nodeID: nodeID, curLeft: candidateLeft[nodeID]})
}

// Sort nodes by available resources (curLeft) in descending order
Expand All @@ -572,10 +545,17 @@ func (p *DynamicPolicy) populateBestEffortHintsByAvailableNUMANodes(
hintList := make([]*pluginapi.TopologyHint, 0, len(nodeHints))
// Add sorted hints to the hint list
for _, nh := range nodeHints {
hintList = append(hintList, &pluginapi.TopologyHint{
Nodes: []uint64{uint64(nh.nodeID)},
Preferred: true,
})
if nh.curLeft < 0 {
hintList = append(hintList, &pluginapi.TopologyHint{
Nodes: []uint64{uint64(nh.nodeID)},
Preferred: false,
})
} else {
hintList = append(hintList, &pluginapi.TopologyHint{
Nodes: []uint64{uint64(nh.nodeID)},
Preferred: true,
})
}
}

// Update the hints map
Expand Down Expand Up @@ -626,6 +606,7 @@ func (p *DynamicPolicy) sortCandidateNUMANodesForReclaimed(candidates []int,
func (p *DynamicPolicy) filterNUMANodesByNonBindingReclaimedRequestedQuantity(nonBindingReclaimedRequestedQuantity,
nonBindingNUMAsMemoryQuantity int64,
nonBindingNUMAs machine.CPUSet,
machineState state.NUMANodeMap,
numaHeadroomState map[int]int64,
) []int {
candidateNUMANodes := make([]int, 0, len(numaHeadroomState))
Expand All @@ -635,14 +616,19 @@ func (p *DynamicPolicy) filterNUMANodesByNonBindingReclaimedRequestedQuantity(no
}
}

// Sort candidate NUMA nodes based on the other qos numa binding pods and their headroom
p.sortCandidateNUMANodesForReclaimed(candidateNUMANodes, machineState, numaHeadroomState)

nonBindingNUMAs = nonBindingNUMAs.Clone()
filteredNUMANodes := make([]int, 0, len(candidateNUMANodes))
for _, nodeID := range candidateNUMANodes {
if nonBindingNUMAs.Contains(nodeID) {
allocatableMemoryQuantity := numaHeadroomState[nodeID]
// taking this non-binding NUMA as a candidate for reclaimed_cores with numa_binding
// must not leave non-actual numa binding reclaimed_cores in short supply
if nonBindingReclaimedRequestedQuantity <= nonBindingNUMAsMemoryQuantity-allocatableMemoryQuantity {
if nonBindingReclaimedRequestedQuantity <= nonBindingNUMAsMemoryQuantity-allocatableMemoryQuantity || nonBindingNUMAs.Size() > 1 {
filteredNUMANodes = append(filteredNUMANodes, nodeID)
nonBindingNUMAs = nonBindingNUMAs.Difference(machine.NewCPUSet(nodeID))
} else {
general.Infof("filter out NUMA: %d since taking it will cause normal reclaimed_cores in short supply;"+
" nonBindingNUMAsMemoryQuantity: %d, allocatableMemoryQuantity: %d, nonBindingReclaimedRequestedQuantity: %d",
Expand Down
16 changes: 0 additions & 16 deletions pkg/agent/qrm-plugins/util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -356,19 +356,3 @@ func GetPodAggregatedRequestResource(req *pluginapi.ResourceRequest) (int, float
return 0, 0, fmt.Errorf("not support resource name: %s", req.ResourceName)
}
}

// PopulatePreferHintsByNUMANodes appends a single preferred topology hint
// covering all of the given NUMA nodes to hints.Hints.
func PopulatePreferHintsByNUMANodes(
	hints *pluginapi.ListOfTopologyHints,
	numaNodes []int,
) {
	// TopologyHint stores node IDs as uint64, so convert each int ID in order.
	nodeIDs := make([]uint64, len(numaNodes))
	for i := range numaNodes {
		nodeIDs[i] = uint64(numaNodes[i])
	}

	preferredHint := &pluginapi.TopologyHint{
		Nodes:     nodeIDs,
		Preferred: true,
	}
	hints.Hints = append(hints.Hints, preferredHint)
}

0 comments on commit 1f1df86

Please sign in to comment.