Skip to content

Commit

Permalink
feat(qrm): fix 3
Browse files Browse the repository at this point in the history
  • Loading branch information
nightmeng committed Aug 20, 2024
1 parent 179885d commit 22eca8e
Show file tree
Hide file tree
Showing 8 changed files with 114 additions and 115 deletions.
2 changes: 1 addition & 1 deletion pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -1192,7 +1192,7 @@ func (p *DynamicPolicy) checkNormalShareCoresCpuResource(req *pluginapi.Resource
}
for _, allocation := range podEntry {
// shareCoresAllocated should involve both main and sidecar containers
if state.CheckShared(allocation) && !allocation.CheckNumaBinding() {
if state.CheckShared(allocation) && !state.CheckNUMABinding(allocation) {
shareCoresAllocated += p.getContainerRequestedCores(allocation)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ func (p *DynamicPolicy) sharedCoresHintHandler(ctx context.Context,
return p.sharedCoresWithNUMABindingHintHandler(ctx, req)
}

// TODO: support sidecar follow main container for normal share cores in future
if req.ContainerType == pluginapi.ContainerType_MAIN {
ok, err := p.checkNormalShareCoresCpuResource(req)
if err != nil {
Expand Down Expand Up @@ -544,7 +545,10 @@ func (p *DynamicPolicy) sharedCoresWithNUMABindingHintHandler(_ context.Context,
if allocationInfo != nil {
hints = cpuutil.RegenerateHints(allocationInfo, reqInt)

// regenerateHints failed. need to clear container record and re-calculate.
// clear the current container and regenerate machine state in follow cases:
// 1. regenerateHints failed.
// 2. the container is inplace update resizing.
// hints it as a new container
if hints == nil || util.PodInplaceUpdateResizing(req) {
machineState, err = p.clearContainerAndRegenerateMachineState(podEntries, req)
if err != nil {
Expand All @@ -565,7 +569,7 @@ func (p *DynamicPolicy) sharedCoresWithNUMABindingHintHandler(_ context.Context,
if numaset.Size() != 1 {
general.Errorf("pod: %s/%s, container: %s is snb, but its numa set size is %d",
req.PodNamespace, req.PodName, req.ContainerName, numaset.Size())
return nil, fmt.Errorf("snb port not suport cross numa")
return nil, fmt.Errorf("snb port not support cross numa")
}
nodeID := numaset.ToSliceInt()[0]
availableCPUQuantity := machineState[nodeID].GetAvailableCPUQuantity(p.reservedCPUs)
Expand Down
2 changes: 1 addition & 1 deletion pkg/agent/qrm-plugins/cpu/dynamicpolicy/policy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5196,7 +5196,7 @@ func TestSNBAdmitWithSidecarReallocate(t *testing.T) {

dynamicPolicy.podAnnotationKeptKeys = []string{
consts.PodAnnotationMemoryEnhancementNumaBinding,
consts.PodAnnotationVPAResizingKey,
consts.PodAnnotationInplaceUpdateResizingKey,
consts.PodAnnotationAggregatedRequestsKey,
}

Expand Down
6 changes: 0 additions & 6 deletions pkg/agent/qrm-plugins/cpu/dynamicpolicy/state/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,12 +202,6 @@ func (ai *AllocationInfo) CheckMainContainer() bool {
return ai.ContainerType == pluginapi.ContainerType_MAIN.String()
}

// CheckNumaBinding returns true if the AllocationInfo is for pod with numa-binding enhancement
func (ai *AllocationInfo) CheckNumaBinding() bool {
return ai.Annotations[consts.PodAnnotationMemoryEnhancementNumaBinding] ==
consts.PodAnnotationMemoryEnhancementNumaBindingEnable
}

// GetAllocationResultNUMASet returns numaSet parsed from TopologyAwareAssignments
func (ai *AllocationInfo) GetAllocationResultNUMASet() machine.CPUSet {
if ai == nil {
Expand Down
94 changes: 47 additions & 47 deletions pkg/agent/qrm-plugins/cpu/dynamicpolicy/vpa_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func TestSNBVPA(t *testing.T) {
dynamicPolicy, err := getTestDynamicPolicyWithInitialization(cpuTopology, tmpDir)
as.Nil(err)

dynamicPolicy.podAnnotationKeptKeys = []string{consts.PodAnnotationVPAResizingKey}
dynamicPolicy.podAnnotationKeptKeys = []string{consts.PodAnnotationInplaceUpdateResizingKey}

testName := "test"

Expand Down Expand Up @@ -132,9 +132,9 @@ func TestSNBVPA(t *testing.T) {
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
},
Annotations: map[string]string{
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "false"}`,
consts.PodAnnotationVPAResizingKey: "true",
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "false"}`,
consts.PodAnnotationInplaceUpdateResizingKey: "true",
},
}

Expand All @@ -157,9 +157,9 @@ func TestSNBVPA(t *testing.T) {
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
},
Annotations: map[string]string{
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "false"}`,
consts.PodAnnotationVPAResizingKey: "true",
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "false"}`,
consts.PodAnnotationInplaceUpdateResizingKey: "true",
},
}

Expand All @@ -183,9 +183,9 @@ func TestSNBVPA(t *testing.T) {
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
},
Annotations: map[string]string{
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "false"}`,
consts.PodAnnotationVPAResizingKey: "true",
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "false"}`,
consts.PodAnnotationInplaceUpdateResizingKey: "true",
},
Hint: resizeHints1[0],
}
Expand Down Expand Up @@ -223,7 +223,7 @@ func TestSNBVPAWithSidecar(t *testing.T) {
as.Nil(err)

dynamicPolicy.podAnnotationKeptKeys = []string{
consts.PodAnnotationVPAResizingKey,
consts.PodAnnotationInplaceUpdateResizingKey,
consts.PodAnnotationAggregatedRequestsKey,
}

Expand Down Expand Up @@ -404,10 +404,10 @@ func TestSNBVPAWithSidecar(t *testing.T) {
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
},
Annotations: map[string]string{
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "false"}`,
consts.PodAnnotationVPAResizingKey: "true",
consts.PodAnnotationAggregatedRequestsKey: "{\"cpu\":\"4\"}",
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "false"}`,
consts.PodAnnotationInplaceUpdateResizingKey: "true",
consts.PodAnnotationAggregatedRequestsKey: "{\"cpu\":\"4\"}",
},
}

Expand All @@ -430,10 +430,10 @@ func TestSNBVPAWithSidecar(t *testing.T) {
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
},
Annotations: map[string]string{
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "false"}`,
consts.PodAnnotationVPAResizingKey: "true",
consts.PodAnnotationAggregatedRequestsKey: "{\"cpu\":\"4\"}",
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "false"}`,
consts.PodAnnotationInplaceUpdateResizingKey: "true",
consts.PodAnnotationAggregatedRequestsKey: "{\"cpu\":\"4\"}",
},
Hint: resizeMainContainerHints[0],
}
Expand Down Expand Up @@ -490,10 +490,10 @@ func TestSNBVPAWithSidecar(t *testing.T) {
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
},
Annotations: map[string]string{
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "false"}`,
consts.PodAnnotationVPAResizingKey: "true",
consts.PodAnnotationAggregatedRequestsKey: "{\"cpu\":\"5\"}",
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "false"}`,
consts.PodAnnotationInplaceUpdateResizingKey: "true",
consts.PodAnnotationAggregatedRequestsKey: "{\"cpu\":\"5\"}",
},
}

Expand All @@ -515,10 +515,10 @@ func TestSNBVPAWithSidecar(t *testing.T) {
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
},
Annotations: map[string]string{
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "false"}`,
consts.PodAnnotationVPAResizingKey: "true",
consts.PodAnnotationAggregatedRequestsKey: "{\"cpu\":\"5\"}",
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "false"}`,
consts.PodAnnotationInplaceUpdateResizingKey: "true",
consts.PodAnnotationAggregatedRequestsKey: "{\"cpu\":\"5\"}",
},
}
_, err = dynamicPolicy.Allocate(context.Background(), resizeSidecarContainerReq)
Expand All @@ -539,10 +539,10 @@ func TestSNBVPAWithSidecar(t *testing.T) {
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
},
Annotations: map[string]string{
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "false"}`,
consts.PodAnnotationVPAResizingKey: "true",
consts.PodAnnotationAggregatedRequestsKey: "{\"cpu\":\"5\"}",
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "false"}`,
consts.PodAnnotationInplaceUpdateResizingKey: "true",
consts.PodAnnotationAggregatedRequestsKey: "{\"cpu\":\"5\"}",
},
}
resizeMainContainerResp, err = dynamicPolicy.GetTopologyHints(context.Background(), resizeMainContainerReq)
Expand All @@ -563,10 +563,10 @@ func TestSNBVPAWithSidecar(t *testing.T) {
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
},
Annotations: map[string]string{
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "false"}`,
consts.PodAnnotationVPAResizingKey: "true",
consts.PodAnnotationAggregatedRequestsKey: "{\"cpu\":\"5\"}",
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationMemoryEnhancementKey: `{"numa_binding": "true", "numa_exclusive": "false"}`,
consts.PodAnnotationInplaceUpdateResizingKey: "true",
consts.PodAnnotationAggregatedRequestsKey: "{\"cpu\":\"5\"}",
},
Hint: resizeMainContainerResp.ResourceHints[string(v1.ResourceCPU)].Hints[0],
}
Expand Down Expand Up @@ -634,7 +634,7 @@ func TestNormalShareVPA(t *testing.T) {
dynamicPolicy, err := getTestDynamicPolicyWithInitialization(cpuTopology, tmpDir)
as.Nil(err)

dynamicPolicy.podAnnotationKeptKeys = []string{consts.PodAnnotationMemoryEnhancementNumaBinding, consts.PodAnnotationVPAResizingKey}
dynamicPolicy.podAnnotationKeptKeys = []string{consts.PodAnnotationMemoryEnhancementNumaBinding, consts.PodAnnotationInplaceUpdateResizingKey}
dynamicPolicy.transitionPeriod = 10 * time.Millisecond

testName := "test"
Expand Down Expand Up @@ -702,8 +702,8 @@ func TestNormalShareVPA(t *testing.T) {
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
},
Annotations: map[string]string{
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationVPAResizingKey: "true",
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationInplaceUpdateResizingKey: "true",
},
}

Expand All @@ -725,8 +725,8 @@ func TestNormalShareVPA(t *testing.T) {
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
},
Annotations: map[string]string{
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationVPAResizingKey: "true",
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationInplaceUpdateResizingKey: "true",
},
}

Expand Down Expand Up @@ -779,7 +779,7 @@ func TestNormalShareVPAWithSidecar(t *testing.T) {

dynamicPolicy.podAnnotationKeptKeys = []string{
consts.PodAnnotationMemoryEnhancementNumaBinding,
consts.PodAnnotationVPAResizingKey,
consts.PodAnnotationInplaceUpdateResizingKey,
consts.PodAnnotationAggregatedRequestsKey,
}

Expand Down Expand Up @@ -914,9 +914,9 @@ func TestNormalShareVPAWithSidecar(t *testing.T) {
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
},
Annotations: map[string]string{
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationVPAResizingKey: "true",
consts.PodAnnotationAggregatedRequestsKey: "{\"cpu\":\"4\"}",
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationInplaceUpdateResizingKey: "true",
consts.PodAnnotationAggregatedRequestsKey: "{\"cpu\":\"4\"}",
},
}

Expand Down Expand Up @@ -982,9 +982,9 @@ func TestNormalShareVPAWithSidecar(t *testing.T) {
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
},
Annotations: map[string]string{
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationVPAResizingKey: "true",
consts.PodAnnotationAggregatedRequestsKey: "{\"cpu\":\"5\"}",
consts.PodAnnotationQoSLevelKey: consts.PodAnnotationQoSLevelSharedCores,
consts.PodAnnotationInplaceUpdateResizingKey: "true",
consts.PodAnnotationAggregatedRequestsKey: "{\"cpu\":\"5\"}",
},
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,14 +135,10 @@ func (p *DynamicPolicy) numaBindingAllocationHandler(ctx context.Context,
"memoryReq(bytes)", podAggregatedRequest,
"currentResult(bytes)", allocationInfo.AggregatedQuantity)

if util.PodInplaceUpdateResizing(req) {
// remove the main container of this pod (the main container involve the whole pod requests)
containerEntries := podEntries[req.PodUid]
delete(containerEntries, req.ContainerName)
} else {
// TODO ---> jianyu confirm
delete(podEntries, req.PodUid)
}
// remove the main container of this pod (the main container involve the whole pod requests), and the
// sidecar container request in state is zero.
containerEntries := podEntries[req.PodUid]
delete(containerEntries, req.ContainerName)

var stateErr error
memoryState, stateErr = state.GenerateMemoryStateFromPodEntries(p.state.GetMachineInfo(), podEntries, p.state.GetReservedMemory())
Expand Down
15 changes: 10 additions & 5 deletions pkg/agent/qrm-plugins/memory/dynamicpolicy/policy_hint_handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ func (p *DynamicPolicy) sharedCoresHintHandler(ctx context.Context,
return p.numaBindingHintHandler(ctx, req)
}

// TODO: support sidecar follow main container for normal share cores in future
if req.ContainerType == pluginapi.ContainerType_MAIN {
ok, err := p.checkNormalShareCoresResource(req)
if err != nil {
Expand Down Expand Up @@ -129,10 +130,13 @@ func (p *DynamicPolicy) numaBindingHintHandler(_ context.Context,
}
hints = regenerateHints(uint64(podAggregatedRequest), allocationInfo, util.PodInplaceUpdateResizing(req))

// regenerateHints failed, and we need to clear container record and re-calculate.
// clear the current container and regenerate machine state in follow cases:
// 1. regenerateHints failed.
// 2. the container is inplace update resizing.
// hints it as a new container
if hints == nil || util.PodInplaceUpdateResizing(req) {
var err error
resourcesMachineState, err = p.clearPodAndRegenerateMachineState(req)
resourcesMachineState, err = p.clearContainerAndRegenerateMachineState(req)
if err != nil {
general.Errorf("pod: %s/%s, container: %s GenerateMachineStateFromPodEntries failed with error: %v",
req.PodNamespace, req.PodName, req.ContainerName, err)
Expand Down Expand Up @@ -215,11 +219,12 @@ func (p *DynamicPolicy) numaBindingHintHandler(_ context.Context,
return util.PackResourceHintsResponse(req, string(v1.ResourceMemory), hints)
}

func (p *DynamicPolicy) clearPodAndRegenerateMachineState(req *pluginapi.ResourceRequest) (state.NUMANodeResourcesMap, error) {
func (p *DynamicPolicy) clearContainerAndRegenerateMachineState(req *pluginapi.ResourceRequest) (state.NUMANodeResourcesMap, error) {
podResourceEntries := p.state.GetPodResourceEntries()

for resourceName, podEntries := range podResourceEntries {
if resourceName == v1.ResourceMemory {
for _, podEntries := range podResourceEntries {
delete(podEntries[req.PodUid], req.ContainerName)
if len(podEntries[req.PodUid]) == 0 {
delete(podEntries, req.PodUid)
}
}
Expand Down
Loading

0 comments on commit 22eca8e

Please sign in to comment.