Skip to content

Commit

Permalink
enhancement(sysadvisor): export region reclaimable info as non-reclai…
Browse files Browse the repository at this point in the history
…m when dedicated_cores can not be co-located with reclaimed_cores

Signed-off-by: linzhecheng <linzhecheng@bytedance.com>
  • Loading branch information
cheney-lin authored and waynepeking348 committed Nov 7, 2023
1 parent c9953e6 commit ad438bc
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ type QoSRegionBase struct {
// If enableBorweinModel is set, borweinController will update target indicators by model inference.
enableBorweinModel bool
borweinController *borweinctrl.BorweinController

// enableReclaim reports whether the resources of this region can be reclaimed and supplied to reclaimed_cores
enableReclaim func() bool
}

// NewQoSRegionBase returns a base qos region instance with common region methods
Expand Down Expand Up @@ -144,6 +147,7 @@ func NewQoSRegionBase(name string, ownerPoolName string, regionType types.QoSReg
if r.enableBorweinModel {
r.borweinController = borweinctrl.NewBorweinController(name, regionType, ownerPoolName, conf, metaReader)
}
r.enableReclaim = r.EnableReclaim

klog.Infof("[qosaware-cpu] created region [%v/%v/%v]", r.Name(), r.Type(), r.OwnerPoolName())

Expand Down Expand Up @@ -318,7 +322,7 @@ func (r *QoSRegionBase) GetProvisionPolicy() (policyTopPriority types.CPUProvisi
policyTopPriority = r.provisionPolicies[0].name
}

if !r.EnableReclaim {
if !r.enableReclaim() {
policyInUse = types.CPUProvisionPolicyNonReclaim
} else {
policyInUse = r.provisionPolicyNameInUse
Expand All @@ -336,7 +340,7 @@ func (r *QoSRegionBase) GetHeadRoomPolicy() (policyTopPriority types.CPUHeadroom
policyTopPriority = r.headroomPolicies[0].name
}

if !r.EnableReclaim {
if !r.enableReclaim() {
policyInUse = types.CPUHeadroomPolicyNonReclaim
} else {
policyInUse = r.headroomPolicyNameInUse
Expand Down Expand Up @@ -566,3 +570,7 @@ func (r *QoSRegionBase) updateStatus() {
}
}
}

// EnableReclaim reports the EnableReclaim flag stored in the region's
// ResourceEssentials.
func (r *QoSRegionBase) EnableReclaim() bool {
	enabled := r.ResourceEssentials.EnableReclaim
	return enabled
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,23 @@ limitations under the License.
package region

import (
"context"
"fmt"

"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/klog/v2"
"k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1"

workloadapis "github.com/kubewharf/katalyst-api/pkg/apis/workload/v1alpha1"
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/cpu/dynamicpolicy/state"
"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/metacache"
"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/qosaware/resource/helper"
"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/types"
"github.com/kubewharf/katalyst-core/pkg/config"
"github.com/kubewharf/katalyst-core/pkg/consts"
"github.com/kubewharf/katalyst-core/pkg/metaserver"
"github.com/kubewharf/katalyst-core/pkg/metrics"
"github.com/kubewharf/katalyst-core/pkg/util/general"
"github.com/kubewharf/katalyst-core/pkg/util/machine"
)

Expand Down Expand Up @@ -57,10 +62,36 @@ func NewQoSRegionDedicatedNumaExclusive(ci *types.ContainerInfo, conf *config.Co
r.indicatorCurrentGetters = map[string]types.IndicatorCurrentGetter{
string(workloadapis.TargetIndicatorNameCPI): r.getPodCPICurrent,
}
r.enableReclaim = r.EnableReclaim

return r
}

// getPodUID returns the UID of the only pod recorded in r.podSet.
// It returns an error when the pod set does not hold exactly one pod
// (this covers both the empty case and the more-than-one case).
func (r *QoSRegionDedicatedNumaExclusive) getPodUID() (string, error) {
	if n := len(r.podSet); n != 1 {
		return "", fmt.Errorf("region %v expects exactly one pod to be assigned, got %d", r.name, n)
	}
	// The set holds exactly one key here, so the first iteration yields it.
	for podUID := range r.podSet {
		return podUID, nil
	}
	return "", fmt.Errorf("should never get here")
}

// EnableReclaim reports whether this region is reclaimable. It resolves the
// region's single pod UID and passes it, together with the meta server and the
// region-level ResourceEssentials.EnableReclaim flag, to helper.PodEnableReclaim.
//
// Any failure (no unique pod, or the helper returning an error) is logged and
// treated as "not reclaimable", so the method returns false in those cases.
func (r *QoSRegionDedicatedNumaExclusive) EnableReclaim() bool {
	podUID, err := r.getPodUID()
	if err != nil {
		// Include the region name so this log line can be correlated with the
		// PodEnableReclaim failure log below.
		general.ErrorS(err, "getPodUID failed", "name", r.name)
		return false
	}

	// NOTE(review): no caller context is available through this signature, so a
	// background context is used; the helper call is therefore not cancellable here.
	enableReclaim, err := helper.PodEnableReclaim(context.Background(), r.metaServer, podUID, r.ResourceEssentials.EnableReclaim)
	if err != nil {
		general.ErrorS(err, "failed to check PodEnableReclaim", "name", r.name)
		return false
	}
	return enableReclaim
}

func (r *QoSRegionDedicatedNumaExclusive) TryUpdateProvision() {
r.Lock()
defer r.Unlock()
Expand Down

0 comments on commit ad438bc

Please sign in to comment.