Skip to content

Commit

Permalink
enhancement(spd): set default baseline ratio by qos level
Browse files Browse the repository at this point in the history
Signed-off-by: linzhecheng <linzhecheng@bytedance.com>
  • Loading branch information
cheney-lin authored and waynepeking348 committed Nov 23, 2023
1 parent fc8b6aa commit a310c49
Show file tree
Hide file tree
Showing 15 changed files with 325 additions and 48 deletions.
1 change: 1 addition & 0 deletions cmd/katalyst-controller/app/controller/spd.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ func StartSPDController(ctx context.Context, controlCtx *katalyst.GenericContext
conf.GenericConfiguration,
conf.GenericControllerConfiguration,
conf.ControllersConfiguration.SPDConfig,
conf.QoSConfiguration,
extraConf)
if err != nil {
klog.Errorf("failed to new spd controller")
Expand Down
7 changes: 6 additions & 1 deletion cmd/katalyst-controller/app/options/spd.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,14 @@ type SPDOptions struct {
SPDWorkloadGVResources []string
SPDPodLabelIndexerKeys []string
IndicatorPlugins []string
BaselinePercent map[string]int64
}

// NewSPDOptions creates a new Options with a default config.
func NewSPDOptions() *SPDOptions {
return &SPDOptions{
ResyncPeriod: time.Second * 30,
ResyncPeriod: time.Second * 30,
BaselinePercent: map[string]int64{},
}
}

Expand All @@ -53,6 +55,8 @@ func (o *SPDOptions) AddFlags(fss *cliflag.NamedFlagSets) {
"A list of pod label keys to be used as indexers for pod informer")
fs.StringSliceVar(&o.IndicatorPlugins, "spd-indicator-plugins", o.IndicatorPlugins,
"A list of indicator plugins to be used")
fs.StringToInt64Var(&o.BaselinePercent, "spd-qos-baseline-percent", o.BaselinePercent, ""+
"A map of qosLeve to default baseline percent[0,100]")
}

// ApplyTo fills up config with options
Expand All @@ -61,6 +65,7 @@ func (o *SPDOptions) ApplyTo(c *controller.SPDConfig) error {
c.SPDWorkloadGVResources = o.SPDWorkloadGVResources
c.SPDPodLabelIndexerKeys = o.SPDPodLabelIndexerKeys
c.IndicatorPlugins = o.IndicatorPlugins
c.BaselinePercent = o.BaselinePercent
return nil
}

Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ require (
github.com/gogo/protobuf v1.3.2
github.com/golang/protobuf v1.5.2
github.com/google/cadvisor v0.44.1
github.com/kubewharf/katalyst-api v0.1.17-0.20231121124928-d7149d6e42c2
github.com/kubewharf/katalyst-api v0.1.17-0.20231123025708-2d67eae84665
github.com/montanaflynn/stats v0.7.1
github.com/opencontainers/runc v1.1.1
github.com/pkg/errors v0.9.1
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -543,8 +543,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kubewharf/katalyst-api v0.1.17-0.20231121124928-d7149d6e42c2 h1:WyX0XegR4ZVnHy/6SzWeTKaxMPa7UTrvJ5PslLU7VQ0=
github.com/kubewharf/katalyst-api v0.1.17-0.20231121124928-d7149d6e42c2/go.mod h1:iVILS5UL5PRtkUPH2Iu1K/gFGTPMNItnth5fmQ80VGE=
github.com/kubewharf/katalyst-api v0.1.17-0.20231123025708-2d67eae84665 h1:ftUQ/H9ZEmCGbHSrHHu7NCaitBwCtPMagdewzH501Cg=
github.com/kubewharf/katalyst-api v0.1.17-0.20231123025708-2d67eae84665/go.mod h1:iVILS5UL5PRtkUPH2Iu1K/gFGTPMNItnth5fmQ80VGE=
github.com/kubewharf/kubelet v1.24.6-kubewharf.7 h1:zex5NjgWh3b+fk8sey5Hp/hOVoSKdqf4mJu8MeE8T4k=
github.com/kubewharf/kubelet v1.24.6-kubewharf.7/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c=
github.com/kyoh86/exportloopref v0.1.7/go.mod h1:h1rDl2Kdj97+Kwh4gdz3ujE7XHmH51Q0lUiZ1z4NLj8=
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
//go:build !linux

/*
Copyright 2022 The Katalyst Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package sockmem

import (
coreconfig "github.com/kubewharf/katalyst-core/pkg/config"
dynamicconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic"
"github.com/kubewharf/katalyst-core/pkg/metaserver"
"github.com/kubewharf/katalyst-core/pkg/metrics"
)

// SetSockMemLimit is the stub used when building for non-linux targets
// (this file is guarded by the `!linux` build constraint). Its body is empty,
// so calling it has no effect; all arguments are ignored.
// NOTE(review): presumably a linux-specific file provides the real
// implementation with the same signature — confirm against the sibling file.
func SetSockMemLimit(conf *coreconfig.Configuration,
_ interface{}, _ *dynamicconfig.DynamicAgentConfiguration,
emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer) {
}
6 changes: 5 additions & 1 deletion pkg/config/controller/spd.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,12 @@ type SPDConfig struct {
SPDPodLabelIndexerKeys []string

IndicatorPlugins []string

BaselinePercent map[string]int64
}

func NewSPDConfig() *SPDConfig {
return &SPDConfig{}
return &SPDConfig{
BaselinePercent: map[string]int64{},
}
}
32 changes: 25 additions & 7 deletions pkg/controller/spd/indicator-plugin/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,13 @@ const (
// IndicatorUpdater is used by IndicatorPlugin as a unified implementation
// to trigger indicator updating logic.
type IndicatorUpdater interface {
// AddBusinessIndicatorSpec + AddSystemIndicatorSpec + AddBusinessIndicatorStatus
// UpdateBusinessIndicatorSpec + UpdateSystemIndicatorSpec + UpdateBusinessIndicatorStatus
// for indicator update functions, IndicatorUpdater will try to merge them in local stores.
AddBusinessIndicatorSpec(_ types.NamespacedName, _ []apiworkload.ServiceBusinessIndicatorSpec)
AddSystemIndicatorSpec(_ types.NamespacedName, _ []apiworkload.ServiceSystemIndicatorSpec)
AddBusinessIndicatorStatus(_ types.NamespacedName, _ []apiworkload.ServiceBusinessIndicatorStatus)
UpdateBusinessIndicatorSpec(_ types.NamespacedName, _ []apiworkload.ServiceBusinessIndicatorSpec)
UpdateSystemIndicatorSpec(_ types.NamespacedName, _ []apiworkload.ServiceSystemIndicatorSpec)
UpdateBusinessIndicatorStatus(_ types.NamespacedName, _ []apiworkload.ServiceBusinessIndicatorStatus)

UpdateBaselinePercent(_ types.NamespacedName, _ int32)
}

// IndicatorGetter is used by spd controller as indicator notifier to trigger
Expand Down Expand Up @@ -78,7 +80,7 @@ func NewIndicatorManager() *IndicatorManager {
}
}

func (u *IndicatorManager) AddBusinessIndicatorSpec(nn types.NamespacedName, indicators []apiworkload.ServiceBusinessIndicatorSpec) {
func (u *IndicatorManager) UpdateBusinessIndicatorSpec(nn types.NamespacedName, indicators []apiworkload.ServiceBusinessIndicatorSpec) {
u.specMtx.Lock()
defer u.specMtx.Unlock()

Expand All @@ -96,7 +98,7 @@ func (u *IndicatorManager) AddBusinessIndicatorSpec(nn types.NamespacedName, ind
}
}

func (u *IndicatorManager) AddSystemIndicatorSpec(nn types.NamespacedName, indicators []apiworkload.ServiceSystemIndicatorSpec) {
func (u *IndicatorManager) UpdateSystemIndicatorSpec(nn types.NamespacedName, indicators []apiworkload.ServiceSystemIndicatorSpec) {
u.specMtx.Lock()
defer u.specMtx.Unlock()

Expand All @@ -114,7 +116,7 @@ func (u *IndicatorManager) AddSystemIndicatorSpec(nn types.NamespacedName, indic
}
}

func (u *IndicatorManager) AddBusinessIndicatorStatus(nn types.NamespacedName, indicators []apiworkload.ServiceBusinessIndicatorStatus) {
func (u *IndicatorManager) UpdateBusinessIndicatorStatus(nn types.NamespacedName, indicators []apiworkload.ServiceBusinessIndicatorStatus) {
u.statusMtx.Lock()
defer u.statusMtx.Unlock()

Expand All @@ -132,6 +134,22 @@ func (u *IndicatorManager) AddBusinessIndicatorStatus(nn types.NamespacedName, i
}
}

// UpdateBaselinePercent records baselinePercent in the locally stored spec of
// the object identified by nn, creating an initial spec entry when none exists
// yet. The name is pushed onto the spec queue only when the stored value is
// unset or differs from baselinePercent; otherwise nothing is enqueued.
func (u *IndicatorManager) UpdateBaselinePercent(nn types.NamespacedName, baselinePercent int32) {
	u.specMtx.Lock()
	defer u.specMtx.Unlock()

	entry, found := u.specMap[nn]
	if !found {
		entry = initServiceProfileDescriptorSpec()
		u.specMap[nn] = entry
	}

	// Nothing to do when the stored percent already matches the requested one.
	if entry.BaselinePercent != nil && *entry.BaselinePercent == baselinePercent {
		return
	}

	entry.BaselinePercent = &baselinePercent
	u.specQueue <- nn
}

func (u *IndicatorManager) GetIndicatorSpecChan() chan types.NamespacedName {
return u.specQueue
}
Expand Down
32 changes: 29 additions & 3 deletions pkg/controller/spd/spd.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
"k8s.io/klog/v2"
"k8s.io/utils/pointer"

"github.com/kubewharf/katalyst-api/pkg/apis/autoscaling/v1alpha1"
apiworkload "github.com/kubewharf/katalyst-api/pkg/apis/workload/v1alpha1"
Expand Down Expand Up @@ -69,8 +70,9 @@ const (
// efficiency, we can't assume that all function callers MUST use an
// indexed informer to look up objects.
type SPDController struct {
ctx context.Context
conf *controller.SPDConfig
ctx context.Context
conf *controller.SPDConfig
qosConfig *generic.QoSConfiguration

podUpdater control.PodUpdater
spdControl control.ServiceProfileControl
Expand Down Expand Up @@ -99,7 +101,7 @@ type SPDController struct {

func NewSPDController(ctx context.Context, controlCtx *katalystbase.GenericContext,
genericConf *generic.GenericConfiguration, _ *controller.GenericControllerConfiguration,
conf *controller.SPDConfig, extraConf interface{}) (*SPDController, error) {
conf *controller.SPDConfig, qosConfig *generic.QoSConfiguration, extraConf interface{}) (*SPDController, error) {
if conf == nil || controlCtx.Client == nil || genericConf == nil {
return nil, fmt.Errorf("client, conf and generalConf can't be nil")
}
Expand All @@ -110,6 +112,7 @@ func NewSPDController(ctx context.Context, controlCtx *katalystbase.GenericConte
spdController := &SPDController{
ctx: ctx,
conf: conf,
qosConfig: qosConfig,
podUpdater: &control.DummyPodUpdater{},
spdControl: &control.DummySPDControl{},
workloadControl: &control.DummyUnstructuredControl{},
Expand Down Expand Up @@ -539,6 +542,28 @@ func (sc *SPDController) getWorkload(gvr schema.GroupVersionResource, namespace,
return workload, nil
}

// defaultBaselinePercent returns the default baseline percent for the given
// workload, looked up in the configured per-qos-level map using the qos level
// derived from the workload's template annotations.
//
// It returns 100 whenever a value cannot be determined (no qos configuration,
// the template annotations or the qos level cannot be resolved, or no entry is
// configured for that qos level), which signifies that the resources of this
// workload cannot be reclaimed to reclaimed_cores.
//
// Configured values are clamped to [0, 100] before being narrowed to int32, so
// an out-of-range int64 can never wrap around into an unrelated percent.
func (sc *SPDController) defaultBaselinePercent(workload *unstructured.Unstructured) *int32 {
	const maxBaselinePercent int32 = 100

	// NewSPDController does not reject a nil qos configuration; without it the
	// qos level cannot be resolved, so fall back to the default instead of
	// dereferencing a nil pointer.
	if sc.qosConfig == nil {
		general.InfoS("qos config is nil, use default baseline percent")
		return pointer.Int32(maxBaselinePercent)
	}

	annotations, err := native.GetUnstructuredTemplateAnnotations(workload)
	if err != nil {
		general.ErrorS(err, "failed to GetUnstructuredTemplateAnnotations")
		return pointer.Int32(maxBaselinePercent)
	}

	qosLevel, err := sc.qosConfig.GetQoSLevel(annotations)
	if err != nil {
		general.ErrorS(err, "failed to GetQoSLevel")
		return pointer.Int32(maxBaselinePercent)
	}

	baselinePercent, ok := sc.conf.BaselinePercent[qosLevel]
	if !ok {
		general.InfoS("failed to get default baseline percent", "qosLevel", qosLevel)
		return pointer.Int32(maxBaselinePercent)
	}

	// Clamp in the int64 domain first: converting an out-of-range int64 to
	// int32 silently truncates rather than trapping.
	switch {
	case baselinePercent < 0:
		return pointer.Int32(0)
	case baselinePercent > int64(maxBaselinePercent):
		return pointer.Int32(maxBaselinePercent)
	}
	return pointer.Int32(int32(baselinePercent))
}

// getOrCreateSPDForWorkload get workload's spd or create one if the spd doesn't exist
func (sc *SPDController) getOrCreateSPDForWorkload(workload *unstructured.Unstructured) (*apiworkload.ServiceProfileDescriptor, error) {
gvk := workload.GroupVersionKind()
Expand All @@ -565,6 +590,7 @@ func (sc *SPDController) getOrCreateSPDForWorkload(workload *unstructured.Unstru
Kind: ownerRef.Kind,
APIVersion: ownerRef.APIVersion,
},
BaselinePercent: sc.defaultBaselinePercent(workload),
},
Status: apiworkload.ServiceProfileDescriptorStatus{
AggMetrics: []apiworkload.AggPodMetrics{},
Expand Down
8 changes: 4 additions & 4 deletions pkg/controller/spd/spd_baseline.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,15 @@ func (sc *SPDController) updateBaselinePercentile(spd *v1alpha1.ServiceProfileDe
return nil
}

if spd.Spec.BaselineRatio == nil {
if spd.Spec.BaselinePercent == nil {
util.SetSPDBaselinePercentile(spd, nil)
return nil
} else if *spd.Spec.BaselineRatio == 1.0 {
} else if *spd.Spec.BaselinePercent >= 100 {
// if baseline percent is 100 or more, we set baselinePercentile to ""
// which means all pods are baseline
util.SetSPDBaselinePercentile(spd, &util.BaselineCoefficient{})
return nil
} else if *spd.Spec.BaselineRatio == 0 {
} else if *spd.Spec.BaselinePercent <= 0 {
// if baseline percent is 0 or less, we set baselinePercentile to "-1"
// which means the baseline coefficient of every pod is no less than the threshold,
// so no pod is treated as baseline.
Expand Down Expand Up @@ -93,6 +93,6 @@ func (sc *SPDController) calculateBaselinePercentile(spd *v1alpha1.ServiceProfil
sort.SliceStable(bcList, func(i, j int) bool {
return bcList[i].Cmp(bcList[j]) < 0
})
baselineIndex := int(math.Floor(float64(len(bcList)-1) * float64(*spd.Spec.BaselineRatio)))
baselineIndex := int(math.Floor(float64(len(bcList)-1) * float64(*spd.Spec.BaselinePercent) / 100))
return bcList[baselineIndex], nil
}
28 changes: 14 additions & 14 deletions pkg/controller/spd/spd_baseline_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ func TestSPDController_updateBaselinePercentile(t *testing.T) {
Name: "sts1",
APIVersion: stsGVK.GroupVersion().String(),
},
BaselineRatio: pointer.Float32(0.5),
BaselinePercent: pointer.Int32(50),
},
Status: apiworkload.ServiceProfileDescriptorStatus{},
},
Expand All @@ -125,7 +125,7 @@ func TestSPDController_updateBaselinePercentile(t *testing.T) {
Name: "sts1",
APIVersion: stsGVK.GroupVersion().String(),
},
BaselineRatio: pointer.Float32(0.5),
BaselinePercent: pointer.Int32(50),
},
Status: apiworkload.ServiceProfileDescriptorStatus{},
},
Expand Down Expand Up @@ -164,7 +164,7 @@ func TestSPDController_updateBaselinePercentile(t *testing.T) {
Name: "sts1",
APIVersion: stsGVK.GroupVersion().String(),
},
BaselineRatio: pointer.Float32(0.5),
BaselinePercent: pointer.Int32(50),
},
Status: apiworkload.ServiceProfileDescriptorStatus{},
},
Expand All @@ -180,14 +180,14 @@ func TestSPDController_updateBaselinePercentile(t *testing.T) {
Name: "sts1",
APIVersion: stsGVK.GroupVersion().String(),
},
BaselineRatio: pointer.Float32(0.5),
BaselinePercent: pointer.Int32(50),
},
Status: apiworkload.ServiceProfileDescriptorStatus{},
},
wantErr: assert.NoError,
},
{
name: "three pod for 50% baseline ratio",
name: "three pod for 50% baseline percent",
fields: fields{
podList: []runtime.Object{
&v1.Pod{
Expand Down Expand Up @@ -280,7 +280,7 @@ func TestSPDController_updateBaselinePercentile(t *testing.T) {
Name: "sts1",
APIVersion: stsGVK.GroupVersion().String(),
},
BaselineRatio: pointer.Float32(0.5),
BaselinePercent: pointer.Int32(50),
},
Status: apiworkload.ServiceProfileDescriptorStatus{},
},
Expand All @@ -299,14 +299,14 @@ func TestSPDController_updateBaselinePercentile(t *testing.T) {
Name: "sts1",
APIVersion: stsGVK.GroupVersion().String(),
},
BaselineRatio: pointer.Float32(0.5),
BaselinePercent: pointer.Int32(50),
},
Status: apiworkload.ServiceProfileDescriptorStatus{},
},
wantErr: assert.NoError,
},
{
name: "three pod for 100% baseline ratio",
name: "three pod for 100% baseline percent",
fields: fields{
podList: []runtime.Object{
&v1.Pod{
Expand Down Expand Up @@ -399,7 +399,7 @@ func TestSPDController_updateBaselinePercentile(t *testing.T) {
Name: "sts1",
APIVersion: stsGVK.GroupVersion().String(),
},
BaselineRatio: pointer.Float32(1),
BaselinePercent: pointer.Int32(100),
},
Status: apiworkload.ServiceProfileDescriptorStatus{},
},
Expand All @@ -418,14 +418,14 @@ func TestSPDController_updateBaselinePercentile(t *testing.T) {
Name: "sts1",
APIVersion: stsGVK.GroupVersion().String(),
},
BaselineRatio: pointer.Float32(1),
BaselinePercent: pointer.Int32(100),
},
Status: apiworkload.ServiceProfileDescriptorStatus{},
},
wantErr: assert.NoError,
},
{
name: "three pod for 0% baseline ratio",
name: "three pod for 0% baseline percent",
fields: fields{
podList: []runtime.Object{
&v1.Pod{
Expand Down Expand Up @@ -518,7 +518,7 @@ func TestSPDController_updateBaselinePercentile(t *testing.T) {
Name: "sts1",
APIVersion: stsGVK.GroupVersion().String(),
},
BaselineRatio: pointer.Float32(0),
BaselinePercent: pointer.Int32(0),
},
Status: apiworkload.ServiceProfileDescriptorStatus{},
},
Expand All @@ -537,7 +537,7 @@ func TestSPDController_updateBaselinePercentile(t *testing.T) {
Name: "sts1",
APIVersion: stsGVK.GroupVersion().String(),
},
BaselineRatio: pointer.Float32(0),
BaselinePercent: pointer.Int32(0),
},
Status: apiworkload.ServiceProfileDescriptorStatus{},
},
Expand All @@ -559,7 +559,7 @@ func TestSPDController_updateBaselinePercentile(t *testing.T) {
[]runtime.Object{tt.fields.spd}, []runtime.Object{tt.fields.workload})
assert.NoError(t, err)

spdController, err := NewSPDController(ctx, controlCtx, genericConfig, controllerConf, spdConfig, struct{}{})
spdController, err := NewSPDController(ctx, controlCtx, genericConfig, controllerConf, spdConfig, nil, struct{}{})
assert.NoError(t, err)

controlCtx.StartInformer(ctx)
Expand Down
Loading

0 comments on commit a310c49

Please sign in to comment.