From 3cb603f908352afb542eeeacd05626c8cf28791c Mon Sep 17 00:00:00 2001 From: zhy76 <958474674@qq.com> Date: Thu, 24 Aug 2023 11:13:52 +0000 Subject: [PATCH] feat: observe cnr numa exclusive anomaly metric Signed-off-by: zhy76 <958474674@qq.com> --- cmd/katalyst-controller/app/controller/cnr.go | 44 ++++ .../app/enablecontrollers.go | 1 + cmd/katalyst-controller/app/options/cnr.go | 40 +++ .../app/options/controller.go | 4 + .../qrm-plugins/advisorsvc/advisor_svc.pb.go | 11 +- .../cpu/dynamicpolicy/cpuadvisor/cpu.pb.go | 14 +- .../dynamicpolicy/cpuadvisor/cpu.pb_test.go | 3 +- pkg/config/controller/cnr.go | 10 + pkg/config/controller/controller_base.go | 2 + pkg/controller/cnr/cnr.go | 221 ++++++++++++++++ pkg/controller/cnr/cnr_indicator.go | 80 ++++++ pkg/controller/cnr/cnr_indicator_test.go | 244 ++++++++++++++++++ pkg/controller/cnr/util.go | 17 ++ pkg/controller/lifecycle/cnr.go | 2 +- pkg/util/native/object.go | 10 + 15 files changed, 690 insertions(+), 13 deletions(-) create mode 100644 cmd/katalyst-controller/app/controller/cnr.go create mode 100644 cmd/katalyst-controller/app/options/cnr.go create mode 100644 pkg/config/controller/cnr.go create mode 100644 pkg/controller/cnr/cnr.go create mode 100644 pkg/controller/cnr/cnr_indicator.go create mode 100644 pkg/controller/cnr/cnr_indicator_test.go create mode 100644 pkg/controller/cnr/util.go diff --git a/cmd/katalyst-controller/app/controller/cnr.go b/cmd/katalyst-controller/app/controller/cnr.go new file mode 100644 index 0000000000..1973e6fa66 --- /dev/null +++ b/cmd/katalyst-controller/app/controller/cnr.go @@ -0,0 +1,44 @@ +package controller + +import ( + "context" + + "k8s.io/klog/v2" + + katalystbase "github.com/kubewharf/katalyst-core/cmd/base" + "github.com/kubewharf/katalyst-core/pkg/config" + "github.com/kubewharf/katalyst-core/pkg/controller/cnr" +) + +const ( + CNRControllerName = "cnr" +) + +func StartCNRController(ctx context.Context, controlCtx *katalystbase.GenericContext, + conf *config.Configuration, _ interface{}, _ string) (bool, error) { + var ( + cnrController *cnr.CNRController + err error + ) + + if conf.CNRConfig.EnableCNR { + cnrController, err = cnr.NewCNRController(ctx, + conf.GenericConfiguration, + conf.GenericControllerConfiguration, + controlCtx.Client, + controlCtx.KubeInformerFactory.Core().V1().Pods(), + controlCtx.InternalInformerFactory.Node().V1alpha1().CustomNodeResources(), + controlCtx.EmitterPool.GetDefaultMetricsEmitter(), + ) + if err != nil { + klog.Errorf("failed to new CNR controller") + return false, err + } + } + + if cnrController != nil { + go cnrController.Run() + } + + return true, nil +} diff --git a/cmd/katalyst-controller/app/enablecontrollers.go b/cmd/katalyst-controller/app/enablecontrollers.go index dc9063449a..60c9c58d63 100644 --- a/cmd/katalyst-controller/app/enablecontrollers.go +++ b/cmd/katalyst-controller/app/enablecontrollers.go @@ -51,6 +51,7 @@ func init() { controllerInitializers.Store(controller.KCCControllerName, ControllerStarter{Starter: controller.StartKCCController}) controllerInitializers.Store(controller.SPDControllerName, ControllerStarter{Starter: controller.StartSPDController}) controllerInitializers.Store(controller.LifeCycleControllerName, ControllerStarter{Starter: controller.StartLifeCycleController}) + controllerInitializers.Store(controller.CNRControllerName, ControllerStarter{Starter: controller.StartCNRController}) } // RegisterControllerInitializer is used to register user-defined controllers diff --git a/cmd/katalyst-controller/app/options/cnr.go b/cmd/katalyst-controller/app/options/cnr.go new file mode 100644 index 0000000000..07ec36397a --- /dev/null +++ b/cmd/katalyst-controller/app/options/cnr.go @@ -0,0 +1,40 @@ +package options + +import ( + cliflag "k8s.io/component-base/cli/flag" + + "github.com/kubewharf/katalyst-core/pkg/config/controller" +) + +type CNROptions struct { + // EnableCNR is a flag to enable CNR controller + EnableCNR bool +} + +func NewCNROptions() *CNROptions { + return &CNROptions{ + EnableCNR: true, + } +} + +// AddFlags adds flags to the specified FlagSet. +func (o *CNROptions) AddFlags(fss *cliflag.NamedFlagSets) { + fs := fss.FlagSet("cnr") + + fs.BoolVar(&o.EnableCNR, "cnr-enable", o.EnableCNR, + "whether to enable the cnr controller") +} + +// ApplyTo fills up config with options +func (o *CNROptions) ApplyTo(c *controller.CNRConfig) error { + c.EnableCNR = o.EnableCNR + return nil +} + +func (o *CNROptions) Config() (*controller.CNRConfig, error) { + c := controller.NewCNRConfig() + if err := o.ApplyTo(c); err != nil { + return nil, err + } + return c, nil +} diff --git a/cmd/katalyst-controller/app/options/controller.go b/cmd/katalyst-controller/app/options/controller.go index 0c21599e29..2d66ff91fd 100644 --- a/cmd/katalyst-controller/app/options/controller.go +++ b/cmd/katalyst-controller/app/options/controller.go @@ -28,6 +28,7 @@ type ControllersOptions struct { *KCCOptions *SPDOptions *LifeCycleOptions + *CNROptions } func NewControllersOptions() *ControllersOptions { @@ -36,6 +37,7 @@ func NewControllersOptions() *ControllersOptions { KCCOptions: NewKCCOptions(), SPDOptions: NewSPDOptions(), LifeCycleOptions: NewLifeCycleOptions(), + CNROptions: NewCNROptions(), } } @@ -44,6 +46,7 @@ func (o *ControllersOptions) AddFlags(fss *cliflag.NamedFlagSets) { o.KCCOptions.AddFlags(fss) o.SPDOptions.AddFlags(fss) o.LifeCycleOptions.AddFlags(fss) + o.CNROptions.AddFlags(fss) } // ApplyTo fills up config with options @@ -54,6 +57,7 @@ func (o *ControllersOptions) ApplyTo(c *controllerconfig.ControllersConfiguratio errList = append(errList, o.KCCOptions.ApplyTo(c.KCCConfig)) errList = append(errList, o.SPDOptions.ApplyTo(c.SPDConfig)) errList = append(errList, o.LifeCycleOptions.ApplyTo(c.LifeCycleConfig)) + errList = append(errList, o.CNROptions.ApplyTo(c.CNRConfig)) return errors.NewAggregate(errList) } diff --git a/pkg/agent/qrm-plugins/advisorsvc/advisor_svc.pb.go b/pkg/agent/qrm-plugins/advisorsvc/advisor_svc.pb.go index 9eb0c3900e..c375221454 100644 --- a/pkg/agent/qrm-plugins/advisorsvc/advisor_svc.pb.go +++ b/pkg/agent/qrm-plugins/advisorsvc/advisor_svc.pb.go @@ -20,18 +20,19 @@ package advisorsvc import ( context "context" fmt "fmt" + io "io" + math "math" + math_bits "math/bits" + reflect "reflect" + strings "strings" + _ "github.com/gogo/protobuf/gogoproto" proto "github.com/gogo/protobuf/proto" github_com_gogo_protobuf_sortkeys "github.com/gogo/protobuf/sortkeys" grpc "google.golang.org/grpc" codes "google.golang.org/grpc/codes" status "google.golang.org/grpc/status" - io "io" v1alpha1 "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" - math "math" - math_bits "math/bits" - reflect "reflect" - strings "strings" ) // Reference imports to suppress errors if they are not otherwise used. diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor/cpu.pb.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor/cpu.pb.go index e2a8074061..28aa4f3205 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor/cpu.pb.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor/cpu.pb.go @@ -20,19 +20,21 @@ package cpuadvisor import ( context "context" fmt "fmt" + io "io" + math "math" + math_bits "math/bits" + reflect "reflect" + strings "strings" + _ "github.com/gogo/protobuf/gogoproto" proto "github.com/gogo/protobuf/proto" github_com_gogo_protobuf_sortkeys "github.com/gogo/protobuf/sortkeys" - advisorsvc "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/advisorsvc" grpc "google.golang.org/grpc" codes "google.golang.org/grpc/codes" status "google.golang.org/grpc/status" - io "io" _ "k8s.io/kubelet/pkg/apis/resourceplugin/v1alpha1" - math "math" - math_bits "math/bits" - reflect "reflect" - strings "strings" + + advisorsvc "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/advisorsvc" ) // Reference imports to suppress errors if they are not otherwise used. diff --git a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor/cpu.pb_test.go b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor/cpu.pb_test.go index 61264bfff7..69d09bf7a6 100644 --- a/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor/cpu.pb_test.go +++ b/pkg/agent/qrm-plugins/cpu/dynamicpolicy/cpuadvisor/cpu.pb_test.go @@ -21,8 +21,9 @@ import ( context "context" "testing" - "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/advisorsvc" grpc "google.golang.org/grpc" + + "github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/advisorsvc" ) func TestCPUPB(t *testing.T) { diff --git a/pkg/config/controller/cnr.go b/pkg/config/controller/cnr.go new file mode 100644 index 0000000000..4f8ed47270 --- /dev/null +++ b/pkg/config/controller/cnr.go @@ -0,0 +1,10 @@ +package controller + +type CNRConfig struct { + // EnableCNR is a flag to enable CNR controller + EnableCNR bool +} + +func NewCNRConfig() *CNRConfig { + return &CNRConfig{} +} diff --git a/pkg/config/controller/controller_base.go b/pkg/config/controller/controller_base.go index 984508df5d..d988038f86 100644 --- a/pkg/config/controller/controller_base.go +++ b/pkg/config/controller/controller_base.go @@ -46,6 +46,7 @@ type ControllersConfiguration struct { *KCCConfig *SPDConfig *LifeCycleConfig + *CNRConfig } func NewGenericControllerConfiguration() *GenericControllerConfiguration { @@ -58,5 +59,6 @@ func NewControllersConfiguration() *ControllersConfiguration { KCCConfig: NewKCCConfig(), SPDConfig: NewSPDConfig(), LifeCycleConfig: NewLifeCycleConfig(), + CNRConfig: NewCNRConfig(), } } diff --git a/pkg/controller/cnr/cnr.go b/pkg/controller/cnr/cnr.go new file mode 100644 index 0000000000..4a245b4137 --- /dev/null +++ b/pkg/controller/cnr/cnr.go @@ -0,0 +1,221 @@ +package cnr + +import ( + "context" + "fmt" + "time" + + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/wait" + coreinformers "k8s.io/client-go/informers/core/v1" + corelisters "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" + "k8s.io/cri-api/pkg/errors" + "k8s.io/klog/v2" + + apis "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" + informers "github.com/kubewharf/katalyst-api/pkg/client/informers/externalversions/node/v1alpha1" + listers "github.com/kubewharf/katalyst-api/pkg/client/listers/node/v1alpha1" + "github.com/kubewharf/katalyst-core/pkg/client" + "github.com/kubewharf/katalyst-core/pkg/client/control" + "github.com/kubewharf/katalyst-core/pkg/config/controller" + "github.com/kubewharf/katalyst-core/pkg/config/generic" + "github.com/kubewharf/katalyst-core/pkg/metrics" +) + +const ( + cnrControllerName = "cnr" + cnrWorkerCount = 1 +) + +type CNRController struct { + ctx context.Context + + client *client.GenericClientSet + cnrControl control.CNRControl + + cnrListerSynced cache.InformerSynced + cnrLister listers.CustomNodeResourceLister + podListerSynced cache.InformerSynced + podLister corelisters.PodLister + + // queue for cnr + cnrSyncQueue workqueue.RateLimitingInterface + // queue for pod + podSyncQueue workqueue.RateLimitingInterface + + // metricsEmitter for emit metrics + metricsEmitter metrics.MetricEmitter +} + +// NewCNRController create a new CNRController +func NewCNRController( + ctx context.Context, + genericConf *generic.GenericConfiguration, + _ *controller.GenericControllerConfiguration, + client *client.GenericClientSet, + podInformer coreinformers.PodInformer, + cnrInformer informers.CustomNodeResourceInformer, + metricsEmitter metrics.MetricEmitter) (*CNRController, error) { + + cnrController := &CNRController{ + ctx: ctx, + client: client, + cnrSyncQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), + cnrControllerName), + } + + // init cnr informer + cnrInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: cnrController.addCNREventHandler, + UpdateFunc: cnrController.updateCNREventHandler, + }) + // init cnr lister + cnrController.cnrLister = cnrInformer.Lister() + // init cnr synced + cnrController.cnrListerSynced = cnrInformer.Informer().HasSynced + + // init pod informer + podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: cnrController.addPodEventHandler, + UpdateFunc: cnrController.updatePodEventHandler, + }) + // init pod lister + cnrController.podLister = podInformer.Lister() + // init pod synced + cnrController.podListerSynced = podInformer.Informer().HasSynced + + if metricsEmitter == nil { + // if metricsEmitter is nil, use dummy metrics + cnrController.metricsEmitter = metrics.DummyMetrics{} + } else { + // if metricsEmitter is not nil, use metricsEmitter with tags + cnrController.metricsEmitter = metricsEmitter.WithTags(cnrControllerName) + } + + // init cnr control + cnrController.cnrControl = control.DummyCNRControl{} + if !genericConf.DryRun { + // if dryRun is false, use real cnr control + cnrController.cnrControl = control.NewCNRControlImpl(client.InternalClient) + } + + return cnrController, nil +} + +func (ctrl *CNRController) Run() { + defer utilruntime.HandleCrash() + defer ctrl.cnrSyncQueue.ShutDown() + defer ctrl.podSyncQueue.ShutDown() + defer klog.Infof("Shutting down %s controller", cnrControllerName) + + // wait for cache sync + if !cache.WaitForCacheSync(ctrl.ctx.Done(), ctrl.cnrListerSynced) { + utilruntime.HandleError(fmt.Errorf("unable to sync caches for %s controller", cnrControllerName)) + return + } + klog.Infof("Caches are synced for %s controller", cnrControllerName) + klog.Infof("start %d workers for %s controller", cnrWorkerCount, cnrControllerName) + + for i := 0; i < cnrWorkerCount; i++ { + go wait.Until(ctrl.cnrWorker, time.Second, ctrl.ctx.Done()) + } + + <-ctrl.ctx.Done() +} + +func (ctrl *CNRController) cnrWorker() { + for ctrl.processNextCNR() { + } +} + +// processNextCNR dequeues items, processes them, and marks them done. +// It enforces that the sync is never invoked concurrently with the same key. +func (ctrl *CNRController) processNextCNR() bool { + key, quit := ctrl.cnrSyncQueue.Get() + if quit { + return false + } + defer ctrl.cnrSyncQueue.Done(key) + + err := ctrl.syncCNR(key.(string)) + if err == nil { + ctrl.cnrSyncQueue.Forget(key) + return true + } + + // if err is not nil, requeue the key + utilruntime.HandleError(fmt.Errorf("sync %s failed with %v", key, err)) + ctrl.cnrSyncQueue.AddRateLimited(key) + + return true +} + +func (ctrl *CNRController) syncCNR(key string) error { + _, name, err := cache.SplitMetaNamespaceKey(key) + if err != nil { + return err + } + + cnr, err := ctrl.cnrLister.Get(name) + if errors.IsNotFound(err) { + // cnr is deleted, so we can skip + klog.Info("CNR has been deleted %v", key) + return nil + } + if err != nil { + return err + } + + // check cnr anomaly + // check numa exclusive anomaly + if ctrl.checkNumaExclusiveAnomaly(cnr) { + err = ctrl.emitNumaAnomalyMetric(cnr, reasonNumaExclusiveAnomaly) + if err != nil { + return err + } + } + + return nil +} + +// enqueueCNR enqueues the given CNR in the work queue. +func (ctrl *CNRController) enqueueCNR(cnr *apis.CustomNodeResource) { + key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(cnr) + if err != nil { + utilruntime.HandleError(fmt.Errorf("Cound't get key for CNR %+v: %v", cnr, err)) + return + } + ctrl.cnrSyncQueue.Add(key) +} + +func (ctrl *CNRController) addCNREventHandler(obj interface{}) { + cnr, ok := obj.(*apis.CustomNodeResource) + if !ok { + klog.Errorf("cnanot convert obj to *apis.CNR: %v", obj) + return + } + klog.V(4).Infof("notice addition of cnr %s", cnr.Name) + + ctrl.enqueueCNR(cnr) +} + +func (ctrl *CNRController) updateCNREventHandler(_, newObj interface{}) { + c, ok := newObj.(*apis.CustomNodeResource) + if !ok { + klog.Errorf("cannot convert newObj to *apis.CNR: %v", c) + return + } + klog.V(4).Infof("notice addition of cnr %s", c.Name) + + ctrl.enqueueCNR(c) +} + +func (ctrl *CNRController) addPodEventHandler(obj interface{}) { + ctrl.podSyncQueue.Add(obj) +} + +func (ctrl *CNRController) updatePodEventHandler(oldObj, newObj interface{}) { + ctrl.podSyncQueue.Add(newObj) +} diff --git a/pkg/controller/cnr/cnr_indicator.go b/pkg/controller/cnr/cnr_indicator.go new file mode 100644 index 0000000000..0f86d4caa7 --- /dev/null +++ b/pkg/controller/cnr/cnr_indicator.go @@ -0,0 +1,80 @@ +package cnr + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/klog/v2" + + "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" + "github.com/kubewharf/katalyst-core/pkg/metrics" + "github.com/kubewharf/katalyst-core/pkg/util/native" +) + +const ( + metricsNameCNRReportAnomaly = "cnr_report_anomaly" + // metricsNameCNRReportLantency = "cnr_report_latency" +) + +const ( + // reasonNumaExclusiveAnomaly is the reason for numa exclusive anomaly + // when numa_binding and numa_exclusive are both set + // the pod with numa_binding and numa_exclusive shares the numa with other pods + reasonNumaExclusiveAnomaly = "The pod with numa_binding and numa_exclusive shares the numa with other pods" + // // reasonNumaAllocatableSumAnomaly is the reason for numa allocatable sum anomaly + // // when the node's sum of numa allocatable is not equal to the node allocatable + // reasonNumaAllocatableSumAnomaly = "The node's sum of numa allocatable is not equal to the node allocatable" + // // reasonPodAllocationSumAnomaly is the reason for pod allocation sum anomaly + // // when the numa's sum of pod allocation is greater than the numa allocatable + // reasonPodAllocationSumAnomaly = "The numa's sum of pod allocation is greater than the numa allocatable" +) + +// checkNumaExclusiveAnomaly checks whether exist the pod with numa_binding and numa_exclusive shares the numa with other pods +func (ctrl *CNRController) checkNumaExclusiveAnomaly(cnr *v1alpha1.CustomNodeResource) bool { + for _, socket := range cnr.Status.TopologyZone { + for _, numa := range socket.Children { + if numa.Type != v1alpha1.TopologyTypeNuma { + klog.Errorf("[CNRIndicatorNumaExclusiveAnomaly] failed to get numa %s", numa.Name) + return false + } + podNum := len(numa.Allocations) + for _, allocation := range numa.Allocations { + allocat := allocation.DeepCopy() + key := allocat.Consumer + namespace, podname, _, err := native.ParseUniqObjectUIDKey(key) + if err != nil { + klog.Errorf("[CNRIndicatorNumaExclusiveAnomaly] failed to parse uniq object uid key %s", key) + } + pod, err := ctrl.client.KubeClient.CoreV1().Pods(namespace).Get(ctrl.ctx, podname, metav1.GetOptions{ResourceVersion: "0"}) + if err != nil { + klog.Errorf("[CNRIndicatorNumaExclusiveAnomaly] failed to get pod %s", key) + } + if CheckNumaBinding(pod) && CheckNumaExclusive(pod) && podNum > 1 { + return true + } + } + } + } + return false +} + +// emitNumaAnomalyMetric emit numa anomaly metric +func (ctrl *CNRController) emitNumaAnomalyMetric(cnr *v1alpha1.CustomNodeResource, reason string) error { + node, err := ctrl.client.KubeClient.CoreV1().Nodes().Get(ctrl.ctx, cnr.Name, metav1.GetOptions{ResourceVersion: "0"}) + if err != nil { + klog.Errorf("[CNRIndicatorNumaExclusiveAnomaly] failed to get node %s", cnr.Name) + return err + } + + _ = ctrl.metricsEmitter.StoreInt64(metricsNameCNRReportAnomaly, 1, metrics.MetricTypeNameCount, + metrics.MetricTag{ + Key: "nodeIp", Val: node.Status.Addresses[0].Address, + }, + metrics.MetricTag{ + Key: "nodeName", Val: cnr.Name, + }, + metrics.MetricTag{ + Key: "reason", Val: reason, + }, + ) + + return nil +} diff --git a/pkg/controller/cnr/cnr_indicator_test.go b/pkg/controller/cnr/cnr_indicator_test.go new file mode 100644 index 0000000000..281c499b71 --- /dev/null +++ b/pkg/controller/cnr/cnr_indicator_test.go @@ -0,0 +1,244 @@ +package cnr + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + + "github.com/kubewharf/katalyst-api/pkg/apis/node/v1alpha1" + "github.com/kubewharf/katalyst-api/pkg/consts" + katalyst_base "github.com/kubewharf/katalyst-core/cmd/base" + "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/options" + metricspool "github.com/kubewharf/katalyst-core/pkg/metrics/metrics-pool" +) + +func Test_checkNumaExclusiveAnomaly(t *testing.T) { + t.Parallel() + + type fields struct { + pods []*v1.Pod + cnr *v1alpha1.CustomNodeResource + } + + tests := []struct { + name string + fields fields + wantResult bool + }{ + { + name: "numa exclusive anomaly with numa binding and numa exclusive pod", + fields: fields{ + pods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod1", + Namespace: "test-namespace", + Annotations: map[string]string{ + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + consts.PodAnnotationMemoryEnhancementNumaExclusive: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod2", + Namespace: "test-namespace", + }, + }, + }, + cnr: &v1alpha1.CustomNodeResource{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Status: v1alpha1.CustomNodeResourceStatus{ + TopologyZone: []*v1alpha1.TopologyZone{ + { + Children: []*v1alpha1.TopologyZone{ + { + Type: v1alpha1.TopologyTypeNuma, + Allocations: []*v1alpha1.Allocation{ + { + Consumer: "test-namespace/test-pod1/13414141", + }, + { + Consumer: "test-namespace/test-pod2/31413414", + }, + }, + }, + }, + }, + }, + }, + }, + }, + wantResult: true, + }, + { + name: "numa exclusive anomaly test with non numa binding and non numa exclusive pod", + fields: fields{ + pods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod1", + Namespace: "test-namespace", + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod2", + Namespace: "test-namespace", + }, + }, + }, + cnr: &v1alpha1.CustomNodeResource{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Status: v1alpha1.CustomNodeResourceStatus{ + TopologyZone: []*v1alpha1.TopologyZone{ + { + Children: []*v1alpha1.TopologyZone{ + { + Type: v1alpha1.TopologyTypeNuma, + Allocations: []*v1alpha1.Allocation{ + { + Consumer: "test-namespace/test-pod1/13414141", + }, + { + Consumer: "test-namespace/test-pod2/31413414", + }, + }, + }, + }, + }, + }, + }, + }, + }, + wantResult: false, + }, + { + name: "numa exclusive anomaly with numa binding and non numa exclusive pod", + fields: fields{ + pods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod1", + Namespace: "test-namespace", + Annotations: map[string]string{ + consts.PodAnnotationMemoryEnhancementNumaBinding: consts.PodAnnotationMemoryEnhancementNumaBindingEnable, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod2", + Namespace: "test-namespace", + }, + }, + }, + cnr: &v1alpha1.CustomNodeResource{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Status: v1alpha1.CustomNodeResourceStatus{ + TopologyZone: []*v1alpha1.TopologyZone{ + { + Children: []*v1alpha1.TopologyZone{ + { + Type: v1alpha1.TopologyTypeNuma, + Allocations: []*v1alpha1.Allocation{ + { + Consumer: "test-namespace/test-pod1/13414141", + }, + { + Consumer: "test-namespace/test-pod2/31413414", + }, + }, + }, + }, + }, + }, + }, + }, + }, + wantResult: false, + }, + { + name: "numa exclusive anomaly with numa binding and non numa exclusive pod", + fields: fields{ + pods: []*v1.Pod{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod1", + Namespace: "test-namespace", + Annotations: map[string]string{ + consts.PodAnnotationMemoryEnhancementNumaExclusive: consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod2", + Namespace: "test-namespace", + }, + }, + }, + cnr: &v1alpha1.CustomNodeResource{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + }, + Status: v1alpha1.CustomNodeResourceStatus{ + TopologyZone: []*v1alpha1.TopologyZone{ + { + Children: []*v1alpha1.TopologyZone{ + { + Type: v1alpha1.TopologyTypeNuma, + Allocations: []*v1alpha1.Allocation{ + { + Consumer: "test-namespace/test-pod1/13414141", + }, + { + Consumer: "test-namespace/test-pod2/31413414", + }, + }, + }, + }, + }, + }, + }, + }, + }, + wantResult: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + genericCtx, err := katalyst_base.GenerateFakeGenericContext([]runtime.Object{tt.fields.pods[0], tt.fields.pods[1]}, []runtime.Object{tt.fields.cnr}) + assert.NoError(t, err) + + conf, err := options.NewOptions().Config() + require.NoError(t, err) + require.NotNil(t, conf) + + ctrl, err := NewCNRController( + context.Background(), + conf.GenericConfiguration, + conf.GenericControllerConfiguration, + genericCtx.Client, + genericCtx.KubeInformerFactory.Core().V1().Pods(), + genericCtx.InternalInformerFactory.Node().V1alpha1().CustomNodeResources(), + metricspool.DummyMetricsEmitterPool.GetDefaultMetricsEmitter(metricspool.DummyMetricsEmitterPool{}), + ) + assert.NoError(t, err) + + result := ctrl.checkNumaExclusiveAnomaly(tt.fields.cnr) + assert.Equal(t, tt.wantResult, result) + }) + } +} diff --git a/pkg/controller/cnr/util.go b/pkg/controller/cnr/util.go new file mode 100644 index 0000000000..3d18bfe091 --- /dev/null +++ b/pkg/controller/cnr/util.go @@ -0,0 +1,17 @@ +package cnr + +import ( + v1 "k8s.io/api/core/v1" + + "github.com/kubewharf/katalyst-api/pkg/consts" +) + +// CheckNumaBinding returns true if the pod has the annotation to enable Numa binding +func CheckNumaBinding(pod *v1.Pod) bool { + return pod.Annotations[consts.PodAnnotationMemoryEnhancementNumaBinding] == consts.PodAnnotationMemoryEnhancementNumaBindingEnable +} + +// CheckNumaExclusive returns true if the pod has the annotation to enable Numa exclusive +func CheckNumaExclusive(pod *v1.Pod) bool { + return pod.Annotations[consts.PodAnnotationMemoryEnhancementNumaExclusive] == consts.PodAnnotationMemoryEnhancementNumaExclusiveEnable +} diff --git a/pkg/controller/lifecycle/cnr.go b/pkg/controller/lifecycle/cnr.go index b98ba220e8..d0f6a4ab8d 100644 --- a/pkg/controller/lifecycle/cnr.go +++ b/pkg/controller/lifecycle/cnr.go @@ -184,7 +184,7 @@ func (cl *CNRLifecycle) addCNREventHandle(obj interface{}) { func (cl *CNRLifecycle) updateCNREventHandle(_, new interface{}) { c, ok := new.(*apis.CustomNodeResource) if !ok { - klog.Errorf("cannot convert oldObj to *apis.CNR: %v", c) + klog.Errorf("cannot convert newObj to *apis.CNR: %v", c) return } klog.V(4).Infof("notice addition of cnr %s", c.Name) diff --git a/pkg/util/native/object.go b/pkg/util/native/object.go index d47f92bee6..0c69111cc3 100644 --- a/pkg/util/native/object.go +++ b/pkg/util/native/object.go @@ -39,6 +39,16 @@ func GenerateUniqObjectUIDKey(obj metav1.Object) string { return fmt.Sprintf("%s/%s/%s", obj.GetNamespace(), obj.GetName(), obj.GetUID()) } +// ParseUniqObjectUIDKey parse the given key into namespace, name and uid +func ParseUniqObjectUIDKey(key string) (namespace string, name string, uid string, err error) { + names := strings.Split(key, "/") + if len(names) != 3 { + return "", "", "", fmt.Errorf("workload key %s split error", key) + } + + return names[0], names[1], names[2], nil +} + // GenerateUniqObjectNameKey generate a uniq key (without UID) for the given object. func GenerateUniqObjectNameKey(obj metav1.Object) string { return GenerateNamespaceNameKey(obj.GetNamespace(), obj.GetName())