From cb37c49e8077dd97fb0346ece8bb91158843b2bc Mon Sep 17 00:00:00 2001 From: kubevirt-bot Date: Fri, 27 Sep 2024 14:22:50 +0200 Subject: [PATCH] raise an alert when the descheduler is not correctly configured (#3118) The descheduler is an optional operator: it is not installed by default, nor as a dependency of HCO. When installed, it can work on a cluster with KubeVirt only if the devEnableEvictionsInBackground profileCustomization, which is disabled by default, is enabled. HCO will check if the descheduler is there, and if so it will check its configuration. If the descheduler is misconfigured for the KubeVirt use case, HCO will trigger an alert to make the cluster admin aware. HCO does not directly amend the descheduler configuration, since it is an external, independent operator and directly controlling it is not a safe practice (it could lead to infinite loops fighting with other operators, and so on). Signed-off-by: Simone Tiraboschi Co-authored-by: Simone Tiraboschi --- Makefile | 6 +- cmd/hyperconverged-cluster-operator/main.go | 54 +++- controllers/commontestutils/testUtils.go | 40 ++- controllers/crd/crd_controller.go | 103 +++++++ controllers/crd/crd_controller_test.go | 245 ++++++++++++++++ controllers/crd/crd_suite_test.go | 13 + .../descheduler/descheduler_controller.go | 79 +++++ .../descheduler_controller_test.go | 199 +++++++++++++ .../descheduler/descheduler_suite_test.go | 13 + deploy/cluster_role.yaml | 8 + ...perator.v1.13.0.clusterserviceversion.yaml | 8 + ...perator.v1.13.0.clusterserviceversion.yaml | 8 + docs/cluster-configuration.md | 29 ++ docs/metrics.md | 3 + go.mod | 3 +- go.sum | 2 + .../deploy_fake_kubedescheduler.sh | 32 ++ .../kube-descheduler-operator.crd.yaml | 275 ++++++++++++++++++ hack/kubeDescheduler/kubeDescheduler.cr.yaml | 17 ++ hack/prom-rule-ci/prom-rules-tests.yaml | 81 ++++++ pkg/components/components.go | 6 + .../metrics/infrastructure_metrics.go | 20 +- .../rules/alerts/operator_alerts.go | 13 + pkg/util/cluster.go | 57 ++++ pkg/util/cluster_test.go | 229 +++++++++++++++ pkg/util/consts.go | 5 +- tests/func-tests/client.go | 4 + tests/func-tests/monitoring_test.go | 186 ++++++++++++ .../pkg/apis/descheduler/v1/doc.go | 7 + .../pkg/apis/descheduler/v1/register.go | 40 +++ .../apis/descheduler/v1/types_descheduler.go | 189 ++++++++++++ .../descheduler/v1/zz_generated.deepcopy.go | 147 ++++++++++ vendor/modules.txt | 3 + 33 files changed, 2112 insertions(+), 12 deletions(-) create mode 100644 controllers/crd/crd_controller.go create mode 100644 controllers/crd/crd_controller_test.go create mode 100644 controllers/crd/crd_suite_test.go create mode 100644 controllers/descheduler/descheduler_controller.go create mode 100644 controllers/descheduler/descheduler_controller_test.go create mode 100644 controllers/descheduler/descheduler_suite_test.go create mode 100755 hack/kubeDescheduler/deploy_fake_kubedescheduler.sh create mode 100644 hack/kubeDescheduler/kube-descheduler-operator.crd.yaml create mode 100644 hack/kubeDescheduler/kubeDescheduler.cr.yaml create mode 100644 vendor/github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1/doc.go create mode 100644 vendor/github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1/register.go create mode 100644 vendor/github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1/types_descheduler.go create mode 100644 vendor/github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1/zz_generated.deepcopy.go diff --git a/Makefile
b/Makefile index 2553006a32..046a1461a6 100644 --- a/Makefile +++ b/Makefile @@ -218,7 +218,7 @@ charts: local: ./hack/make_local.sh -deploy_cr: deploy_hco_cr deploy_hpp +deploy_cr: deploy_hco_cr deploy_hpp deploy_fake_kubedescheduler deploy_hco_cr: ./hack/deploy_only_cr.sh @@ -226,6 +226,9 @@ deploy_hco_cr: deploy_hpp: ./hack/hpp/deploy_hpp.sh +deploy_fake_kubedescheduler: + ./hack/kubeDescheduler/deploy_fake_kubedescheduler.sh + validate-no-offensive-lang: ./hack/validate-no-offensive-lang.sh @@ -280,6 +283,7 @@ bump-hco: kubevirt-nightly-test \ local \ deploy_cr \ + deploy_fake_kubedescheduler \ build-docgen \ generate \ generate-doc \ diff --git a/cmd/hyperconverged-cluster-operator/main.go b/cmd/hyperconverged-cluster-operator/main.go index d8e3728755..674d6b8fe2 100644 --- a/cmd/hyperconverged-cluster-operator/main.go +++ b/cmd/hyperconverged-cluster-operator/main.go @@ -12,6 +12,7 @@ import ( imagev1 "github.com/openshift/api/image/v1" operatorv1 "github.com/openshift/api/operator/v1" openshiftroutev1 "github.com/openshift/api/route/v1" + deschedulerv1 "github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1" csvv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1" operatorsapiv2 "github.com/operator-framework/api/pkg/operators/v2" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" @@ -50,6 +51,8 @@ import ( "github.com/kubevirt/hyperconverged-cluster-operator/api" hcov1beta1 "github.com/kubevirt/hyperconverged-cluster-operator/api/v1beta1" "github.com/kubevirt/hyperconverged-cluster-operator/cmd/cmdcommon" + "github.com/kubevirt/hyperconverged-cluster-operator/controllers/crd" + "github.com/kubevirt/hyperconverged-cluster-operator/controllers/descheduler" "github.com/kubevirt/hyperconverged-cluster-operator/controllers/hyperconverged" "github.com/kubevirt/hyperconverged-cluster-operator/controllers/observability" "github.com/kubevirt/hyperconverged-cluster-operator/controllers/operands" @@ -86,6 +89,7 @@ var ( operatorsapiv2.AddToScheme, imagev1.Install, aaqv1alpha1.AddToScheme, + deschedulerv1.AddToScheme, } ) @@ -121,7 +125,7 @@ func main() { needLeaderElection := !ci.IsRunningLocally() // Create a new Cmd to provide shared dependencies and start components - mgr, err := manager.New(cfg, getManagerOptions(operatorNamespace, needLeaderElection, ci.IsMonitoringAvailable(), ci.IsOpenshift(), scheme)) + mgr, err := manager.New(cfg, getManagerOptions(operatorNamespace, needLeaderElection, ci, scheme)) cmdHelper.ExitOnError(err, "can't initiate manager") // register pprof instrumentation if HCO_PPROF_ADDR is set @@ -165,6 +169,10 @@ func main() { upgradeableCondition, err = hcoutil.NewOperatorCondition(ci, mgr.GetClient(), operatorsapiv2.Upgradeable) cmdHelper.ExitOnError(err, "Cannot create Upgradeable Operator Condition") + // a channel to trigger a restart of the operator + // via a clean cancel of the manager + restartCh := make(chan struct{}) + // Create a new reconciler if err := hyperconverged.RegisterReconciler(mgr, ci, upgradeableCondition); err != nil { logger.Error(err, "failed to register the HyperConverged controller") @@ -172,6 +180,13 @@ func main() { os.Exit(1) } + // Create a new CRD reconciler + if err := crd.RegisterReconciler(mgr, restartCh); err != nil { + logger.Error(err, "failed to register the CRD controller") + eventEmitter.EmitEvent(nil, corev1.EventTypeWarning, "InitError", "Unable to register CRD controller; "+err.Error()) + os.Exit(1) + } + if ci.IsOpenshift() { if err = 
observability.SetupWithManager(mgr); err != nil { logger.Error(err, "unable to create controller", "controller", "Observability") @@ -179,6 +194,15 @@ func main() { } } + if ci.IsDeschedulerAvailable() { + // Create a new reconciler for KubeDescheduler + if err := descheduler.RegisterReconciler(mgr); err != nil { + logger.Error(err, "failed to register the KubeDescheduler controller") + eventEmitter.EmitEvent(nil, corev1.EventTypeWarning, "InitError", "Unable to register KubeDescheduler controller; "+err.Error()) + os.Exit(1) + } + } + err = createPriorityClass(ctx, mgr) cmdHelper.ExitOnError(err, "Failed creating PriorityClass") @@ -193,8 +217,17 @@ func main() { logger.Info("Starting the Cmd.") eventEmitter.EmitEvent(nil, corev1.EventTypeNormal, "Init", "Starting the HyperConverged Pod") + // create context with cancel for the manager + mgrCtx, mgrCancel := context.WithCancel(signals.SetupSignalHandler()) + + defer mgrCancel() + go func() { + <-restartCh + mgrCancel() + }() + // Start the Cmd - if err := mgr.Start(signals.SetupSignalHandler()); err != nil { + if err := mgr.Start(mgrCtx); err != nil { logger.Error(err, "Manager exited non-zero") eventEmitter.EmitEvent(nil, corev1.EventTypeWarning, "UnexpectedError", "HyperConverged crashed; "+err.Error()) os.Exit(1) @@ -203,7 +236,7 @@ func main() { // Restricts the cache's ListWatch to specific fields/labels per GVK at the specified object to control the memory impact // this is used to completely overwrite the NewCache function so all the interesting objects should be explicitly listed here -func getCacheOption(operatorNamespace string, isMonitoringAvailable, isOpenshift bool) cache.Options { +func getCacheOption(operatorNamespace string, ci hcoutil.ClusterInfo) cache.Options { namespaceSelector := fields.Set{"metadata.namespace": operatorNamespace}.AsSelector() labelSelector := labels.Set{hcoutil.AppLabel: hcoutil.HyperConvergedName}.AsSelector() labelSelectorForNamespace := labels.Set{hcoutil.KubernetesMetadataName: operatorNamespace}.AsSelector() @@ -257,6 +290,10 @@ func getCacheOption(operatorNamespace string, isMonitoringAvailable, isOpenshift }, } + cacheOptionsByObjectForDescheduler := map[client.Object]cache.ByObject{ + &deschedulerv1.KubeDescheduler{}: {}, + } + cacheOptionsByObjectForOpenshift := map[client.Object]cache.ByObject{ &openshiftroutev1.Route{}: { Namespaces: map[string]cache.Config{ @@ -279,10 +316,13 @@ func getCacheOption(operatorNamespace string, isMonitoringAvailable, isOpenshift }, } - if isMonitoringAvailable { + if ci.IsMonitoringAvailable() { maps.Copy(cacheOptions.ByObject, cacheOptionsByObjectForMonitoring) } - if isOpenshift { + if ci.IsDeschedulerAvailable() { + maps.Copy(cacheOptions.ByObject, cacheOptionsByObjectForDescheduler) + } + if ci.IsOpenshift() { maps.Copy(cacheOptions.ByObject, cacheOptionsByObjectForOpenshift) } @@ -290,7 +330,7 @@ func getCacheOption(operatorNamespace string, isMonitoringAvailable, isOpenshift } -func getManagerOptions(operatorNamespace string, needLeaderElection, isMonitoringAvailable, isOpenshift bool, scheme *apiruntime.Scheme) manager.Options { +func getManagerOptions(operatorNamespace string, needLeaderElection bool, ci hcoutil.ClusterInfo, scheme *apiruntime.Scheme) manager.Options { return manager.Options{ Metrics: server.Options{ BindAddress: fmt.Sprintf("%s:%d", hcoutil.MetricsHost, hcoutil.MetricsPort), @@ -305,7 +345,7 @@ func getManagerOptions(operatorNamespace string, needLeaderElection, isMonitorin // "configmapsleases". 
Therefore, having only "leases" should be safe now. LeaderElectionResourceLock: resourcelock.LeasesResourceLock, LeaderElectionID: "hyperconverged-cluster-operator-lock", - Cache: getCacheOption(operatorNamespace, isMonitoringAvailable, isOpenshift), + Cache: getCacheOption(operatorNamespace, ci), Scheme: scheme, } } diff --git a/controllers/commontestutils/testUtils.go b/controllers/commontestutils/testUtils.go index 8d7bf62555..9d989c29a7 100644 --- a/controllers/commontestutils/testUtils.go +++ b/controllers/commontestutils/testUtils.go @@ -14,6 +14,7 @@ import ( imagev1 "github.com/openshift/api/image/v1" operatorv1 "github.com/openshift/api/operator/v1" routev1 "github.com/openshift/api/route/v1" + deschedulerv1 "github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1" csvv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" appsv1 "k8s.io/api/apps/v1" @@ -164,6 +165,7 @@ func GetScheme() *runtime.Scheme { openshiftconfigv1.Install, csvv1alpha1.AddToScheme, aaqv1alpha1.AddToScheme, + deschedulerv1.AddToScheme, } { Expect(f(testScheme)).ToNot(HaveOccurred()) } @@ -307,6 +309,15 @@ func (c ClusterInfoMock) IsConsolePluginImageProvided() bool { func (c ClusterInfoMock) IsMonitoringAvailable() bool { return true } +func (c ClusterInfoMock) IsDeschedulerAvailable() bool { + return true +} +func (c ClusterInfoMock) IsDeschedulerCRDDeployed(_ context.Context, _ client.Client) bool { + return true +} +func (c ClusterInfoMock) IsDeschedulerMisconfigured() bool { + return false +} func (c ClusterInfoMock) IsSingleStackIPv6() bool { return true } @@ -330,6 +341,9 @@ func (ClusterInfoMock) GetTLSSecurityProfile(_ *openshiftconfigv1.TLSSecurityPro func (ClusterInfoMock) RefreshAPIServerCR(_ context.Context, _ client.Client) error { return nil } +func (ClusterInfoMock) RefreshDeschedulerCR(_ context.Context, _ client.Client) error { + return nil +} // ClusterInfoSNOMock mocks Openshift SNO type ClusterInfoSNOMock struct{} @@ -378,12 +392,24 @@ func (ClusterInfoSNOMock) GetTLSSecurityProfile(_ *openshiftconfigv1.TLSSecurity func (ClusterInfoSNOMock) RefreshAPIServerCR(_ context.Context, _ client.Client) error { return nil } +func (ClusterInfoSNOMock) RefreshDeschedulerCR(_ context.Context, _ client.Client) error { + return nil +} func (ClusterInfoSNOMock) IsConsolePluginImageProvided() bool { return true } func (c ClusterInfoSNOMock) IsMonitoringAvailable() bool { return true } +func (c ClusterInfoSNOMock) IsDeschedulerAvailable() bool { + return true +} +func (c ClusterInfoSNOMock) IsDeschedulerCRDDeployed(_ context.Context, _ client.Client) bool { + return true +} +func (c ClusterInfoSNOMock) IsDeschedulerMisconfigured() bool { + return false +} func (c ClusterInfoSNOMock) IsSingleStackIPv6() bool { return true } @@ -432,7 +458,16 @@ func (ClusterInfoSRCPHAIMock) IsConsolePluginImageProvided() bool { func (ClusterInfoSRCPHAIMock) IsMonitoringAvailable() bool { return true } -func (m ClusterInfoSRCPHAIMock) IsSingleStackIPv6() bool { +func (ClusterInfoSRCPHAIMock) IsDeschedulerAvailable() bool { + return true +} +func (ClusterInfoSRCPHAIMock) IsDeschedulerCRDDeployed(_ context.Context, _ client.Client) bool { + return true +} +func (ClusterInfoSRCPHAIMock) IsDeschedulerMisconfigured() bool { + return false +} +func (ClusterInfoSRCPHAIMock) IsSingleStackIPv6() bool { return true } func (ClusterInfoSRCPHAIMock) GetTLSSecurityProfile(_ *openshiftconfigv1.TLSSecurityProfile) 
*openshiftconfigv1.TLSSecurityProfile { @@ -444,6 +479,9 @@ func (ClusterInfoSRCPHAIMock) GetTLSSecurityProfile(_ *openshiftconfigv1.TLSSecu func (ClusterInfoSRCPHAIMock) RefreshAPIServerCR(_ context.Context, _ client.Client) error { return nil } +func (ClusterInfoSRCPHAIMock) RefreshDeschedulerCR(_ context.Context, _ client.Client) error { + return nil +} func KeysFromSSMap(ssmap map[string]string) gstruct.Keys { keys := gstruct.Keys{} diff --git a/controllers/crd/crd_controller.go b/controllers/crd/crd_controller.go new file mode 100644 index 0000000000..8db569e02d --- /dev/null +++ b/controllers/crd/crd_controller.go @@ -0,0 +1,103 @@ +package crd + +import ( + "context" + + operatorhandler "github.com/operator-framework/operator-lib/handler" + corev1 "k8s.io/api/core/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" + + hcoutil "github.com/kubevirt/hyperconverged-cluster-operator/pkg/util" +) + +var ( + log = logf.Log.WithName("controller_crd") +) + +// RegisterReconciler creates a new CRD Reconciler and registers it into manager. +func RegisterReconciler(mgr manager.Manager, restartCh chan<- struct{}) error { + return add(mgr, newReconciler(mgr, restartCh)) +} + +// newReconciler returns a new reconcile.Reconciler +func newReconciler(mgr manager.Manager, restartCh chan<- struct{}) reconcile.Reconciler { + + r := &ReconcileCRD{ + client: mgr.GetClient(), + restartCh: restartCh, + eventEmitter: hcoutil.GetEventEmitter(), + } + + return r +} + +// add adds a new Controller to mgr with r as the reconcile.Reconciler +func add(mgr manager.Manager, r reconcile.Reconciler) error { + + // Create a new controller + c, err := controller.New("crd-controller", mgr, controller.Options{Reconciler: r}) + if err != nil { + return err + } + + // Watch for changes to selected (by name) CRDs + // look only at the descheduler CRD for now + err = c.Watch( + source.Kind( + mgr.GetCache(), client.Object(&apiextensionsv1.CustomResourceDefinition{}), + &operatorhandler.InstrumentedEnqueueRequestForObject[client.Object]{}, + predicate.NewPredicateFuncs(func(object client.Object) bool { + switch object.GetName() { + case hcoutil.DeschedulerCRDName: + return true + } + return false + }), + )) + if err != nil { + return err + } + + return nil + } + +// ReconcileCRD reconciles a CRD object +type ReconcileCRD struct { + // This client, initialized using mgr.GetClient() above, is a split client + // that reads objects from the cache and writes to the apiserver + client client.Client + eventEmitter hcoutil.EventEmitter + restartCh chan<- struct{} +} + +// operatorRestart triggers a restart of the operator: +// the controller-runtime caching client can only handle kinds +// that were already defined when the client cache got initialized. +// If a new relevant kind got deployed at runtime, +// the operator should restart to be able to read it.
+// See: https://github.com/kubernetes-sigs/controller-runtime/issues/2456 +func (r *ReconcileCRD) operatorRestart() { + r.restartCh <- struct{}{} +} + +// Reconcile restarts the operator when the KubeDescheduler CRD gets deployed at runtime, so that it can be reconfigured for the new kind +func (r *ReconcileCRD) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) { + + log.Info("Triggered by a CRD") + if !hcoutil.GetClusterInfo().IsDeschedulerAvailable() { + if hcoutil.GetClusterInfo().IsDeschedulerCRDDeployed(ctx, r.client) { + log.Info("KubeDescheduler CRD got deployed, restarting the operator to reconfigure the operator for the new kind") + r.eventEmitter.EmitEvent(nil, corev1.EventTypeNormal, "KubeDescheduler CRD got deployed, restarting the operator to reconfigure the operator for the new kind", "Restarting the operator to be able to read KubeDescheduler CRs") + r.operatorRestart() + } + } + + return reconcile.Result{}, nil +} diff --git a/controllers/crd/crd_controller_test.go b/controllers/crd/crd_controller_test.go new file mode 100644 index 0000000000..ada4bba6e5 --- /dev/null +++ b/controllers/crd/crd_controller_test.go @@ -0,0 +1,245 @@ +package crd + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + openshiftconfigv1 "github.com/openshift/api/config/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/kubevirt/hyperconverged-cluster-operator/controllers/commontestutils" + hcoutil "github.com/kubevirt/hyperconverged-cluster-operator/pkg/util" +) + +// Mock TestRequest to simulate Reconcile() being called on an event for a watched resource +var ( + deschedulerRequest = reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: hcoutil.DeschedulerCRDName, + }, + } + otherRequest = reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: "other", + }, + } +) + +var _ = Describe("CRDController", func() { + + getClusterInfo := hcoutil.GetClusterInfo + + Describe("Reconcile KubeDescheduler", func() { + + Context("Descheduler CRD", func() { + + externalClusterInfo := hcoutil.GetClusterInfo + + logger := zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)).WithName("descheduler_controller_test") + + BeforeEach(func() { + hcoutil.GetClusterInfo = getClusterInfo + }) + + AfterEach(func() { + hcoutil.GetClusterInfo = externalClusterInfo + }) + + clusterObjects := []client.Object{ + &openshiftconfigv1.ClusterVersion{ + ObjectMeta: metav1.ObjectMeta{ + Name: "version", + }, + Spec: openshiftconfigv1.ClusterVersionSpec{ + ClusterID: "clusterId", + }, + }, + &openshiftconfigv1.Infrastructure{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster", + }, + Status: openshiftconfigv1.InfrastructureStatus{ + ControlPlaneTopology: openshiftconfigv1.HighlyAvailableTopologyMode, + InfrastructureTopology: openshiftconfigv1.HighlyAvailableTopologyMode, + PlatformStatus: &openshiftconfigv1.PlatformStatus{ + Type: "mocked", + }, + }, + }, + &openshiftconfigv1.Ingress{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster", + }, + Spec: openshiftconfigv1.IngressSpec{ + Domain: "domain", + }, + }, + &openshiftconfigv1.DNS{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster", + }, + Spec: openshiftconfigv1.DNSSpec{ + BaseDomain: commontestutils.BaseDomain, + }, + }, + &openshiftconfigv1.Network{ + ObjectMeta: metav1.ObjectMeta{ +
Name: "cluster", + }, + Status: openshiftconfigv1.NetworkStatus{ + ClusterNetwork: []openshiftconfigv1.ClusterNetworkEntry{ + { + CIDR: "10.128.0.0/14", + }, + }, + }, + }, + &openshiftconfigv1.APIServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster", + }, + Spec: openshiftconfigv1.APIServerSpec{}, + }, + } + + It("Should trigger a restart of the operator if KubeDescheduler was not there and it appeared", func() { + + cl := commontestutils.InitClient(clusterObjects) + Expect(hcoutil.GetClusterInfo().Init(context.TODO(), cl, logger)).To(Succeed()) + + Expect(hcoutil.GetClusterInfo().IsDeschedulerAvailable()).To(BeFalse(), "KubeDescheduler is not installed") + Expect(hcoutil.GetClusterInfo().IsDeschedulerCRDDeployed(context.TODO(), cl)).To(BeFalse(), "KubeDescheduler is not installed") + + testCh := make(chan struct{}, 1) + + r := &ReconcileCRD{ + client: cl, + restartCh: testCh, + eventEmitter: commontestutils.NewEventEmitterMock(), + } + + deschedulerCRD := &apiextensionsv1.CustomResourceDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: hcoutil.DeschedulerCRDName, + }, + } + + err := cl.Create(context.TODO(), deschedulerCRD) + Expect(err).NotTo(HaveOccurred()) + Expect(hcoutil.GetClusterInfo().IsDeschedulerAvailable()).To(BeFalse(), "When the operator started the KubeDescheduler wasn't available") + Expect(hcoutil.GetClusterInfo().IsDeschedulerCRDDeployed(context.TODO(), cl)).To(BeTrue(), "KubeDescheduler is now installed") + + res, err := r.Reconcile(context.Background(), deschedulerRequest) + Expect(err).ToNot(HaveOccurred()) + Expect(res.Requeue).To(BeFalse()) + Expect(res).To(Equal(reconcile.Result{})) + Eventually(testCh).Should(Receive()) + + }) + + It("Should not trigger a restart of the operator if KubeDescheduler was not there and another CRD appeared", func() { + + cl := commontestutils.InitClient(clusterObjects) + Expect(hcoutil.GetClusterInfo().Init(context.TODO(), cl, logger)).To(Succeed()) + + Expect(hcoutil.GetClusterInfo().IsDeschedulerAvailable()).To(BeFalse(), "KubeDescheduler is not installed") + Expect(hcoutil.GetClusterInfo().IsDeschedulerCRDDeployed(context.TODO(), cl)).To(BeFalse(), "KubeDescheduler is not installed") + + testCh := make(chan struct{}, 1) + + r := &ReconcileCRD{ + client: cl, + restartCh: testCh, + eventEmitter: commontestutils.NewEventEmitterMock(), + } + + Expect(hcoutil.GetClusterInfo().IsDeschedulerAvailable()).To(BeFalse(), "When the operator started the KubeDescheduler wasn't available") + Expect(hcoutil.GetClusterInfo().IsDeschedulerCRDDeployed(context.TODO(), cl)).To(BeFalse(), "KubeDescheduler is still not installed") + + res, err := r.Reconcile(context.Background(), otherRequest) + Expect(err).ToNot(HaveOccurred()) + Expect(res.Requeue).To(BeFalse()) + Expect(res).To(Equal(reconcile.Result{})) + Consistently(testCh).Should(Not(Receive())) + + }) + + It("Should not trigger a restart of the operator if KubeDescheduler was already there and its CRD got updated", func() { + + deschedulerCRD := &apiextensionsv1.CustomResourceDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: hcoutil.DeschedulerCRDName, + }, + } + clusterObjects := append(clusterObjects, deschedulerCRD) + + cl := commontestutils.InitClient(clusterObjects) + Expect(hcoutil.GetClusterInfo().Init(context.TODO(), cl, logger)).To(Succeed()) + + Expect(hcoutil.GetClusterInfo().IsDeschedulerAvailable()).To(BeTrue(), "KubeDescheduler is alredy installed") + Expect(hcoutil.GetClusterInfo().IsDeschedulerCRDDeployed(context.TODO(), cl)).To(BeTrue(), "KubeDescheduler is already 
installed") + + testCh := make(chan struct{}, 1) + + r := &ReconcileCRD{ + client: cl, + restartCh: testCh, + eventEmitter: commontestutils.NewEventEmitterMock(), + } + + Expect(hcoutil.GetClusterInfo().IsDeschedulerAvailable()).To(BeTrue(), "When the operator started the KubeDescheduler was already available") + Expect(hcoutil.GetClusterInfo().IsDeschedulerCRDDeployed(context.TODO(), cl)).To(BeTrue(), "KubeDescheduler is already installed") + + res, err := r.Reconcile(context.Background(), deschedulerRequest) + Expect(err).ToNot(HaveOccurred()) + Expect(res.Requeue).To(BeFalse()) + Expect(res).To(Equal(reconcile.Result{})) + Consistently(testCh).Should(Not(Receive())) + + }) + + It("Should not trigger a restart of the operator if KubeDescheduler was already there and another CRD got updated", func() { + + deschedulerCRD := &apiextensionsv1.CustomResourceDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: hcoutil.DeschedulerCRDName, + }, + } + clusterObjects := append(clusterObjects, deschedulerCRD) + + cl := commontestutils.InitClient(clusterObjects) + Expect(hcoutil.GetClusterInfo().Init(context.TODO(), cl, logger)).To(Succeed()) + + Expect(hcoutil.GetClusterInfo().IsDeschedulerAvailable()).To(BeTrue(), "KubeDescheduler is alredy installed") + Expect(hcoutil.GetClusterInfo().IsDeschedulerCRDDeployed(context.TODO(), cl)).To(BeTrue(), "KubeDescheduler is already installed") + + testCh := make(chan struct{}, 1) + + r := &ReconcileCRD{ + client: cl, + restartCh: testCh, + eventEmitter: commontestutils.NewEventEmitterMock(), + } + + Expect(hcoutil.GetClusterInfo().IsDeschedulerAvailable()).To(BeTrue(), "When the operator started the KubeDescheduler was already available") + Expect(hcoutil.GetClusterInfo().IsDeschedulerCRDDeployed(context.TODO(), cl)).To(BeTrue(), "KubeDescheduler is already installed") + + res, err := r.Reconcile(context.Background(), otherRequest) + Expect(err).ToNot(HaveOccurred()) + Expect(res.Requeue).To(BeFalse()) + Expect(res).To(Equal(reconcile.Result{})) + Consistently(testCh).Should(Not(Receive())) + + }) + + }) + + }) +}) diff --git a/controllers/crd/crd_suite_test.go b/controllers/crd/crd_suite_test.go new file mode 100644 index 0000000000..19c8ce765c --- /dev/null +++ b/controllers/crd/crd_suite_test.go @@ -0,0 +1,13 @@ +package crd_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +func TestCrd(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "CRD Suite") +} diff --git a/controllers/descheduler/descheduler_controller.go b/controllers/descheduler/descheduler_controller.go new file mode 100644 index 0000000000..231825fa8b --- /dev/null +++ b/controllers/descheduler/descheduler_controller.go @@ -0,0 +1,79 @@ +package descheduler + +import ( + "context" + + deschedulerv1 "github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1" + operatorhandler "github.com/operator-framework/operator-lib/handler" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/manager" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" + + "github.com/kubevirt/hyperconverged-cluster-operator/pkg/monitoring/metrics" + hcoutil "github.com/kubevirt/hyperconverged-cluster-operator/pkg/util" +) + +var ( + log = logf.Log.WithName("controller_descheduler") +) + +// RegisterReconciler creates a new Descheduler Reconciler and registers it into manager. +func RegisterReconciler(mgr manager.Manager) error { + return add(mgr, newReconciler(mgr)) +} + +// newReconciler returns a new reconcile.Reconciler +func newReconciler(mgr manager.Manager) reconcile.Reconciler { + + r := &ReconcileDescheduler{ + client: mgr.GetClient(), + } + + return r +} + +// add adds a new Controller to mgr with r as the reconcile.Reconciler +func add(mgr manager.Manager, r reconcile.Reconciler) error { + // Create a new controller + c, err := controller.New("kubedescheduler-controller", mgr, controller.Options{Reconciler: r}) + if err != nil { + return err + } + + // Watch for changes to KubeDeschedulers + err = c.Watch( + source.Kind( + mgr.GetCache(), client.Object(&deschedulerv1.KubeDescheduler{}), + &operatorhandler.InstrumentedEnqueueRequestForObject[client.Object]{}, + predicate.Or[client.Object](predicate.GenerationChangedPredicate{}, predicate.AnnotationChangedPredicate{}), + )) + if err != nil { + return err + } + + return nil +} + +// ReconcileDescheduler reconciles a KubeDescheduler object +type ReconcileDescheduler struct { + // This client, initialized using mgr.Client() above, is a split client + // that reads objects from the cache and writes to the apiserver + client client.Client +} + +// Reconcile refreshes KubeDesheduler view on ClusterInfo singleton +func (r *ReconcileDescheduler) Reconcile(ctx context.Context, _ reconcile.Request) (reconcile.Result, error) { + + log.Info("Triggered by Descheduler CR, refreshing it") + err := hcoutil.GetClusterInfo().RefreshDeschedulerCR(ctx, r.client) + if err != nil { + return reconcile.Result{}, err + } + metrics.SetHCOMetricMisconfiguredDescheduler(hcoutil.GetClusterInfo().IsDeschedulerMisconfigured()) + + return reconcile.Result{}, nil +} diff --git a/controllers/descheduler/descheduler_controller_test.go b/controllers/descheduler/descheduler_controller_test.go new file mode 100644 index 0000000000..9dae413d36 --- /dev/null +++ b/controllers/descheduler/descheduler_controller_test.go @@ -0,0 +1,199 @@ +package descheduler + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + openshiftconfigv1 "github.com/openshift/api/config/v1" + + deschedulerv1 "github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1" + corev1 "k8s.io/api/core/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/kubevirt/hyperconverged-cluster-operator/controllers/commontestutils" + hcoutil "github.com/kubevirt/hyperconverged-cluster-operator/pkg/util" +) + +// Mock TestRequest to simulate Reconcile() being called on an event for a watched resource +var ( + request = reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: hcoutil.DeschedulerCRName, + Namespace: hcoutil.DeschedulerNamespace, + }, + } +) + +var _ = Describe("DeschedulerController", func() { + + getClusterInfo := hcoutil.GetClusterInfo + + Describe("Reconcile KubeDescheduler", func() { + + BeforeEach(func() { + hcoutil.GetClusterInfo = func() hcoutil.ClusterInfo { + return commontestutils.ClusterInfoMock{} + } + }) + + AfterEach(func() { + hcoutil.GetClusterInfo = getClusterInfo + }) + + Context("KubeDescheduler CR", func() { + + externalClusterInfo := hcoutil.GetClusterInfo + + BeforeEach(func() { + hcoutil.GetClusterInfo = getClusterInfo + }) + + AfterEach(func() { + hcoutil.GetClusterInfo = externalClusterInfo + }) + + It("Should refresh cached KubeDescheduler if the reconciliation is caused by a change there", func() { + + clusterVersion := &openshiftconfigv1.ClusterVersion{ + ObjectMeta: metav1.ObjectMeta{ + Name: "version", + }, + Spec: openshiftconfigv1.ClusterVersionSpec{ + ClusterID: "clusterId", + }, + } + + infrastructure := &openshiftconfigv1.Infrastructure{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster", + }, + Status: openshiftconfigv1.InfrastructureStatus{ + ControlPlaneTopology: openshiftconfigv1.HighlyAvailableTopologyMode, + InfrastructureTopology: openshiftconfigv1.HighlyAvailableTopologyMode, + PlatformStatus: &openshiftconfigv1.PlatformStatus{ + Type: "mocked", + }, + }, + } + + ingress := &openshiftconfigv1.Ingress{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster", + }, + Spec: openshiftconfigv1.IngressSpec{ + Domain: "domain", + }, + } + + dns := &openshiftconfigv1.DNS{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster", + }, + Spec: openshiftconfigv1.DNSSpec{ + BaseDomain: commontestutils.BaseDomain, + }, + } + + ipv4network := &openshiftconfigv1.Network{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster", + }, + Status: openshiftconfigv1.NetworkStatus{ + ClusterNetwork: []openshiftconfigv1.ClusterNetworkEntry{ + { + CIDR: "10.128.0.0/14", + }, + }, + }, + } + + apiServer := &openshiftconfigv1.APIServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster", + }, + Spec: openshiftconfigv1.APIServerSpec{}, + } + + deschedulerCRD := &apiextensionsv1.CustomResourceDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: hcoutil.DeschedulerCRDName, + }, + } + + deschedulerNamespace := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: hcoutil.DeschedulerNamespace, + Annotations: map[string]string{ + hcoutil.OpenshiftNodeSelectorAnn: "", + }, + }, + } + + descheduler := &deschedulerv1.KubeDescheduler{ + ObjectMeta: metav1.ObjectMeta{ + Name: hcoutil.DeschedulerCRName, + Namespace: hcoutil.DeschedulerNamespace, + }, + Spec: deschedulerv1.KubeDeschedulerSpec{}, + } + + resources 
:= []client.Object{deschedulerCRD, deschedulerNamespace, descheduler, clusterVersion, infrastructure, ingress, dns, ipv4network, apiServer} + cl := commontestutils.InitClient(resources) + + logger := zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)).WithName("descheduler_controller_test") + Expect(hcoutil.GetClusterInfo().Init(context.TODO(), cl, logger)).To(Succeed()) + + Expect(hcoutil.GetClusterInfo().IsDeschedulerMisconfigured()).To(BeTrue(), "default KubeDescheduler should not fit KubeVirt") + + r := &ReconcileDescheduler{ + client: cl, + } + + // Reconcile to get all related objects under HCO's status + res, err := r.Reconcile(context.TODO(), request) + Expect(err).ToNot(HaveOccurred()) + Expect(res.Requeue).To(BeFalse()) + Expect(res).To(Equal(reconcile.Result{})) + + // Update KubeDescheduler CR + descheduler.Spec.ProfileCustomizations = &deschedulerv1.ProfileCustomizations{ + DevEnableEvictionsInBackground: true, + } + Expect(cl.Update(context.TODO(), descheduler)).To(Succeed()) + Expect(hcoutil.GetClusterInfo().IsDeschedulerMisconfigured()).To(BeTrue(), "should still return the cached value (initial value)") + + // Reconcile again to refresh KubeDescheduler CR in memory + res, err = r.Reconcile(context.TODO(), request) + Expect(err).ToNot(HaveOccurred()) + Expect(res.Requeue).To(BeFalse()) + Expect(res).To(Equal(reconcile.Result{})) + + Expect(hcoutil.GetClusterInfo().IsDeschedulerMisconfigured()).To(BeFalse(), "should return the up-to-date value") + + // Update again the KubeDescheduler CR + descheduler.Spec.ProfileCustomizations = &deschedulerv1.ProfileCustomizations{ + DevEnableEvictionsInBackground: false, + } + Expect(cl.Update(context.TODO(), descheduler)).To(Succeed()) + Expect(hcoutil.GetClusterInfo().IsDeschedulerMisconfigured()).To(BeFalse(), "should still return the cached value (previous value)") + + // Reconcile again to refresh KubeDescheduler CR in memory + res, err = r.Reconcile(context.TODO(), request) + Expect(err).ToNot(HaveOccurred()) + Expect(res.Requeue).To(BeFalse()) + Expect(res).To(Equal(reconcile.Result{})) + + Expect(hcoutil.GetClusterInfo().IsDeschedulerMisconfigured()).To(BeTrue(), "should return a different up-to-date value") + + }) + + }) + + }) +}) diff --git a/controllers/descheduler/descheduler_suite_test.go b/controllers/descheduler/descheduler_suite_test.go new file mode 100644 index 0000000000..ee87072996 --- /dev/null +++ b/controllers/descheduler/descheduler_suite_test.go @@ -0,0 +1,13 @@ +package descheduler_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +func TestDescheduler(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Descheduler Suite") +} diff --git a/deploy/cluster_role.yaml b/deploy/cluster_role.yaml index c43e3e5d4d..07a3317398 100644 --- a/deploy/cluster_role.yaml +++ b/deploy/cluster_role.yaml @@ -984,6 +984,14 @@ rules: - get - list - watch +- apiGroups: + - operator.openshift.io + resources: + - kubedeschedulers + verbs: + - get + - list + - watch - apiGroups: - config.openshift.io resources: diff --git a/deploy/index-image/community-kubevirt-hyperconverged/1.13.0/manifests/kubevirt-hyperconverged-operator.v1.13.0.clusterserviceversion.yaml b/deploy/index-image/community-kubevirt-hyperconverged/1.13.0/manifests/kubevirt-hyperconverged-operator.v1.13.0.clusterserviceversion.yaml index 629e19bf55..6f592a05a3 100644 --- a/deploy/index-image/community-kubevirt-hyperconverged/1.13.0/manifests/kubevirt-hyperconverged-operator.v1.13.0.clusterserviceversion.yaml +++ b/deploy/index-image/community-kubevirt-hyperconverged/1.13.0/manifests/kubevirt-hyperconverged-operator.v1.13.0.clusterserviceversion.yaml @@ -454,6 +454,14 @@ spec: - get - list - watch + - apiGroups: + - operator.openshift.io + resources: + - kubedeschedulers + verbs: + - get + - list + - watch - apiGroups: - config.openshift.io resources: diff --git a/deploy/olm-catalog/community-kubevirt-hyperconverged/1.13.0/manifests/kubevirt-hyperconverged-operator.v1.13.0.clusterserviceversion.yaml b/deploy/olm-catalog/community-kubevirt-hyperconverged/1.13.0/manifests/kubevirt-hyperconverged-operator.v1.13.0.clusterserviceversion.yaml index 5b1a091d4e..933f7606db 100644 --- a/deploy/olm-catalog/community-kubevirt-hyperconverged/1.13.0/manifests/kubevirt-hyperconverged-operator.v1.13.0.clusterserviceversion.yaml +++ b/deploy/olm-catalog/community-kubevirt-hyperconverged/1.13.0/manifests/kubevirt-hyperconverged-operator.v1.13.0.clusterserviceversion.yaml @@ -454,6 +454,14 @@ spec: - get - list - watch + - apiGroups: + - operator.openshift.io + resources: + - kubedeschedulers + verbs: + - get + - list + - watch - apiGroups: - config.openshift.io resources: diff --git a/docs/cluster-configuration.md b/docs/cluster-configuration.md index 696ead1f75..56932fcc6f 100644 --- a/docs/cluster-configuration.md +++ b/docs/cluster-configuration.md @@ -1551,3 +1551,32 @@ To enable this `tuningPolicy` profile, the following patch may be applied: ```bash kubectl patch -n kubevirt-hyperconverged hco kubevirt-hyperconverged --type=json -p='[{"op": "add", "path": "/spec/tuningPolicy", "value": "highBurst"}]' ``` + +## Kube Descheduler integration +A [Descheduler](https://github.com/kubernetes-sigs/descheduler) is a Kubernetes application that causes the control plane to re-arrange the workloads in a better way. +It operates every pre-defined period and goes back to sleep after it had performed its job. + +The descheduler uses the Kubernetes [eviction API](https://kubernetes.io/docs/concepts/scheduling-eviction/api-eviction/) to evict pods, and receives feedback from `kube-api` whether the eviction request was granted or not. +On the other side, in order to keep VM live and trigger live-migration, KubeVirt handles eviction requests in a custom way and unfortunately a live migration takes time. +So from the descheduler's point of view, `virt-launcher` pods fail to be evicted, but they actually migrating to another node in background. 
+The descheduler notes the failure to evict the `virt-launcher` pod and keeps trying to evict other pods, typically resulting in it attempting to evict substantially all `virt-launcher` pods from the node, triggering a migration storm. +In other words, the way KubeVirt handles eviction requests causes the descheduler to make wrong decisions and take wrong actions that could destabilize the cluster. +Using the descheduler operator with the `LowNodeUtilization` strategy results in unstable/oscillatory behavior if the descheduler is used in this way to migrate VMs. +To correctly handle the special case of a `VM` pod whose eviction triggers a live migration to another node, the `Kube Descheduler Operator` introduced a `profileCustomizations` option named `devEnableEvictionsInBackground`, +which is currently considered an `alpha` [feature](https://github.com/kubernetes-sigs/descheduler/tree/master/keps/1397-evictions-in-background) on the `Kube Descheduler Operator` side. +To prevent unexpected behaviours, if the `Kube Descheduler Operator` is installed and configured alongside `HCO`, `HCO` will check its configuration looking for the presence of the `devEnableEvictionsInBackground` `profileCustomizations`, and, if it is missing, +suggest to the cluster admin to fix the configuration of the `Kube Descheduler Operator` via an `alert` and its linked `runbook`. + +In order to make the configuration of the `Kube Descheduler Operator` suitable also for the KubeVirt use case, +something like: +```yaml +apiVersion: operator.openshift.io/v1 +kind: KubeDescheduler +metadata: + name: cluster + namespace: openshift-kube-descheduler-operator +spec: + profileCustomizations: + devEnableEvictionsInBackground: true +``` +should be merged into its configuration. diff --git a/docs/metrics.md b/docs/metrics.md index 7717028043..13029b3675 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -9,6 +9,9 @@ Monitors resources for potential problems. Type: Gauge. ### kubevirt_hco_hyperconverged_cr_exists Indicates whether the HyperConverged custom resource exists (1) or not (0). Type: Gauge. +### kubevirt_hco_misconfigured_descheduler +Indicates whether the optional descheduler is not properly configured (1) to work with KubeVirt or not (0). Type: Gauge. + ### kubevirt_hco_out_of_band_modifications_total Count of out-of-band modifications overwritten by HCO. Type: Counter. 
diff --git a/go.mod b/go.mod index 2b4cea20c1..abdb6b6905 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/kubevirt/hyperconverged-cluster-operator -go 1.22.4 +go 1.22.5 require ( dario.cat/mergo v1.0.1 @@ -16,6 +16,7 @@ require ( github.com/onsi/ginkgo/v2 v2.20.2 github.com/onsi/gomega v1.34.2 github.com/openshift/api v3.9.1-0.20190517100836-d5b34b957e91+incompatible + github.com/openshift/cluster-kube-descheduler-operator v0.0.0-20240916113608-1a30f3be33fa github.com/openshift/custom-resource-status v1.1.2 github.com/openshift/library-go v0.0.0-20240830130947-d9523164b328 github.com/operator-framework/api v0.27.0 diff --git a/go.sum b/go.sum index dcf221eabb..1ae15975f8 100644 --- a/go.sum +++ b/go.sum @@ -227,6 +227,8 @@ github.com/onsi/gomega v1.34.2 h1:pNCwDkzrsv7MS9kpaQvVb1aVLahQXyJ/Tv5oAZMI3i8= github.com/onsi/gomega v1.34.2/go.mod h1:v1xfxRgk0KIsG+QOdm7p8UosrOzPYRo60fd3B/1Dukc= github.com/openshift/api v0.0.0-20230503133300-8bbcb7ca7183 h1:t/CahSnpqY46sQR01SoS+Jt0jtjgmhgE6lFmRnO4q70= github.com/openshift/api v0.0.0-20230503133300-8bbcb7ca7183/go.mod h1:4VWG+W22wrB4HfBL88P40DxLEpSOaiBVxUnfalfJo9k= +github.com/openshift/cluster-kube-descheduler-operator v0.0.0-20240916113608-1a30f3be33fa h1:bLljfvA1G/n60SmMCkrV9ydlUX711WTYIXEO/JbMZQw= +github.com/openshift/cluster-kube-descheduler-operator v0.0.0-20240916113608-1a30f3be33fa/go.mod h1:vq3hTEopoGcGHa64WQ1LTbyjDqKQhSGNgj9NOIu6nK4= github.com/openshift/custom-resource-status v1.1.2 h1:C3DL44LEbvlbItfd8mT5jWrqPfHnSOQoQf/sypqA6A4= github.com/openshift/custom-resource-status v1.1.2/go.mod h1:DB/Mf2oTeiAmVVX1gN+NEqweonAPY0TKUwADizj8+ZA= github.com/openshift/library-go v0.0.0-20240830130947-d9523164b328 h1:CEEgSC4+IVv/DbVv0Mbnx1cu5iOB5W2e7UuKGuHxcXs= diff --git a/hack/kubeDescheduler/deploy_fake_kubedescheduler.sh b/hack/kubeDescheduler/deploy_fake_kubedescheduler.sh new file mode 100755 index 0000000000..b2fb6d301c --- /dev/null +++ b/hack/kubeDescheduler/deploy_fake_kubedescheduler.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# +# Configures, for testing purposes, a fake KubeDescheduler CRD and CR to mimic the KubeDescheduler APIs when they are not there +# + +set -ex + +readonly SCRIPT_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") + +KUBEDES_CRD_NUM=$( + oc get crds --field-selector=metadata.name=kubedeschedulers.operator.openshift.io -o=name | wc -l +) + +if [[ "${KUBEDES_CRD_NUM}" -eq 0 ]]; then + echo "Create a CRD for a fake KubeDescheduler" + oc apply -f "${SCRIPT_DIR}/kube-descheduler-operator.crd.yaml" + KUBEDES_NS_NUM=$( + oc get namespaces --field-selector=metadata.name=openshift-kube-descheduler-operator -o=name | wc -l + ) + if [[ "${KUBEDES_NS_NUM}" -eq 0 ]]; then + echo "Creating a namespace for KubeDescheduler" + oc create namespace openshift-kube-descheduler-operator + fi + KUBEDES_CR_NUM=$( + oc get kubedeschedulers -n=openshift-kube-descheduler-operator --field-selector=metadata.name=cluster -o=name | wc -l + ) + if [[ "${KUBEDES_CR_NUM}" -eq 0 ]]; then + echo "Create a CR for a fake KubeDescheduler" + oc apply -f "${SCRIPT_DIR}/kubeDescheduler.cr.yaml" + fi +fi diff --git a/hack/kubeDescheduler/kube-descheduler-operator.crd.yaml b/hack/kubeDescheduler/kube-descheduler-operator.crd.yaml new file mode 100644 index 0000000000..d110534477 --- /dev/null +++ b/hack/kubeDescheduler/kube-descheduler-operator.crd.yaml @@ -0,0 +1,275 @@ +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: kubedeschedulers.operator.openshift.io +spec: + group: operator.openshift.io + names: + kind: KubeDescheduler + listKind: 
KubeDeschedulerList + plural: kubedeschedulers + singular: kubedescheduler + scope: Namespaced + versions: + - name: v1 + schema: + openAPIV3Schema: + description: KubeDescheduler is the Schema for the deschedulers API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: spec holds user settable values for configuration + properties: + deschedulingIntervalSeconds: + description: DeschedulingIntervalSeconds is the number of seconds + between descheduler runs + format: int32 + type: integer + evictionLimits: + description: evictionLimits restrict the number of evictions during + each descheduling run + properties: + total: + description: total restricts the maximum number of overall evictions + format: int32 + type: integer + type: object + logLevel: + default: Normal + description: |- + logLevel is an intent based logging for an overall component. It does not give fine grained control, but it is a + simple way to manage coarse grained logging choices that operators have to interpret for their operands. + + + Valid values are: "Normal", "Debug", "Trace", "TraceAll". + Defaults to "Normal". + enum: + - "" + - Normal + - Debug + - Trace + - TraceAll + type: string + managementState: + description: managementState indicates whether and how the operator + should manage the component + pattern: ^(Managed|Unmanaged|Force|Removed)$ + type: string + mode: + default: Predictive + description: Mode configures the descheduler to either evict pods + (Automatic) or to simulate the eviction (Predictive) + enum: + - Automatic + - Predictive + type: string + observedConfig: + description: |- + observedConfig holds a sparse config that controller has observed from the cluster state. It exists in spec because + it is an input to the level for the operator + nullable: true + type: object + x-kubernetes-preserve-unknown-fields: true + operatorLogLevel: + default: Normal + description: |- + operatorLogLevel is an intent based logging for the operator itself. It does not give fine grained control, but it is a + simple way to manage coarse grained logging choices that operators have to interpret for themselves. + + + Valid values are: "Normal", "Debug", "Trace", "TraceAll". + Defaults to "Normal". + enum: + - "" + - Normal + - Debug + - Trace + - TraceAll + type: string + profileCustomizations: + description: ProfileCustomizations contains various parameters for + modifying the default behavior of certain profiles + properties: + devEnableEvictionsInBackground: + description: |- + DevEnableEvictionsInBackground enables descheduler's EvictionsInBackground alpha feature. + The EvictionsInBackground alpha feature is a subject to change. + Currently provided as an experimental feature. 
+ type: boolean + devHighNodeUtilizationThresholds: + description: |- + devHighNodeUtilizationThresholds enumerates thresholds for node utilization levels. + The threshold values are subject to change. + Currently provided as an experimental feature. + enum: + - Minimal + - Modest + - Moderate + - "" + type: string + devLowNodeUtilizationThresholds: + description: LowNodeUtilizationThresholds enumerates predefined + experimental thresholds + enum: + - Low + - Medium + - High + - "" + type: string + namespaces: + description: |- + Namespaces overrides included and excluded namespaces while keeping + the default exclusion of all openshift-*, kube-system and hypershift namespaces + properties: + excluded: + items: + type: string + type: array + included: + items: + type: string + type: array + type: object + podLifetime: + description: |- + PodLifetime is the length of time after which pods should be evicted + This field should be used with profiles that enable the PodLifetime strategy, such as LifecycleAndUtilization + format: duration + type: string + thresholdPriority: + description: |- + ThresholdPriority when set will reject eviction of any pod with priority equal or higher + It is invalid to set it alongside ThresholdPriorityClassName + format: int32 + type: integer + thresholdPriorityClassName: + description: |- + ThresholdPriorityClassName when set will reject eviction of any pod with priority equal or higher + It is invalid to set it alongside ThresholdPriority + type: string + type: object + profiles: + description: Profiles sets which descheduler strategy profiles are + enabled + items: + description: |- + DeschedulerProfile allows configuring the enabled strategy profiles for the descheduler + it allows multiple profiles to be enabled at once, which will have cumulative effects on the cluster. + enum: + - AffinityAndTaints + - TopologyAndDuplicates + - LifecycleAndUtilization + - DevPreviewLongLifecycle + - LongLifecycle + - SoftTopologyAndDuplicates + - EvictPodsWithLocalStorage + - EvictPodsWithPVC + - CompactAndScale + type: string + type: array + unsupportedConfigOverrides: + description: |- + unsupportedConfigOverrides overrides the final configuration that was computed by the operator. + Red Hat does not support the use of this field. + Misuse of this field could lead to unexpected behavior or conflict with other configuration options. + Seek guidance from the Red Hat support before using this field. + Use of this property blocks cluster upgrades, it must be removed before upgrading your cluster. + nullable: true + type: object + x-kubernetes-preserve-unknown-fields: true + type: object + status: + description: status holds observed values from the cluster. They may not + be overridden. + properties: + conditions: + description: conditions is a list of conditions and their status + items: + description: OperatorCondition is just the standard condition fields. + properties: + lastTransitionTime: + format: date-time + type: string + message: + type: string + reason: + type: string + status: + type: string + type: + type: string + required: + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + generations: + description: generations are used to determine when an item needs + to be reconciled or has changed in a way that needs a reaction. + items: + description: GenerationStatus keeps track of the generation for + a given resource so that decisions about forced updates can be + made. 
+ properties: + group: + description: group is the group of the thing you're tracking + type: string + hash: + description: hash is an optional field set for resources without + generation that are content sensitive like secrets and configmaps + type: string + lastGeneration: + description: lastGeneration is the last generation of the workload + controller involved + format: int64 + type: integer + name: + description: name is the name of the thing you're tracking + type: string + namespace: + description: namespace is where the thing you're tracking is + type: string + resource: + description: resource is the resource type of the thing you're + tracking + type: string + type: object + type: array + x-kubernetes-list-type: atomic + observedGeneration: + description: observedGeneration is the last generation change you've + dealt with + format: int64 + type: integer + readyReplicas: + description: readyReplicas indicates how many replicas are ready and + at the desired state + format: int32 + type: integer + version: + description: version is the level this availability applies to + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/hack/kubeDescheduler/kubeDescheduler.cr.yaml b/hack/kubeDescheduler/kubeDescheduler.cr.yaml new file mode 100644 index 0000000000..4064dd0857 --- /dev/null +++ b/hack/kubeDescheduler/kubeDescheduler.cr.yaml @@ -0,0 +1,17 @@ +apiVersion: operator.openshift.io/v1 +kind: KubeDescheduler +metadata: + name: cluster + namespace: openshift-kube-descheduler-operator +spec: + deschedulingIntervalSeconds: 1800 + managementState: Managed + profiles: + - AffinityAndTaints + - LifecycleAndUtilization + profileCustomizations: + podLifetime: 5m + namespaces: + included: + - ns1 + - nsi2 diff --git a/hack/prom-rule-ci/prom-rules-tests.yaml b/hack/prom-rule-ci/prom-rules-tests.yaml index add28cfbd3..49decf236e 100755 --- a/hack/prom-rule-ci/prom-rules-tests.yaml +++ b/hack/prom-rule-ci/prom-rules-tests.yaml @@ -6,6 +6,7 @@ group_eval_order: - kubevirt.hyperconverged.rules tests: + # Test out-of-bound modification counter - interval: 1m input_series: @@ -689,3 +690,83 @@ tests: exp_samples: - labels: 'kubevirt_hyperconverged_operator_health_status{name="kubevirt-hyperconverged"}' value: 2 + + # Test kubevirt_hco_misconfigured_descheduler +- interval: 1m + input_series: + - series: 'kubevirt_hco_misconfigured_descheduler' + # time: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 + values: "stale stale 1 1 1 1 1 1 1 1 0 0 0 0 0 1 stale stale stale 1 1 1 1 1 1 1 1 1 1 1 1" + + alert_rule_test: + # No metric, no alert + - eval_time: 1m + alertname: HCOMisconfiguredDescheduler + exp_alerts: [ ] + + + # First increase must trigger an alert + - eval_time: 2m + alertname: HCOMisconfiguredDescheduler + exp_alerts: + - exp_annotations: + description: "Kube Descheduler is not correctly configured for KubeVirt" + summary: "Kube Descheduler is not correctly configured for KubeVirt" + runbook_url: "https://kubevirt.io/monitoring/runbooks/HCOMisconfiguredDescheduler" + exp_labels: + severity: "critical" + operator_health_impact: "critical" + kubernetes_operator_part_of: "kubevirt" + kubernetes_operator_component: "hyperconverged-cluster-operator" + + # The alert should disappear if the configuration gets fixed + - eval_time: 10m + alertname: HCOMisconfiguredDescheduler + exp_alerts: [ ] + + # The alert should reappear if the configuration gets broken again + - eval_time: 15m + alertname: 
HCOMisconfiguredDescheduler + exp_alerts: + - exp_annotations: + description: "Kube Descheduler is not correctly configured for KubeVirt" + summary: "Kube Descheduler is not correctly configured for KubeVirt" + runbook_url: "https://kubevirt.io/monitoring/runbooks/HCOMisconfiguredDescheduler" + exp_labels: + severity: "critical" + operator_health_impact: "critical" + kubernetes_operator_part_of: "kubevirt" + kubernetes_operator_component: "hyperconverged-cluster-operator" + + # No alerts if down + - eval_time: 17m + alertname: HCOMisconfiguredDescheduler + exp_alerts: [ ] + + # After restart, First increase must trigger an alert again + - eval_time: 19m + alertname: HCOMisconfiguredDescheduler + exp_alerts: + - exp_annotations: + description: "Kube Descheduler is not correctly configured for KubeVirt" + summary: "Kube Descheduler is not correctly configured for KubeVirt" + runbook_url: "https://kubevirt.io/monitoring/runbooks/HCOMisconfiguredDescheduler" + exp_labels: + severity: "critical" + operator_health_impact: "critical" + kubernetes_operator_part_of: "kubevirt" + kubernetes_operator_component: "hyperconverged-cluster-operator" + + # After restart, it should be firing until fixed or silenced + - eval_time: 30m + alertname: HCOMisconfiguredDescheduler + exp_alerts: + - exp_annotations: + description: "Kube Descheduler is not correctly configured for KubeVirt" + summary: "Kube Descheduler is not correctly configured for KubeVirt" + runbook_url: "https://kubevirt.io/monitoring/runbooks/HCOMisconfiguredDescheduler" + exp_labels: + severity: "critical" + operator_health_impact: "critical" + kubernetes_operator_part_of: "kubevirt" + kubernetes_operator_component: "hyperconverged-cluster-operator" diff --git a/pkg/components/components.go b/pkg/components/components.go index d1fce7c921..8d0661df66 100644 --- a/pkg/components/components.go +++ b/pkg/components/components.go @@ -469,6 +469,7 @@ var ( func GetClusterPermissions() []rbacv1.PolicyRule { const configOpenshiftIO = "config.openshift.io" + const operatorOpenshiftIO = "operator.openshift.io" return []rbacv1.PolicyRule{ { APIGroups: stringListToSlice(util.APIVersionGroup), @@ -555,6 +556,11 @@ func GetClusterPermissions() []rbacv1.PolicyRule { Resources: stringListToSlice("apiservers"), Verbs: stringListToSlice("get", "list", "watch"), }, + { + APIGroups: stringListToSlice(operatorOpenshiftIO), + Resources: stringListToSlice("kubedeschedulers"), + Verbs: stringListToSlice("get", "list", "watch"), + }, { APIGroups: stringListToSlice(configOpenshiftIO), Resources: stringListToSlice("dnses"), diff --git a/pkg/monitoring/metrics/infrastructure_metrics.go b/pkg/monitoring/metrics/infrastructure_metrics.go index ab4c1af386..7a0e07b10e 100644 --- a/pkg/monitoring/metrics/infrastructure_metrics.go +++ b/pkg/monitoring/metrics/infrastructure_metrics.go @@ -5,12 +5,15 @@ import ( ) const ( - singleStackIPv6True = 1.0 + singleStackIPv6True = 1.0 + misconfiguredDeschedulerTrue = 1.0 + misconfiguredDeschedulerFalse = 0.0 ) var ( infrastructureMetrics = []operatormetrics.Metric{ singleStackIpv6, + misconfiguredDescheduler, } singleStackIpv6 = operatormetrics.NewGauge( @@ -19,9 +22,24 @@ var ( Help: "Indicates whether the underlying cluster is single stack IPv6 (1) or not (0)", }, ) + + misconfiguredDescheduler = operatormetrics.NewGauge( + operatormetrics.MetricOpts{ + Name: "kubevirt_hco_misconfigured_descheduler", + Help: "Indicates whether the optional descheduler is not properly configured (1) to work with KubeVirt or not (0)", + }, + ) ) // 
SetHCOMetricSingleStackIPv6True sets the gauge to 1 (true) func SetHCOMetricSingleStackIPv6True() { singleStackIpv6.Set(singleStackIPv6True) } + +func SetHCOMetricMisconfiguredDescheduler(misconfigured bool) { + if misconfigured { + misconfiguredDescheduler.Set(misconfiguredDeschedulerTrue) + } else { + misconfiguredDescheduler.Set(misconfiguredDeschedulerFalse) + } +} diff --git a/pkg/monitoring/rules/alerts/operator_alerts.go b/pkg/monitoring/rules/alerts/operator_alerts.go index 8b7c021534..30102412fe 100644 --- a/pkg/monitoring/rules/alerts/operator_alerts.go +++ b/pkg/monitoring/rules/alerts/operator_alerts.go @@ -11,6 +11,7 @@ const ( unsafeModificationAlert = "UnsupportedHCOModification" installationNotCompletedAlert = "HCOInstallationIncomplete" singleStackIPv6Alert = "SingleStackIPv6Unsupported" + MisconfiguredDeschedulerAlert = "HCOMisconfiguredDescheduler" severityAlertLabelKey = "severity" healthImpactAlertLabelKey = "operator_health_impact" ) @@ -66,5 +67,17 @@ func operatorAlerts() []promv1.Rule { healthImpactAlertLabelKey: "critical", }, }, + { + Alert: MisconfiguredDeschedulerAlert, + Expr: intstr.FromString("kubevirt_hco_misconfigured_descheduler == 1"), + Annotations: map[string]string{ + "description": "Kube Descheduler is not correctly configured for KubeVirt", + "summary": "Kube Descheduler is not correctly configured for KubeVirt", + }, + Labels: map[string]string{ + severityAlertLabelKey: "critical", + healthImpactAlertLabelKey: "critical", + }, + }, } } diff --git a/pkg/util/cluster.go b/pkg/util/cluster.go index 2a2bf27c98..2a6584d04c 100644 --- a/pkg/util/cluster.go +++ b/pkg/util/cluster.go @@ -8,6 +8,7 @@ import ( "github.com/go-logr/logr" openshiftconfigv1 "github.com/openshift/api/config/v1" + deschedulerv1 "github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1" csvv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -35,9 +36,13 @@ type ClusterInfo interface { IsInfrastructureHighlyAvailable() bool IsConsolePluginImageProvided() bool IsMonitoringAvailable() bool + IsDeschedulerAvailable() bool + IsDeschedulerMisconfigured() bool + IsDeschedulerCRDDeployed(ctx context.Context, cl client.Client) bool IsSingleStackIPv6() bool GetTLSSecurityProfile(hcoTLSSecurityProfile *openshiftconfigv1.TLSSecurityProfile) *openshiftconfigv1.TLSSecurityProfile RefreshAPIServerCR(ctx context.Context, c client.Client) error + RefreshDeschedulerCR(ctx context.Context, c client.Client) error GetPod() *corev1.Pod GetDeployment() *appsv1.Deployment GetCSV() *csvv1alpha1.ClusterServiceVersion @@ -51,6 +56,7 @@ type ClusterInfoImp struct { infrastructureHighlyAvailable bool consolePluginImageProvided bool monitoringAvailable bool + deschedulerAvailable bool singlestackipv6 bool domain string baseDomain string @@ -61,6 +67,7 @@ type ClusterInfoImp struct { var clusterInfo ClusterInfo var validatedAPIServerTLSSecurityProfile *openshiftconfigv1.TLSSecurityProfile +var misconfiguredDescheduler bool var GetClusterInfo = func() ClusterInfo { return clusterInfo @@ -96,12 +103,22 @@ func (c *ClusterInfoImp) Init(ctx context.Context, cl client.Client, logger logr c.consolePluginImageProvided = uiPluginVarExists && len(uiPluginVarValue) > 0 && uiProxyVarExists && len(uiProxyVarValue) > 0 c.monitoringAvailable = isPrometheusExists(ctx, cl) + c.deschedulerAvailable = isDeschedulerExists(ctx, cl) + c.logger.Info("addOns ", + "monitoring", c.monitoringAvailable, + "kubeDescheduler", 
c.deschedulerAvailable, + ) err = c.RefreshAPIServerCR(ctx, cl) if err != nil { return err } + err = c.RefreshDeschedulerCR(ctx, cl) + if err != nil { + return err + } + c.ownResources = findOwnResources(ctx, cl, c.logger) return nil } @@ -192,6 +209,14 @@ func (c *ClusterInfoImp) IsMonitoringAvailable() bool { return c.monitoringAvailable } +func (c *ClusterInfoImp) IsDeschedulerAvailable() bool { + return c.deschedulerAvailable +} + +func (c *ClusterInfoImp) IsDeschedulerCRDDeployed(ctx context.Context, cl client.Client) bool { + return isCRDExists(ctx, cl, DeschedulerCRDName) +} + func (c *ClusterInfoImp) IsRunningLocally() bool { return c.runningLocally } @@ -260,6 +285,10 @@ func isPrometheusExists(ctx context.Context, cl client.Client) bool { return prometheusRuleCRDExists && serviceMonitorCRDExists } +func isDeschedulerExists(ctx context.Context, cl client.Client) bool { + return isCRDExists(ctx, cl, DeschedulerCRDName) +} + func isCRDExists(ctx context.Context, cl client.Client, crdName string) bool { found := &apiextensionsv1.CustomResourceDefinition{} key := client.ObjectKey{Name: crdName} @@ -318,6 +347,10 @@ func (c *ClusterInfoImp) GetTLSSecurityProfile(hcoTLSSecurityProfile *openshiftc } } +func (c *ClusterInfoImp) IsDeschedulerMisconfigured() bool { + return misconfiguredDescheduler +} + func (c *ClusterInfoImp) RefreshAPIServerCR(ctx context.Context, cl client.Client) error { if c.IsOpenshift() { instance := &openshiftconfigv1.APIServer{} @@ -335,6 +368,30 @@ func (c *ClusterInfoImp) RefreshAPIServerCR(ctx context.Context, cl client.Clien return nil } +func (c *ClusterInfoImp) RefreshDeschedulerCR(ctx context.Context, cl client.Client) error { + if c.IsDeschedulerAvailable() { + instance := &deschedulerv1.KubeDescheduler{} + + key := client.ObjectKey{Namespace: DeschedulerNamespace, Name: DeschedulerCRName} + err := cl.Get(ctx, key, instance) + if err != nil { + if apierrors.IsNotFound(err) { + misconfiguredDescheduler = false + return nil + } + return err + } + if instance.Spec.ProfileCustomizations == nil { + misconfiguredDescheduler = true + } else { + misconfiguredDescheduler = !instance.Spec.ProfileCustomizations.DevEnableEvictionsInBackground + } + return nil + } + misconfiguredDescheduler = false + return nil +} + func (c *ClusterInfoImp) validateAPIServerTLSSecurityProfile(apiServerTLSSecurityProfile *openshiftconfigv1.TLSSecurityProfile) *openshiftconfigv1.TLSSecurityProfile { if apiServerTLSSecurityProfile == nil || apiServerTLSSecurityProfile.Type != openshiftconfigv1.TLSProfileCustomType { return apiServerTLSSecurityProfile diff --git a/pkg/util/cluster_test.go b/pkg/util/cluster_test.go index fb32114be8..0223d2d2a1 100644 --- a/pkg/util/cluster_test.go +++ b/pkg/util/cluster_test.go @@ -5,6 +5,10 @@ import ( "fmt" "os" + operatorv1 "github.com/openshift/api/operator/v1" + deschedulerv1 "github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" openshiftconfigv1 "github.com/openshift/api/config/v1" @@ -112,10 +116,35 @@ var _ = Describe("test clusterInfo", func() { }, }, } + + deschedulerCRD = &apiextensionsv1.CustomResourceDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: DeschedulerCRDName, + }, + } + + deschedulerNamespace = &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: DeschedulerNamespace, + Annotations: map[string]string{ + OpenshiftNodeSelectorAnn: "", + }, + }, + } + + deschedulerCR = &deschedulerv1.KubeDescheduler{ + ObjectMeta: metav1.ObjectMeta{ + Name: DeschedulerCRName, + Namespace: DeschedulerNamespace, + }, + Spec: deschedulerv1.KubeDeschedulerSpec{}, + } ) testScheme := scheme.Scheme Expect(openshiftconfigv1.Install(testScheme)).To(Succeed()) + Expect(deschedulerv1.AddToScheme(testScheme)).To(Succeed()) + Expect(apiextensionsv1.AddToScheme(testScheme)).To(Succeed()) logger := zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)).WithName("clusterInfo_test") @@ -141,6 +170,172 @@ var _ = Describe("test clusterInfo", func() { Expect(GetClusterInfo().IsManagedByOLM()).To(BeTrue(), "should return true for IsManagedByOLM()") }) + It("check Init on kubernetes, with KubeDescheduler CRD without any CR for it", func() { + cl := fake.NewClientBuilder(). + WithScheme(testScheme). + WithObjects(deschedulerCRD). + WithStatusSubresource(deschedulerCRD). + Build() + Expect(GetClusterInfo().Init(context.TODO(), cl, logger)).To(Succeed()) + + Expect(GetClusterInfo().IsOpenshift()).To(BeFalse(), "should return false for IsOpenshift()") + Expect(GetClusterInfo().IsDeschedulerAvailable()).To(BeTrue(), "should return true for IsDeschedulerAvailable()") + Expect(GetClusterInfo().IsDeschedulerCRDDeployed(context.TODO(), cl)).To(BeTrue(), "should return true for IsDeschedulerCRDDeployed(...)") + Expect(GetClusterInfo().IsDeschedulerMisconfigured()).To(BeFalse(), "should return false for IsDeschedulerMisconfigured()") + }) + + It("check Init on kubernetes, with KubeDescheduler CRD with a CR for it with default values", func() { + cl := fake.NewClientBuilder(). + WithScheme(testScheme). + WithObjects(deschedulerCRD, deschedulerNamespace, deschedulerCR). + WithStatusSubresource(deschedulerCRD, deschedulerNamespace, deschedulerCR). + Build() + Expect(GetClusterInfo().Init(context.TODO(), cl, logger)).To(Succeed()) + + Expect(GetClusterInfo().IsOpenshift()).To(BeFalse(), "should return false for IsOpenshift()") + Expect(GetClusterInfo().IsDeschedulerAvailable()).To(BeTrue(), "should return true for IsDeschedulerAvailable()") + Expect(GetClusterInfo().IsDeschedulerCRDDeployed(context.TODO(), cl)).To(BeTrue(), "should return true for IsDeschedulerCRDDeployed(...)") + Expect(GetClusterInfo().IsDeschedulerMisconfigured()).To(BeTrue(), "should return true for IsDeschedulerMisconfigured()") + }) + + It("check Init on openshift, with KubeDescheduler CRD without any CR for it", func() { + cl := fake.NewClientBuilder(). + WithScheme(testScheme). + WithObjects(clusterVersion, infrastructure, ingress, apiServer, dns, ipv4network, deschedulerCRD). + WithStatusSubresource(clusterVersion, infrastructure, ingress, apiServer, dns, ipv4network, deschedulerCRD). 
+ Build() + Expect(GetClusterInfo().Init(context.TODO(), cl, logger)).To(Succeed()) + + Expect(GetClusterInfo().IsOpenshift()).To(BeTrue(), "should return true for IsOpenshift()") + Expect(GetClusterInfo().IsDeschedulerAvailable()).To(BeTrue(), "should return true for IsDeschedulerAvailable()") + Expect(GetClusterInfo().IsDeschedulerCRDDeployed(context.TODO(), cl)).To(BeTrue(), "should return true for IsDeschedulerCRDDeployed(...)") + Expect(GetClusterInfo().IsDeschedulerMisconfigured()).To(BeFalse(), "should return false for IsDeschedulerMisconfigured()") + }) + + DescribeTable( + "check Init on openshift, with KubeDescheduler CRD with a CR for it ...", + func(deschedulerCR *deschedulerv1.KubeDescheduler, expectedIsDeschedulerMisconfigured bool) { + cl := fake.NewClientBuilder(). + WithScheme(testScheme). + WithObjects(clusterVersion, infrastructure, ingress, apiServer, dns, ipv4network, deschedulerCRD, deschedulerNamespace, deschedulerCR). + WithStatusSubresource(clusterVersion, infrastructure, ingress, apiServer, dns, ipv4network, deschedulerCRD, deschedulerNamespace, deschedulerCR). + Build() + Expect(GetClusterInfo().Init(context.TODO(), cl, logger)).To(Succeed()) + + Expect(GetClusterInfo().IsOpenshift()).To(BeTrue(), "should return true for IsOpenshift()") + Expect(GetClusterInfo().IsDeschedulerAvailable()).To(BeTrue(), "should return true for IsDeschedulerAvailable()") + Expect(GetClusterInfo().IsDeschedulerCRDDeployed(context.TODO(), cl)).To(BeTrue(), "should return true for IsDeschedulerCRDDeployed(...)") + Expect(GetClusterInfo().IsDeschedulerMisconfigured()).To(Equal(expectedIsDeschedulerMisconfigured)) + }, + Entry( + "with default configuration", + &deschedulerv1.KubeDescheduler{ + ObjectMeta: metav1.ObjectMeta{ + Name: DeschedulerCRName, + Namespace: DeschedulerNamespace, + }, + Spec: deschedulerv1.KubeDeschedulerSpec{}, + }, + true, + ), + Entry( + "with configuration tuned for KubeVirt", + &deschedulerv1.KubeDescheduler{ + ObjectMeta: metav1.ObjectMeta{ + Name: DeschedulerCRName, + Namespace: DeschedulerNamespace, + }, + Spec: deschedulerv1.KubeDeschedulerSpec{ + ProfileCustomizations: &deschedulerv1.ProfileCustomizations{ + DevEnableEvictionsInBackground: true, + }, + }, + }, + false, + ), + Entry( + "with wrong configuration 1", + &deschedulerv1.KubeDescheduler{ + ObjectMeta: metav1.ObjectMeta{ + Name: DeschedulerCRName, + Namespace: DeschedulerNamespace, + }, + Spec: deschedulerv1.KubeDeschedulerSpec{ + ProfileCustomizations: &deschedulerv1.ProfileCustomizations{ + DevEnableEvictionsInBackground: false, + }, + }, + }, + true, + ), + Entry( + "with wrong configuration 2", + &deschedulerv1.KubeDescheduler{ + ObjectMeta: metav1.ObjectMeta{ + Name: DeschedulerCRName, + Namespace: DeschedulerNamespace, + }, + Spec: deschedulerv1.KubeDeschedulerSpec{ + ProfileCustomizations: &deschedulerv1.ProfileCustomizations{ + ThresholdPriorityClassName: "testvalue", + Namespaces: deschedulerv1.Namespaces{}, + DevEnableEvictionsInBackground: false, + }, + }, + }, + true, + ), + Entry( + "with wrong configuration 3", + &deschedulerv1.KubeDescheduler{ + ObjectMeta: metav1.ObjectMeta{ + Name: DeschedulerCRName, + Namespace: DeschedulerNamespace, + }, + Spec: deschedulerv1.KubeDeschedulerSpec{ + OperatorSpec: operatorv1.OperatorSpec{ + ManagementState: "testvalue", + LogLevel: "testvalue", + OperatorLogLevel: "testvalue", + }, + Profiles: []deschedulerv1.DeschedulerProfile{"test1", "test2"}, + Mode: "testvalue", + }, + }, + true, + ), + Entry( + "with configuration tuned for KubeVirt but with 
a wrong name", + &deschedulerv1.KubeDescheduler{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: DeschedulerNamespace, + }, + Spec: deschedulerv1.KubeDeschedulerSpec{ + ProfileCustomizations: &deschedulerv1.ProfileCustomizations{ + DevEnableEvictionsInBackground: true, + }, + }, + }, + false, + ), + Entry( + "with configuration tuned for KubeVirt but in the wrong namespace", + &deschedulerv1.KubeDescheduler{ + ObjectMeta: metav1.ObjectMeta{ + Name: DeschedulerCRName, + Namespace: "test", + }, + Spec: deschedulerv1.KubeDeschedulerSpec{ + ProfileCustomizations: &deschedulerv1.ProfileCustomizations{ + DevEnableEvictionsInBackground: true, + }, + }, + }, + false, + ), + ) + It("check Init on openshift, with OLM", func() { os.Setenv(OperatorConditionNameEnvVar, "aValue") cl := fake.NewClientBuilder(). @@ -540,6 +735,40 @@ var _ = Describe("test clusterInfo", func() { }) + It("should detect that KubeDescheduler CRD got deployed if initially unavailable", func() { + os.Setenv(OperatorConditionNameEnvVar, "aValue") + + testAPIServer := &openshiftconfigv1.APIServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster", + }, + Spec: openshiftconfigv1.APIServerSpec{}, + } + + cl := fake.NewClientBuilder(). + WithScheme(testScheme). + WithRuntimeObjects(clusterVersion, infrastructure, ingress, testAPIServer, dns, ipv4network). + Build() + Expect(GetClusterInfo().Init(context.TODO(), cl, logger)).To(Succeed()) + + Expect(GetClusterInfo().IsOpenshift()).To(BeTrue(), "should return true for IsOpenshift()") + Expect(GetClusterInfo().IsManagedByOLM()).To(BeTrue(), "should return true for IsManagedByOLM()") + Expect(GetClusterInfo().IsDeschedulerAvailable()).To(BeFalse(), "should initially return false for IsDeschedulerAvailable()") + Expect(GetClusterInfo().IsDeschedulerCRDDeployed(context.TODO(), cl)).To(BeFalse(), "should initially return false for IsDeschedulerCRDDeployed(...)") + Expect(GetClusterInfo().IsDeschedulerMisconfigured()).To(BeFalse(), "should initially return false for IsDeschedulerMisconfigured()") + + deschedulerCRD = &apiextensionsv1.CustomResourceDefinition{ + ObjectMeta: metav1.ObjectMeta{ + Name: DeschedulerCRDName, + }, + } + + Expect(cl.Create(context.TODO(), deschedulerCRD)).To(Succeed()) + Expect(GetClusterInfo().IsDeschedulerAvailable()).To(BeFalse(), "should still return false for IsDeschedulerAvailable() (until the operator will restart)") + Expect(GetClusterInfo().IsDeschedulerCRDDeployed(context.TODO(), cl)).To(BeTrue(), "should now return true for IsDeschedulerCRDDeployed(...)") + + }) + }) }) diff --git a/pkg/util/consts.go b/pkg/util/consts.go index 47399c38ed..fe5e6a911c 100644 --- a/pkg/util/consts.go +++ b/pkg/util/consts.go @@ -21,6 +21,7 @@ const ( HcoMutatingWebhookNS = "mutate-ns-hco.kubevirt.io" PrometheusRuleCRDName = "prometheusrules.monitoring.coreos.com" ServiceMonitorCRDName = "servicemonitors.monitoring.coreos.com" + DeschedulerCRDName = "kubedeschedulers.operator.openshift.io" HcoMutatingWebhookHyperConverged = "mutate-hyperconverged-hco.kubevirt.io" AppLabel = "app" UndefinedNamespace = "" @@ -69,7 +70,9 @@ const ( UIPluginServerPort int32 = 9443 UIProxyServerPort int32 = 8080 - APIServerCRName = "cluster" + APIServerCRName = "cluster" + DeschedulerCRName = "cluster" + DeschedulerNamespace = "openshift-kube-descheduler-operator" DataImportCronEnabledAnnotation = "dataimportcrontemplate.kubevirt.io/enable" ) diff --git a/tests/func-tests/client.go b/tests/func-tests/client.go index ad1e9fa5f0..d0d8a72f48 100644 --- a/tests/func-tests/client.go 
+++ b/tests/func-tests/client.go @@ -7,9 +7,11 @@ import ( "github.com/onsi/ginkgo/v2" consolev1 "github.com/openshift/api/console/v1" imagev1 "github.com/openshift/api/image/v1" + deschedulerv1 "github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" @@ -83,6 +85,8 @@ func setScheme(cli client.Client) { sspv1beta2.AddToScheme, imagev1.AddToScheme, monitoringv1.AddToScheme, + deschedulerv1.AddToScheme, + apiextensionsv1.AddToScheme, } for _, f := range funcs { diff --git a/tests/func-tests/monitoring_test.go b/tests/func-tests/monitoring_test.go index 040e76ecfb..246846026c 100644 --- a/tests/func-tests/monitoring_test.go +++ b/tests/func-tests/monitoring_test.go @@ -13,12 +13,15 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" openshiftroutev1 "github.com/openshift/api/route/v1" + deschedulerv1 "github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" promApi "github.com/prometheus/client_golang/api" promApiv1 "github.com/prometheus/client_golang/api/prometheus/v1" promConfig "github.com/prometheus/common/config" promModel "github.com/prometheus/common/model" authenticationv1 "k8s.io/api/authentication/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes" @@ -28,6 +31,8 @@ import ( kubevirtcorev1 "kubevirt.io/api/core/v1" + hcoalerts "github.com/kubevirt/hyperconverged-cluster-operator/pkg/monitoring/rules/alerts" + hcoutil "github.com/kubevirt/hyperconverged-cluster-operator/pkg/util" tests "github.com/kubevirt/hyperconverged-cluster-operator/tests/func-tests" ) @@ -186,6 +191,187 @@ var _ = Describe("[crit:high][vendor:cnv-qe@redhat.com][level:system]Monitoring" }).WithTimeout(60 * time.Second).WithPolling(time.Second).WithContext(ctx).ShouldNot(BeNil()) verifyOperatorHealthMetricValue(ctx, promClient, hcoClient, initialOperatorHealthMetricValue, warningImpact) }) + + Describe("KubeDescheduler", Serial, Ordered, Label(tests.OpenshiftLabel, "monitoring"), func() { + + var ( + initialDescheduler = &deschedulerv1.KubeDescheduler{} + ) + + BeforeAll(func(ctx context.Context) { + crd := &apiextensionsv1.CustomResourceDefinition{} + crdKey := client.ObjectKey{Name: hcoutil.DeschedulerCRDName} + key := client.ObjectKey{Namespace: hcoutil.DeschedulerNamespace, Name: hcoutil.DeschedulerCRName} + + Eventually(func(g Gomega, ctx context.Context) { + err := cli.Get(ctx, crdKey, crd) + if apierrors.IsNotFound(err) { + Skip("Skip test when KubeDescheduler CRD is not present") + } + g.Expect(err).NotTo(HaveOccurred()) + err = cli.Get(ctx, key, initialDescheduler) + if apierrors.IsNotFound(err) { + Skip("Skip test when KubeDescheduler CR is not present") + } + g.Expect(err).NotTo(HaveOccurred()) + }).WithTimeout(10 * time.Second).WithPolling(500 * time.Millisecond).WithContext(ctx).Should(Succeed()) + }) + + AfterAll(func(ctx context.Context) { + key := client.ObjectKey{Namespace: hcoutil.DeschedulerNamespace, Name: hcoutil.DeschedulerCRName} + + Eventually(func(g 
Gomega, ctx context.Context) {
+				descheduler := &deschedulerv1.KubeDescheduler{}
+				err := cli.Get(ctx, key, descheduler)
+				g.Expect(err).NotTo(HaveOccurred())
+				initialDescheduler.Spec.DeepCopyInto(&descheduler.Spec)
+				err = cli.Update(ctx, descheduler)
+				g.Expect(err).NotTo(HaveOccurred())
+			}).WithTimeout(10 * time.Second).WithPolling(500 * time.Millisecond).WithContext(ctx).Should(Succeed())
+		})
+
+		It("HCOMisconfiguredDescheduler alert should fire when KubeDescheduler is installed and not properly configured for KubeVirt", Serial, func(ctx context.Context) {
+
+			const (
+				query                 = `kubevirt_hco_misconfigured_descheduler`
+				jsonPatchMisconfigure = `[{"op": "replace", "path": "/spec", "value": {"managementState": "Managed"}}]`
+				jsonPatchConfigure    = `[{"op": "replace", "path": "/spec", "value": {"managementState": "Managed", "profileCustomizations": {"devEnableEvictionsInBackground": true }}}]`
+			)
+
+			By(fmt.Sprintf("Reading the `%s` metric from HCO prometheus endpoint", query))
+			var valueBefore float64
+			Eventually(func(g Gomega, ctx context.Context) {
+				var err error
+				valueBefore, err = hcoClient.GetHCOMetric(ctx, query)
+				g.Expect(err).NotTo(HaveOccurred())
+			}).WithTimeout(10 * time.Second).WithPolling(500 * time.Millisecond).WithContext(ctx).Should(Succeed())
+			GinkgoWriter.Printf("The metric value before the test is: %0.2f\n", valueBefore)
+
+			patchMisconfigure := client.RawPatch(types.JSONPatchType, []byte(jsonPatchMisconfigure))
+			patchConfigure := client.RawPatch(types.JSONPatchType, []byte(jsonPatchConfigure))
+
+			descheduler := &deschedulerv1.KubeDescheduler{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      hcoutil.DeschedulerCRName,
+					Namespace: hcoutil.DeschedulerNamespace,
+				},
+			}
+
+			By("Misconfiguring the descheduler")
+			Expect(cli.Patch(ctx, descheduler, patchMisconfigure)).To(Succeed())
+			By("checking that the metric reports it as misconfigured (1.0)")
+			Eventually(func(g Gomega, ctx context.Context) float64 {
+				valueAfter, err := hcoClient.GetHCOMetric(ctx, query)
+				g.Expect(err).NotTo(HaveOccurred())
+				return valueAfter
+			}).
+				WithTimeout(60*time.Second).
+				WithPolling(time.Second).
+				WithContext(ctx).
+				Should(
+					Equal(float64(1)),
+					"expected descheduler to be misconfigured; expected value: %0.2f", float64(1),
+				)
+
+			By("checking that the prometheus metric reports it as misconfigured (1.0)")
+			Eventually(func(ctx context.Context) float64 {
+				return getMetricValue(ctx, promClient, query)
+			}).
+				WithTimeout(60*time.Second).
+				WithPolling(time.Second).
+				WithContext(ctx).
+				Should(
+					Equal(float64(1)),
+					"expected descheduler to be misconfigured; expected value: %0.2f", float64(1),
+				)
+
+			By("Checking the alert")
+			Eventually(func(ctx context.Context) *promApiv1.Alert {
+				alerts, err := promClient.Alerts(ctx)
+				Expect(err).ToNot(HaveOccurred())
+				alert := getAlertByName(alerts, hcoalerts.MisconfiguredDeschedulerAlert)
+				return alert
+			}).WithTimeout(60 * time.Second).WithPolling(time.Second).WithContext(ctx).ShouldNot(BeNil())
+
+			verifyOperatorHealthMetricValue(ctx, promClient, hcoClient, initialOperatorHealthMetricValue, criticalImpact)
+
+			By("Correctly configuring the descheduler for KubeVirt")
+			Expect(cli.Patch(ctx, descheduler, patchConfigure)).To(Succeed())
+			By("checking that the metric doesn't report it as misconfigured (0.0)")
+			Eventually(func(g Gomega, ctx context.Context) float64 {
+				valueAfter, err := hcoClient.GetHCOMetric(ctx, query)
+				g.Expect(err).NotTo(HaveOccurred())
+				return valueAfter
+			}).
+				WithTimeout(60*time.Second).
+				WithPolling(time.Second).
+				WithContext(ctx).
+				Should(
+					Equal(float64(0)),
+					"expected descheduler to NOT be misconfigured; expected value: %0.2f", float64(0),
+				)
+
+			By("checking that the prometheus metric doesn't report it as misconfigured (0.0)")
+			Eventually(func(ctx context.Context) float64 {
+				return getMetricValue(ctx, promClient, query)
+			}).
+				WithTimeout(60*time.Second).
+				WithPolling(time.Second).
+				WithContext(ctx).
+				Should(
+					Equal(float64(0)),
+					"expected descheduler to NOT be misconfigured; expected value: %0.2f", float64(0),
+				)
+
+			By("Checking the alert is not firing")
+			Eventually(func(ctx context.Context) *promApiv1.Alert {
+				alerts, err := promClient.Alerts(ctx)
+				Expect(err).ToNot(HaveOccurred())
+				alert := getAlertByName(alerts, hcoalerts.MisconfiguredDeschedulerAlert)
+				return alert
+			}).WithTimeout(60 * time.Second).WithPolling(time.Second).WithContext(ctx).Should(BeNil())
+
+			By("Misconfiguring the descheduler a second time")
+			Expect(cli.Patch(ctx, descheduler, patchMisconfigure)).To(Succeed())
+			By("checking that the metric reports it as misconfigured (1.0)")
+			Eventually(func(g Gomega, ctx context.Context) float64 {
+				valueAfter, err := hcoClient.GetHCOMetric(ctx, query)
+				g.Expect(err).NotTo(HaveOccurred())
+				return valueAfter
+			}).
+				WithTimeout(60*time.Second).
+				WithPolling(time.Second).
+				WithContext(ctx).
+				Should(
+					Equal(float64(1)),
+					"expected descheduler to be misconfigured; expected value: %0.2f", float64(1),
+				)
+
+			By("checking that the prometheus metric reports it as misconfigured (1.0)")
+			Eventually(func(ctx context.Context) float64 {
+				return getMetricValue(ctx, promClient, query)
+			}).
+				WithTimeout(60*time.Second).
+				WithPolling(time.Second).
+				WithContext(ctx).
+				Should(
+					Equal(float64(1)),
+					"expected descheduler to be misconfigured; expected value: %0.2f", float64(1),
+				)
+
+			By("Checking the alert")
+			Eventually(func(ctx context.Context) *promApiv1.Alert {
+				alerts, err := promClient.Alerts(ctx)
+				Expect(err).ToNot(HaveOccurred())
+				alert := getAlertByName(alerts, hcoalerts.MisconfiguredDeschedulerAlert)
+				return alert
+			}).WithTimeout(60 * time.Second).WithPolling(time.Second).WithContext(ctx).ShouldNot(BeNil())
+
+			verifyOperatorHealthMetricValue(ctx, promClient, hcoClient, initialOperatorHealthMetricValue, criticalImpact)
+
+		})
+	})
+
 })

 func getAlertByName(alerts promApiv1.AlertsResult, alertName string) *promApiv1.Alert {
diff --git a/vendor/github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1/doc.go b/vendor/github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1/doc.go
new file mode 100644
index 0000000000..46f1a2906c
--- /dev/null
+++ b/vendor/github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1/doc.go
@@ -0,0 +1,7 @@
+// Package v1 contains API Schema definitions for the descheduler v1 API group
+// +k8s:defaulter-gen=TypeMeta
+// +k8s:deepcopy-gen=package,register
+// +groupName=operator.openshift.io
+// +groupGoName=Kubedeschedulers
+// +kubebuilder:validation:Optional
+package v1
diff --git a/vendor/github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1/register.go b/vendor/github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1/register.go
new file mode 100644
index 0000000000..39a605410c
--- /dev/null
+++ b/vendor/github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1/register.go
@@ -0,0 +1,40 @@
+// NOTE: Boilerplate only. Ignore this file.
+ +// Package v1 contains API Schema definitions for the descheduler v1 API group +// +k8s:deepcopy-gen=package,register +// +groupName=operator.openshift.io +package v1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +var ( + // SchemeGroupVersion is group version used to register these objects + SchemeGroupVersion = schema.GroupVersion{Group: "operator.openshift.io", Version: "v1"} + + // SchemeBuilder is used to add go types to the GroupVersionKind scheme + SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes) + AddToScheme = SchemeBuilder.AddToScheme +) + +func init() { + SchemeBuilder.Register(addKnownTypes) +} + +// Resource takes an unqualified resource and returns a Group qualified GroupResource +func Resource(resource string) schema.GroupResource { + return SchemeGroupVersion.WithResource(resource).GroupResource() +} + +func addKnownTypes(scheme *runtime.Scheme) error { + scheme.AddKnownTypes(SchemeGroupVersion, + &KubeDescheduler{}, + &KubeDeschedulerList{}, + ) + + metav1.AddToGroupVersion(scheme, SchemeGroupVersion) + return nil +} diff --git a/vendor/github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1/types_descheduler.go b/vendor/github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1/types_descheduler.go new file mode 100644 index 0000000000..193a64cd63 --- /dev/null +++ b/vendor/github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1/types_descheduler.go @@ -0,0 +1,189 @@ +package v1 + +import ( + operatorv1 "github.com/openshift/api/operator/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +// KubeDescheduler is the Schema for the deschedulers API +// +k8s:openapi-gen=true +// +genclient +// +kubebuilder:storageversion +// +kubebuilder:subresource:status +type KubeDescheduler struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + // spec holds user settable values for configuration + // +required + Spec KubeDeschedulerSpec `json:"spec"` + // status holds observed values from the cluster. They may not be overridden. 
+ // +optional + Status KubeDeschedulerStatus `json:"status"` +} + +// KubeDeschedulerSpec defines the desired state of KubeDescheduler +type KubeDeschedulerSpec struct { + operatorv1.OperatorSpec `json:",inline"` + + // Profiles sets which descheduler strategy profiles are enabled + Profiles []DeschedulerProfile `json:"profiles"` + + // DeschedulingIntervalSeconds is the number of seconds between descheduler runs + // +optional + DeschedulingIntervalSeconds *int32 `json:"deschedulingIntervalSeconds,omitempty"` + + // evictionLimits restrict the number of evictions during each descheduling run + EvictionLimits *EvictionLimits `json:"evictionLimits,omitempty"` + + // ProfileCustomizations contains various parameters for modifying the default behavior of certain profiles + ProfileCustomizations *ProfileCustomizations `json:"profileCustomizations,omitempty"` + + // Mode configures the descheduler to either evict pods (Automatic) or to simulate the eviction (Predictive) + // +optional + // +kubebuilder:default=Predictive + Mode Mode `json:"mode"` +} + +type EvictionLimits struct { + // total restricts the maximum number of overall evictions + Total *int32 `json:"total,omitempty"` +} + +// ProfileCustomizations contains various parameters for modifying the default behavior of certain profiles +type ProfileCustomizations struct { + // PodLifetime is the length of time after which pods should be evicted + // This field should be used with profiles that enable the PodLifetime strategy, such as LifecycleAndUtilization + // +kubebuilder:validation:Format=duration + PodLifetime *metav1.Duration `json:"podLifetime,omitempty"` + + // ThresholdPriority when set will reject eviction of any pod with priority equal or higher + // It is invalid to set it alongside ThresholdPriorityClassName + ThresholdPriority *int32 `json:"thresholdPriority,omitempty"` + + // ThresholdPriorityClassName when set will reject eviction of any pod with priority equal or higher + // It is invalid to set it alongside ThresholdPriority + ThresholdPriorityClassName string `json:"thresholdPriorityClassName,omitempty"` + + // Namespaces overrides included and excluded namespaces while keeping + // the default exclusion of all openshift-*, kube-system and hypershift namespaces + Namespaces Namespaces `json:"namespaces"` + + // LowNodeUtilizationThresholds enumerates predefined experimental thresholds + // +kubebuilder:validation:Enum=Low;Medium;High;"" + DevLowNodeUtilizationThresholds *LowNodeUtilizationThresholdsType `json:"devLowNodeUtilizationThresholds"` + + // DevEnableEvictionsInBackground enables descheduler's EvictionsInBackground alpha feature. + // The EvictionsInBackground alpha feature is a subject to change. + // Currently provided as an experimental feature. + DevEnableEvictionsInBackground bool `json:"devEnableEvictionsInBackground,omitempty"` + + // devHighNodeUtilizationThresholds enumerates thresholds for node utilization levels. + // The threshold values are subject to change. + // Currently provided as an experimental feature. 
+ // +kubebuilder:validation:Enum=Minimal;Modest;Moderate;"" + DevHighNodeUtilizationThresholds *HighNodeUtilizationThresholdsType `json:"devHighNodeUtilizationThresholds"` +} + +type LowNodeUtilizationThresholdsType string + +var ( + // LowThreshold sets thresholds:targetThresholds in 10%/30% ratio + LowThreshold LowNodeUtilizationThresholdsType = "Low" + + // MediumThreshold sets thresholds:targetThresholds in 20%/50% ratio + MediumThreshold LowNodeUtilizationThresholdsType = "Medium" + + // HighThreshold sets thresholds:targetThresholds in 40%/70% ratio + HighThreshold LowNodeUtilizationThresholdsType = "High" +) + +type HighNodeUtilizationThresholdsType string + +var ( + // CompactLowThreshold sets thresholds to 10% ratio. + // The threshold value is subject to change. + CompactMinimalThreshold HighNodeUtilizationThresholdsType = "Minimal" + + // CompactMediumThreshold sets thresholds to 20% ratio. + // The threshold value is subject to change. + CompactModestThreshold HighNodeUtilizationThresholdsType = "Modest" + + // CompactHighThreshold sets thresholds to 30% ratio. + // The threshold value is subject to change. + CompactModerateThreshold HighNodeUtilizationThresholdsType = "Moderate" +) + +// Namespaces overrides included and excluded namespaces while keeping +// the default exclusion of all openshift-*, kube-system and hypershift namespaces +type Namespaces struct { + Included []string `json:"included"` + Excluded []string `json:"excluded"` +} + +// DeschedulerProfile allows configuring the enabled strategy profiles for the descheduler +// it allows multiple profiles to be enabled at once, which will have cumulative effects on the cluster. +// +kubebuilder:validation:Enum=AffinityAndTaints;TopologyAndDuplicates;LifecycleAndUtilization;DevPreviewLongLifecycle;LongLifecycle;SoftTopologyAndDuplicates;EvictPodsWithLocalStorage;EvictPodsWithPVC;CompactAndScale +type DeschedulerProfile string + +var ( + // AffinityAndTaints enables descheduling strategies that balance pods based on affinity and + // node taint violations. + AffinityAndTaints DeschedulerProfile = "AffinityAndTaints" + + // TopologyAndDuplicates attempts to spread pods evenly among nodes based on topology spread + // constraints and duplicate replicas on the same node. + TopologyAndDuplicates DeschedulerProfile = "TopologyAndDuplicates" + + // SoftTopologyAndDuplicates attempts to spread pods evenly similar to TopologyAndDuplicates, but includes + // soft ("ScheduleAnyway") topology spread constraints + SoftTopologyAndDuplicates DeschedulerProfile = "SoftTopologyAndDuplicates" + + // LifecycleAndUtilization attempts to balance pods based on node resource usage, pod age, and pod restarts + LifecycleAndUtilization DeschedulerProfile = "LifecycleAndUtilization" + + // EvictPodsWithLocalStorage enables pods with local storage to be evicted by the descheduler by all other profiles + EvictPodsWithLocalStorage DeschedulerProfile = "EvictPodsWithLocalStorage" + + // EvictPodsWithPVC prevents pods with PVCs from being evicted by all other profiles + EvictPodsWithPVC DeschedulerProfile = "EvictPodsWithPVC" + + // DevPreviewLongLifecycle handles cluster lifecycle over a long term + // Deprecated: use LongLifecycle instead + DevPreviewLongLifecycle DeschedulerProfile = "DevPreviewLongLifecycle" + + // LongLifecycle handles cluster lifecycle over a long term + LongLifecycle DeschedulerProfile = "LongLifecycle" + + // CompactAndScale seeks to evict pods to enable the same workload to run on a smaller set of nodes. 
+ CompactAndScale DeschedulerProfile = "CompactAndScale" +) + +// DeschedulerProfile allows configuring the enabled strategy profiles for the descheduler +// it allows multiple profiles to be enabled at once, which will have cumulative effects on the cluster. +// +kubebuilder:validation:Enum=Automatic;Predictive +type Mode string + +var ( + // Automatic mode evicts pods from the cluster + Automatic Mode = "Automatic" + + // Predictive mode simulates eviction of pods + Predictive Mode = "Predictive" +) + +// KubeDeschedulerStatus defines the observed state of KubeDescheduler +type KubeDeschedulerStatus struct { + operatorv1.OperatorStatus `json:",inline"` +} + +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +// KubeDeschedulerList contains a list of KubeDescheduler +type KubeDeschedulerList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []KubeDescheduler `json:"items"` +} diff --git a/vendor/github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1/zz_generated.deepcopy.go b/vendor/github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1/zz_generated.deepcopy.go new file mode 100644 index 0000000000..54c304f3f2 --- /dev/null +++ b/vendor/github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1/zz_generated.deepcopy.go @@ -0,0 +1,147 @@ +//go:build !ignore_autogenerated +// +build !ignore_autogenerated + +// Code generated by deepcopy-gen. DO NOT EDIT. + +package v1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KubeDescheduler) DeepCopyInto(out *KubeDescheduler) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeDescheduler. +func (in *KubeDescheduler) DeepCopy() *KubeDescheduler { + if in == nil { + return nil + } + out := new(KubeDescheduler) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *KubeDescheduler) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KubeDeschedulerList) DeepCopyInto(out *KubeDeschedulerList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]KubeDescheduler, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeDeschedulerList. +func (in *KubeDeschedulerList) DeepCopy() *KubeDeschedulerList { + if in == nil { + return nil + } + out := new(KubeDeschedulerList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *KubeDeschedulerList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KubeDeschedulerSpec) DeepCopyInto(out *KubeDeschedulerSpec) { + *out = *in + in.OperatorSpec.DeepCopyInto(&out.OperatorSpec) + if in.Profiles != nil { + in, out := &in.Profiles, &out.Profiles + *out = make([]DeschedulerProfile, len(*in)) + copy(*out, *in) + } + if in.DeschedulingIntervalSeconds != nil { + in, out := &in.DeschedulingIntervalSeconds, &out.DeschedulingIntervalSeconds + *out = new(int32) + **out = **in + } + if in.ProfileCustomizations != nil { + in, out := &in.ProfileCustomizations, &out.ProfileCustomizations + *out = new(ProfileCustomizations) + (*in).DeepCopyInto(*out) + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeDeschedulerSpec. +func (in *KubeDeschedulerSpec) DeepCopy() *KubeDeschedulerSpec { + if in == nil { + return nil + } + out := new(KubeDeschedulerSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *KubeDeschedulerStatus) DeepCopyInto(out *KubeDeschedulerStatus) { + *out = *in + in.OperatorStatus.DeepCopyInto(&out.OperatorStatus) + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new KubeDeschedulerStatus. +func (in *KubeDeschedulerStatus) DeepCopy() *KubeDeschedulerStatus { + if in == nil { + return nil + } + out := new(KubeDeschedulerStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ProfileCustomizations) DeepCopyInto(out *ProfileCustomizations) { + *out = *in + if in.PodLifetime != nil { + in, out := &in.PodLifetime, &out.PodLifetime + *out = new(metav1.Duration) + **out = **in + } + if in.ThresholdPriority != nil { + in, out := &in.ThresholdPriority, &out.ThresholdPriority + *out = new(int32) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ProfileCustomizations. +func (in *ProfileCustomizations) DeepCopy() *ProfileCustomizations { + if in == nil { + return nil + } + out := new(ProfileCustomizations) + in.DeepCopyInto(out) + return out +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 8598480e08..82ef4b2a65 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -208,6 +208,9 @@ github.com/openshift/api/image/v1 github.com/openshift/api/operator/v1 github.com/openshift/api/quota/v1 github.com/openshift/api/route/v1 +# github.com/openshift/cluster-kube-descheduler-operator v0.0.0-20240916113608-1a30f3be33fa +## explicit; go 1.22.5 +github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1 # github.com/openshift/custom-resource-status v1.1.2 ## explicit; go 1.12 github.com/openshift/custom-resource-status/conditions/v1
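
For reference, a minimal sketch of a KubeDescheduler CR that satisfies the check implemented in RefreshDeschedulerCR above: the operator only inspects spec.profileCustomizations.devEnableEvictionsInBackground on the "cluster" CR in the openshift-kube-descheduler-operator namespace, so the other fields shown here (profiles, deschedulingIntervalSeconds) are illustrative rather than required by the check:

apiVersion: operator.openshift.io/v1
kind: KubeDescheduler
metadata:
  name: cluster
  namespace: openshift-kube-descheduler-operator
spec:
  managementState: Managed
  deschedulingIntervalSeconds: 1800
  profiles:
    - AffinityAndTaints
    - LifecycleAndUtilization
  profileCustomizations:
    # Required for the KubeVirt use case: when this is false or
    # profileCustomizations is omitted, HCO sets
    # kubevirt_hco_misconfigured_descheduler to 1 and the
    # HCOMisconfiguredDescheduler alert fires.
    devEnableEvictionsInBackground: true

With this customization in place the gauge drops back to 0 and the alert clears, which is the same transition exercised by the jsonPatchConfigure step in the functional test above.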