raise an alert when the descheduler is not correctly configured (#3118)
The descheduler is an optional operator: it is not
installed by default, nor as a dependency of HCO.
When installed, it can work on a cluster running
KubeVirt only if the devEnableEvictionsInBackground
profileCustomization, which is disabled by default,
is enabled.
HCO checks whether the descheduler is present and,
if so, inspects its configuration.
If the descheduler is misconfigured for the KubeVirt
use case, HCO triggers an alert to make the cluster
admin aware.
HCO does not amend the descheduler configuration
directly: the descheduler is an external, independent
operator, and directly controlling it is not a safe
practice (it could lead to infinite loops of operators
fighting each other, and so on).

Signed-off-by: Simone Tiraboschi <stirabos@redhat.com>
Co-authored-by: Simone Tiraboschi <stirabos@redhat.com>
kubevirt-bot and tiraboschi authored Sep 27, 2024
1 parent ff0feb5 commit cb37c49
Showing 33 changed files with 2,112 additions and 12 deletions.
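
For context, the check described in the commit message boils down to inspecting the KubeDescheduler CR for the devEnableEvictionsInBackground profile customization. Below is a minimal sketch of that logic in Go; the Spec field names are assumed from the cluster-kube-descheduler-operator v1 API, and the helper name is illustrative, not the actual HCO implementation.

package sketch

import (
	deschedulerv1 "github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1"
)

// isDeschedulerMisconfiguredForKubeVirt is an illustrative helper, not the HCO
// implementation: KubeVirt live-migrates VMs rather than letting their pods be
// evicted directly, so the descheduler only cooperates with KubeVirt when
// evictions in background are enabled.
func isDeschedulerMisconfiguredForKubeVirt(kd *deschedulerv1.KubeDescheduler) bool {
	if kd == nil {
		// no KubeDescheduler CR deployed: nothing to check
		return false
	}
	// field names assumed from the descheduler operator's v1 API
	pc := kd.Spec.ProfileCustomizations
	return pc == nil || !pc.DevEnableEvictionsInBackground
}
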
6 changes: 5 additions & 1 deletion Makefile
@@ -218,14 +218,17 @@ charts:
local:
./hack/make_local.sh

deploy_cr: deploy_hco_cr deploy_hpp
deploy_cr: deploy_hco_cr deploy_hpp deploy_fake_kubedescheduler

deploy_hco_cr:
./hack/deploy_only_cr.sh

deploy_hpp:
./hack/hpp/deploy_hpp.sh

deploy_fake_kubedescheduler:
./hack/kubeDescheduler/deploy_fake_kubedescheduler.sh

validate-no-offensive-lang:
./hack/validate-no-offensive-lang.sh

@@ -280,6 +283,7 @@ bump-hco:
kubevirt-nightly-test \
local \
deploy_cr \
deploy_fake_kubedescheduler \
build-docgen \
generate \
generate-doc \
54 changes: 47 additions & 7 deletions cmd/hyperconverged-cluster-operator/main.go
@@ -12,6 +12,7 @@ import (
imagev1 "github.com/openshift/api/image/v1"
operatorv1 "github.com/openshift/api/operator/v1"
openshiftroutev1 "github.com/openshift/api/route/v1"
deschedulerv1 "github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1"
csvv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1"
operatorsapiv2 "github.com/operator-framework/api/pkg/operators/v2"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
@@ -50,6 +51,8 @@ import (
"github.com/kubevirt/hyperconverged-cluster-operator/api"
hcov1beta1 "github.com/kubevirt/hyperconverged-cluster-operator/api/v1beta1"
"github.com/kubevirt/hyperconverged-cluster-operator/cmd/cmdcommon"
"github.com/kubevirt/hyperconverged-cluster-operator/controllers/crd"
"github.com/kubevirt/hyperconverged-cluster-operator/controllers/descheduler"
"github.com/kubevirt/hyperconverged-cluster-operator/controllers/hyperconverged"
"github.com/kubevirt/hyperconverged-cluster-operator/controllers/observability"
"github.com/kubevirt/hyperconverged-cluster-operator/controllers/operands"
@@ -86,6 +89,7 @@ var (
operatorsapiv2.AddToScheme,
imagev1.Install,
aaqv1alpha1.AddToScheme,
deschedulerv1.AddToScheme,
}
)

@@ -121,7 +125,7 @@ func main() {
needLeaderElection := !ci.IsRunningLocally()

// Create a new Cmd to provide shared dependencies and start components
mgr, err := manager.New(cfg, getManagerOptions(operatorNamespace, needLeaderElection, ci.IsMonitoringAvailable(), ci.IsOpenshift(), scheme))
mgr, err := manager.New(cfg, getManagerOptions(operatorNamespace, needLeaderElection, ci, scheme))
cmdHelper.ExitOnError(err, "can't initiate manager")

// register pprof instrumentation if HCO_PPROF_ADDR is set
@@ -165,20 +169,40 @@ func main() {
upgradeableCondition, err = hcoutil.NewOperatorCondition(ci, mgr.GetClient(), operatorsapiv2.Upgradeable)
cmdHelper.ExitOnError(err, "Cannot create Upgradeable Operator Condition")

// a channel to trigger a restart of the operator
// via a clean cancel of the manager
restartCh := make(chan struct{})

// Create a new reconciler
if err := hyperconverged.RegisterReconciler(mgr, ci, upgradeableCondition); err != nil {
logger.Error(err, "failed to register the HyperConverged controller")
eventEmitter.EmitEvent(nil, corev1.EventTypeWarning, "InitError", "Unable to register HyperConverged controller; "+err.Error())
os.Exit(1)
}

// Create a new CRD reconciler
if err := crd.RegisterReconciler(mgr, restartCh); err != nil {
logger.Error(err, "failed to register the CRD controller")
eventEmitter.EmitEvent(nil, corev1.EventTypeWarning, "InitError", "Unable to register CRD controller; "+err.Error())
os.Exit(1)
}

if ci.IsOpenshift() {
if err = observability.SetupWithManager(mgr); err != nil {
logger.Error(err, "unable to create controller", "controller", "Observability")
os.Exit(1)
}
}

if ci.IsDeschedulerAvailable() {
// Create a new reconciler for KubeDescheduler
if err := descheduler.RegisterReconciler(mgr); err != nil {
logger.Error(err, "failed to register the KubeDescheduler controller")
eventEmitter.EmitEvent(nil, corev1.EventTypeWarning, "InitError", "Unable to register KubeDescheduler controller; "+err.Error())
os.Exit(1)
}
}

err = createPriorityClass(ctx, mgr)
cmdHelper.ExitOnError(err, "Failed creating PriorityClass")

@@ -193,8 +217,17 @@ func main() {
logger.Info("Starting the Cmd.")
eventEmitter.EmitEvent(nil, corev1.EventTypeNormal, "Init", "Starting the HyperConverged Pod")

// create context with cancel for the manager
mgrCtx, mgrCancel := context.WithCancel(signals.SetupSignalHandler())

defer mgrCancel()
go func() {
<-restartCh
mgrCancel()
}()

// Start the Cmd
if err := mgr.Start(signals.SetupSignalHandler()); err != nil {
if err := mgr.Start(mgrCtx); err != nil {
logger.Error(err, "Manager exited non-zero")
eventEmitter.EmitEvent(nil, corev1.EventTypeWarning, "UnexpectedError", "HyperConverged crashed; "+err.Error())
os.Exit(1)
@@ -203,7 +236,7 @@

// Restricts the cache's ListWatch to specific fields/labels per GVK at the specified object to control the memory impact
// this is used to completely overwrite the NewCache function so all the interesting objects should be explicitly listed here
func getCacheOption(operatorNamespace string, isMonitoringAvailable, isOpenshift bool) cache.Options {
func getCacheOption(operatorNamespace string, ci hcoutil.ClusterInfo) cache.Options {
namespaceSelector := fields.Set{"metadata.namespace": operatorNamespace}.AsSelector()
labelSelector := labels.Set{hcoutil.AppLabel: hcoutil.HyperConvergedName}.AsSelector()
labelSelectorForNamespace := labels.Set{hcoutil.KubernetesMetadataName: operatorNamespace}.AsSelector()
@@ -257,6 +290,10 @@ func getCacheOption(operatorNamespace string, isMonitoringAvailable, isOpenshift
},
}

cacheOptionsByObjectForDescheduler := map[client.Object]cache.ByObject{
&deschedulerv1.KubeDescheduler{}: {},
}

cacheOptionsByObjectForOpenshift := map[client.Object]cache.ByObject{
&openshiftroutev1.Route{}: {
Namespaces: map[string]cache.Config{
@@ -279,18 +316,21 @@
},
}

if isMonitoringAvailable {
if ci.IsMonitoringAvailable() {
maps.Copy(cacheOptions.ByObject, cacheOptionsByObjectForMonitoring)
}
if isOpenshift {
if ci.IsDeschedulerAvailable() {
maps.Copy(cacheOptions.ByObject, cacheOptionsByObjectForDescheduler)
}
if ci.IsOpenshift() {
maps.Copy(cacheOptions.ByObject, cacheOptionsByObjectForOpenshift)
}

return cacheOptions

}

func getManagerOptions(operatorNamespace string, needLeaderElection, isMonitoringAvailable, isOpenshift bool, scheme *apiruntime.Scheme) manager.Options {
func getManagerOptions(operatorNamespace string, needLeaderElection bool, ci hcoutil.ClusterInfo, scheme *apiruntime.Scheme) manager.Options {
return manager.Options{
Metrics: server.Options{
BindAddress: fmt.Sprintf("%s:%d", hcoutil.MetricsHost, hcoutil.MetricsPort),
@@ -305,7 +345,7 @@
// "configmapsleases". Therefore, having only "leases" should be safe now.
LeaderElectionResourceLock: resourcelock.LeasesResourceLock,
LeaderElectionID: "hyperconverged-cluster-operator-lock",
Cache: getCacheOption(operatorNamespace, isMonitoringAvailable, isOpenshift),
Cache: getCacheOption(operatorNamespace, ci),
Scheme: scheme,
}
}
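
The restart wiring added to main.go above can be read in isolation as the following pattern: a controller writes to a channel, and a goroutine cancels the manager context so the Pod exits cleanly and is recreated by its Deployment. A minimal, self-contained sketch against the controller-runtime manager API (the function name runWithRestart is illustrative, not part of the actual change):

package sketch

import (
	"context"

	"sigs.k8s.io/controller-runtime/pkg/manager"
	"sigs.k8s.io/controller-runtime/pkg/manager/signals"
)

// runWithRestart starts mgr and stops it cleanly as soon as something is sent
// on restartCh; the Deployment then recreates the Pod, letting the operator
// rebuild its client cache with any CRDs installed after startup.
func runWithRestart(mgr manager.Manager, restartCh <-chan struct{}) error {
	mgrCtx, mgrCancel := context.WithCancel(signals.SetupSignalHandler())
	defer mgrCancel()

	go func() {
		<-restartCh
		mgrCancel()
	}()

	return mgr.Start(mgrCtx)
}
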
40 changes: 39 additions & 1 deletion controllers/commontestutils/testUtils.go
@@ -14,6 +14,7 @@ import (
imagev1 "github.com/openshift/api/image/v1"
operatorv1 "github.com/openshift/api/operator/v1"
routev1 "github.com/openshift/api/route/v1"
deschedulerv1 "github.com/openshift/cluster-kube-descheduler-operator/pkg/apis/descheduler/v1"
csvv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
appsv1 "k8s.io/api/apps/v1"
@@ -164,6 +165,7 @@ func GetScheme() *runtime.Scheme {
openshiftconfigv1.Install,
csvv1alpha1.AddToScheme,
aaqv1alpha1.AddToScheme,
deschedulerv1.AddToScheme,
} {
Expect(f(testScheme)).ToNot(HaveOccurred())
}
@@ -307,6 +309,15 @@ func (c ClusterInfoMock) IsConsolePluginImageProvided() bool {
func (c ClusterInfoMock) IsMonitoringAvailable() bool {
return true
}
func (c ClusterInfoMock) IsDeschedulerAvailable() bool {
return true
}
func (c ClusterInfoMock) IsDeschedulerCRDDeployed(_ context.Context, _ client.Client) bool {
return true
}
func (c ClusterInfoMock) IsDeschedulerMisconfigured() bool {
return false
}
func (c ClusterInfoMock) IsSingleStackIPv6() bool {
return true
}
@@ -330,6 +341,9 @@ func (ClusterInfoMock) GetTLSSecurityProfile(_ *openshiftconfigv1.TLSSecurityPro
func (ClusterInfoMock) RefreshAPIServerCR(_ context.Context, _ client.Client) error {
return nil
}
func (ClusterInfoMock) RefreshDeschedulerCR(_ context.Context, _ client.Client) error {
return nil
}

// ClusterInfoSNOMock mocks Openshift SNO
type ClusterInfoSNOMock struct{}
@@ -378,12 +392,24 @@ func (ClusterInfoSNOMock) GetTLSSecurityProfile(_ *openshiftconfigv1.TLSSecurity
func (ClusterInfoSNOMock) RefreshAPIServerCR(_ context.Context, _ client.Client) error {
return nil
}
func (ClusterInfoSNOMock) RefreshDeschedulerCR(_ context.Context, _ client.Client) error {
return nil
}
func (ClusterInfoSNOMock) IsConsolePluginImageProvided() bool {
return true
}
func (c ClusterInfoSNOMock) IsMonitoringAvailable() bool {
return true
}
func (c ClusterInfoSNOMock) IsDeschedulerAvailable() bool {
return true
}
func (c ClusterInfoSNOMock) IsDeschedulerCRDDeployed(_ context.Context, _ client.Client) bool {
return true
}
func (c ClusterInfoSNOMock) IsDeschedulerMisconfigured() bool {
return false
}
func (c ClusterInfoSNOMock) IsSingleStackIPv6() bool {
return true
}
@@ -432,7 +458,16 @@ func (ClusterInfoSRCPHAIMock) IsConsolePluginImageProvided() bool {
func (ClusterInfoSRCPHAIMock) IsMonitoringAvailable() bool {
return true
}
func (m ClusterInfoSRCPHAIMock) IsSingleStackIPv6() bool {
func (ClusterInfoSRCPHAIMock) IsDeschedulerAvailable() bool {
return true
}
func (ClusterInfoSRCPHAIMock) IsDeschedulerCRDDeployed(_ context.Context, _ client.Client) bool {
return true
}
func (ClusterInfoSRCPHAIMock) IsDeschedulerMisconfigured() bool {
return false
}
func (ClusterInfoSRCPHAIMock) IsSingleStackIPv6() bool {
return true
}
func (ClusterInfoSRCPHAIMock) GetTLSSecurityProfile(_ *openshiftconfigv1.TLSSecurityProfile) *openshiftconfigv1.TLSSecurityProfile {
@@ -444,6 +479,9 @@ func (ClusterInfoSRCPHAIMock) GetTLSSecurityProfile(_ *openshiftconfigv1.TLSSecu
func (ClusterInfoSRCPHAIMock) RefreshAPIServerCR(_ context.Context, _ client.Client) error {
return nil
}
func (ClusterInfoSRCPHAIMock) RefreshDeschedulerCR(_ context.Context, _ client.Client) error {
return nil
}

func KeysFromSSMap(ssmap map[string]string) gstruct.Keys {
keys := gstruct.Keys{}
103 changes: 103 additions & 0 deletions controllers/crd/crd_controller.go
@@ -0,0 +1,103 @@
package crd

import (
"context"

operatorhandler "github.com/operator-framework/operator-lib/handler"
corev1 "k8s.io/api/core/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"

hcoutil "github.com/kubevirt/hyperconverged-cluster-operator/pkg/util"
)

var (
log = logf.Log.WithName("controller_crd")
)

// RegisterReconciler creates a new CRD Reconciler and registers it into manager.
func RegisterReconciler(mgr manager.Manager, restartCh chan<- struct{}) error {
return add(mgr, newReconciler(mgr, restartCh))
}

// newReconciler returns a new reconcile.Reconciler
func newReconciler(mgr manager.Manager, restartCh chan<- struct{}) reconcile.Reconciler {

r := &ReconcileCRD{
client: mgr.GetClient(),
restartCh: restartCh,
eventEmitter: hcoutil.GetEventEmitter(),
}

return r
}

// add adds a new Controller to mgr with r as the reconcile.Reconciler
func add(mgr manager.Manager, r reconcile.Reconciler) error {

// Create a new controller
c, err := controller.New("crd-controller", mgr, controller.Options{Reconciler: r})
if err != nil {
return err
}

// Watch for changes to selected (by name) CRDs
// look only at descheduler CRD for now
err = c.Watch(
source.Kind(
mgr.GetCache(), client.Object(&apiextensionsv1.CustomResourceDefinition{}),
&operatorhandler.InstrumentedEnqueueRequestForObject[client.Object]{},
predicate.NewPredicateFuncs(func(object client.Object) bool {
switch object.GetName() {
case hcoutil.DeschedulerCRDName:
return true
}
return false
}),
))
if err != nil {
return err
}

return nil
}

// ReconcileCRD reconciles a CRD object
type ReconcileCRD struct {
// This client, initialized using mgr.Client() above, is a split client
// that reads objects from the cache and writes to the apiserver
client client.Client
eventEmitter hcoutil.EventEmitter
restartCh chan<- struct{}
}

// operatorRestart triggers a restart of the operator:
// the controller-runtime caching client can only handle kinds
// that were already defined when the client cache got initialized.
// If a new relevant kind got deployed at runtime,
// the operator should restart to be able to read it.
// See: https://github.com/kubernetes-sigs/controller-runtime/issues/2456
func (r *ReconcileCRD) operatorRestart() {
r.restartCh <- struct{}{}
}

// Reconcile restarts the operator when the KubeDescheduler CRD gets deployed,
// so that the KubeDescheduler view on the ClusterInfo singleton can be refreshed
func (r *ReconcileCRD) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) {

log.Info("Triggered by a CRD")
if !hcoutil.GetClusterInfo().IsDeschedulerAvailable() {
if hcoutil.GetClusterInfo().IsDeschedulerCRDDeployed(ctx, r.client) {
log.Info("KubeDescheduler CRD got deployed, restarting the operator to reconfigure the operator for the new kind")
r.eventEmitter.EmitEvent(nil, corev1.EventTypeNormal, "KubeDescheduler CRD got deployed, restarting the operator to reconfigure the operator for the new kind", "Restarting the operator to be able to read KubeDescheduler CRs ")
r.operatorRestart()
}
}

return reconcile.Result{}, nil
}
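
The controllers/descheduler package registered from main.go is not part of the files shown in this excerpt. As a rough, hypothetical sketch only (not the actual file), a reconciler of that shape would watch the KubeDescheduler CR and refresh the cached view on the ClusterInfo singleton, from which the misconfiguration alert is evaluated:

package sketch

import (
	"context"

	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"

	hcoutil "github.com/kubevirt/hyperconverged-cluster-operator/pkg/util"
)

// deschedulerReconciler is hypothetical: on every change to the KubeDescheduler
// CR it refreshes the ClusterInfo singleton's cached copy, so that
// IsDeschedulerMisconfigured() reflects the current configuration.
type deschedulerReconciler struct {
	client client.Client
}

func (r *deschedulerReconciler) Reconcile(ctx context.Context, _ reconcile.Request) (reconcile.Result, error) {
	// refresh the cached KubeDescheduler CR; the alert itself is assumed to be
	// driven elsewhere from IsDeschedulerMisconfigured()
	if err := hcoutil.GetClusterInfo().RefreshDeschedulerCR(ctx, r.client); err != nil {
		return reconcile.Result{}, err
	}
	return reconcile.Result{}, nil
}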