[release-2.8] 🤖 Sync from open-cluster-management-io/config-policy-controller: #180 #659

Merged
93 changes: 56 additions & 37 deletions controllers/configurationpolicy_controller.go
@@ -134,6 +134,9 @@ type ConfigurationPolicyReconciler struct {
 	openAPIParser *openapi.CachedOpenAPIParser
 	// A lock when performing actions that are not thread safe (i.e. reassigning object properties).
 	lock sync.RWMutex
+	// When true, the controller has detected it is being uninstalled and only basic cleanup should be performed before
+	// exiting.
+	UninstallMode bool
 }
 
 //+kubebuilder:rbac:groups=*,resources=*,verbs=*
@@ -166,7 +169,7 @@ func (r *ConfigurationPolicyReconciler) Reconcile(ctx context.Context, request c
 // PeriodicallyExecConfigPolicies loops through all configurationpolicies in the target namespace and triggers
 // template handling for each one. This function drives all the work the configuration policy controller does.
 func (r *ConfigurationPolicyReconciler) PeriodicallyExecConfigPolicies(
-	ctx context.Context, freq uint, elected <-chan struct{},
+	ctx context.Context, freq uint, elected <-chan struct{}, uninstallDetected context.CancelFunc,
 ) {
 	log.Info("Waiting for leader election before periodically evaluating configuration policies")
 
@@ -212,11 +215,24 @@ func (r *ConfigurationPolicyReconciler) PeriodicallyExecConfigPolicies(
 			_ = r.refreshDiscoveryInfo()
 		}
 
-		cleanupImmediately, err := r.cleanupImmediately()
-		if err != nil {
-			log.Error(err, "Failed to determine if it's time to cleanup immediately")
-
-			skipLoop = true
-		}
+		cleanupImmediately := r.UninstallMode
+
+		if !r.UninstallMode {
+			uninstalling, crdDeleting, err := r.cleanupImmediately()
+			if !uninstalling && !crdDeleting && err != nil {
+				log.Error(err, "Failed to determine if it's time to cleanup immediately")
+
+				skipLoop = true
+			}
+
+			cleanupImmediately = crdDeleting
+
+			if uninstalling {
+				r.UninstallMode = uninstalling
+				cleanupImmediately = true
+
+				uninstallDetected()
+			}
+		}
 
 		if cleanupImmediately || !skipLoop {
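
The new `uninstallDetected` argument is a plain `context.CancelFunc`: when the loop observes the uninstall, it fires the callback, which cancels a context owned by `main.go` without the controllers package needing to know what is listening on the other end. A minimal, self-contained sketch of that handshake (the name `periodicLoop` is illustrative, not from this PR):

package main

import (
	"context"
	"fmt"
	"time"
)

// periodicLoop stands in for PeriodicallyExecConfigPolicies: it polls some
// condition and fires the cancel func once the condition is observed.
func periodicLoop(ctx context.Context, uninstallDetected context.CancelFunc) {
	for i := 0; ; i++ {
		select {
		case <-ctx.Done():
			return
		case <-time.After(10 * time.Millisecond):
		}

		if i == 3 { // pretend the uninstall was just detected
			uninstallDetected()
		}
	}
}

func main() {
	terminatingCtx := context.Background() // never cancelled in this sketch
	uninstallingCtx, uninstallingCtxCancel := context.WithCancel(terminatingCtx)

	go periodicLoop(terminatingCtx, uninstallingCtxCancel)

	<-uninstallingCtx.Done() // unblocks once the loop calls the cancel func
	fmt.Println("uninstall detected; shutting down optional managers")
}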
@@ -671,30 +687,27 @@ func (r *ConfigurationPolicyReconciler) cleanUpChildObjects(plc policyv1.Configu
 	return deletionFailures
 }
 
-// cleanupImmediately returns true when the cluster is in a state where configurationpolicies should
-// be removed as soon as possible, ignoring the pruneObjectBehavior of the policies. This is the
-// case when the controller is being uninstalled or the CRD is being deleted.
-func (r *ConfigurationPolicyReconciler) cleanupImmediately() (bool, error) {
-	beingUninstalled, beingUninstalledErr := r.isBeingUninstalled()
-	if beingUninstalledErr == nil && beingUninstalled {
-		return true, nil
-	}
-
-	defDeleting, defErr := r.definitionIsDeleting()
-	if defErr == nil && defDeleting {
-		return true, nil
-	}
-
-	if beingUninstalledErr == nil && defErr == nil {
-		// if either was deleting, we would've already returned.
-		return false, nil
-	}
-
-	// At least one had an unexpected error, so the decision can't be made right now
-	//nolint:errorlint // we can't choose just one of the errors to "correctly" wrap
-	return false, fmt.Errorf(
-		"isBeingUninstalled error: '%v', definitionIsDeleting error: '%v'", beingUninstalledErr, defErr,
-	)
+// cleanupImmediately returns true (i.e. beingUninstalled or crdDeleting) when the cluster is in a state where
+// configurationpolicies should be removed as soon as possible, ignoring the pruneObjectBehavior of the policies. This
+// is the case when the controller is being uninstalled or the CRD is being deleted.
+func (r *ConfigurationPolicyReconciler) cleanupImmediately() (beingUninstalled bool, crdDeleting bool, err error) {
+	var beingUninstalledErr error
+
+	beingUninstalled, beingUninstalledErr = IsBeingUninstalled(r.Client)
+
+	var defErr error
+
+	crdDeleting, defErr = r.definitionIsDeleting()
+
+	if beingUninstalledErr != nil && defErr != nil {
+		err = fmt.Errorf("%w; %w", beingUninstalledErr, defErr)
+	} else if beingUninstalledErr != nil {
+		err = beingUninstalledErr
+	} else if defErr != nil {
+		err = defErr
+	}
+
+	return
 }
 
 func (r *ConfigurationPolicyReconciler) definitionIsDeleting() (bool, error) {
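
The rewritten error handling combines the two checks' failures with `fmt.Errorf("%w; %w", ...)`, which wraps both errors (multiple `%w` verbs require Go 1.20) instead of flattening them into a string as the old `'%v' ... '%v'` message did. That keeps `errors.Is` and `errors.As` usable for callers, as this small sketch with made-up sentinel errors shows:

package main

import (
	"errors"
	"fmt"
)

var (
	errUninstallCheck = errors.New("uninstall check failed")
	errCRDCheck       = errors.New("CRD check failed")
)

func main() {
	// Old style: %v flattens the errors into text; errors.Is can no longer match them.
	flattened := fmt.Errorf("a: '%v', b: '%v'", errUninstallCheck, errCRDCheck)
	fmt.Println(errors.Is(flattened, errCRDCheck)) // false

	// New style: both errors stay in the chain (Go 1.20+).
	wrapped := fmt.Errorf("%w; %w", errUninstallCheck, errCRDCheck)
	fmt.Println(errors.Is(wrapped, errUninstallCheck), errors.Is(wrapped, errCRDCheck)) // true true
}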
@@ -765,18 +778,24 @@ func (r *ConfigurationPolicyReconciler) handleObjectTemplates(plc policyv1.Confi
 
 	// object handling for when configurationPolicy is deleted
 	if plc.Spec.PruneObjectBehavior == "DeleteIfCreated" || plc.Spec.PruneObjectBehavior == "DeleteAll" {
-		cleanupNow, err := r.cleanupImmediately()
-		if err != nil {
-			log.Error(err, "Error determining whether to cleanup immediately, requeueing policy")
-
-			return
-		}
+		var crdDeleting bool
+
+		if !r.UninstallMode {
+			var err error
+
+			crdDeleting, err = r.definitionIsDeleting()
+			if err != nil {
+				log.Error(err, "Error determining whether to cleanup immediately, requeueing policy")
+
+				return
+			}
+		}
 
-		if cleanupNow {
+		if r.UninstallMode || crdDeleting {
 			if objHasFinalizer(&plc, pruneObjectFinalizer) {
 				patch := removeObjFinalizerPatch(&plc, pruneObjectFinalizer)
 
-				err = r.Patch(context.TODO(), &plc, client.RawPatch(types.JSONPatchType, patch))
+				err := r.Patch(context.TODO(), &plc, client.RawPatch(types.JSONPatchType, patch))
 				if err != nil {
 					log.Error(err, "Error removing finalizer for configuration policy")
 
@@ -819,7 +838,7 @@ func (r *ConfigurationPolicyReconciler) handleObjectTemplates(plc policyv1.Confi
 
 			patch := removeObjFinalizerPatch(&plc, pruneObjectFinalizer)
 
-			err = r.Patch(context.TODO(), &plc, client.RawPatch(types.JSONPatchType, patch))
+			err := r.Patch(context.TODO(), &plc, client.RawPatch(types.JSONPatchType, patch))
 			if err != nil {
 				log.Error(err, "Error removing finalizer for configuration policy")
 
@@ -3068,7 +3087,7 @@ func convertPolicyStatusToString(plc *policyv1.ConfigurationPolicy) string {
 
 // getDeployment gets the Deployment object associated with this controller. If the controller is running outside of
 // a cluster, no Deployment object or error will be returned.
-func (r *ConfigurationPolicyReconciler) getDeployment() (*appsv1.Deployment, error) {
+func getDeployment(client client.Client) (*appsv1.Deployment, error) {
 	key, err := common.GetOperatorNamespacedName()
 	if err != nil {
 		// Running locally
@@ -3080,15 +3099,15 @@ func (r *ConfigurationPolicyReconciler) getDeployment() (*appsv1.Deployment, err
 	}
 
 	deployment := appsv1.Deployment{}
-	if err := r.Client.Get(context.TODO(), key, &deployment); err != nil {
+	if err := client.Get(context.TODO(), key, &deployment); err != nil {
 		return nil, err
 	}
 
 	return &deployment, nil
 }
 
-func (r *ConfigurationPolicyReconciler) isBeingUninstalled() (bool, error) {
-	deployment, err := r.getDeployment()
+func IsBeingUninstalled(client client.Client) (bool, error) {
+	deployment, err := getDeployment(client)
 	if deployment == nil || err != nil {
 		return false, err
 	}
@@ -3099,7 +3118,7 @@ func (r *ConfigurationPolicyReconciler) isBeingUninstalled() (bool, error) {
 // removeLegacyDeploymentFinalizer removes the policy.open-cluster-management.io/delete-related-objects on the
 // Deployment object. This finalizer is no longer needed on the Deployment object, so it is removed.
 func (r *ConfigurationPolicyReconciler) removeLegacyDeploymentFinalizer() error {
-	deployment, err := r.getDeployment()
+	deployment, err := getDeployment(r.Client)
 	if deployment == nil || err != nil {
 		return err
 	}
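
Turning `getDeployment` and `isBeingUninstalled` from methods on `ConfigurationPolicyReconciler` into package-level functions (`getDeployment(client)` and the exported `IsBeingUninstalled(client)`) is what lets `main.go` run the uninstall check before the reconciler is even constructed. The body of `IsBeingUninstalled` is collapsed in this view; judging from the visible early return, it plausibly reduces to a deletion-timestamp check on the controller's own Deployment, roughly like the sketch below (the final check is an assumption, not the committed code):

package controllers

import "sigs.k8s.io/controller-runtime/pkg/client"

// Sketch of the collapsed body: treat the controller as "being uninstalled"
// when its own Deployment has been marked for deletion. getDeployment is the
// package-level function shown earlier in this diff.
func IsBeingUninstalled(c client.Client) (bool, error) {
	deployment, err := getDeployment(c)
	if deployment == nil || err != nil {
		return false, err
	}

	// Assumption: a deletion timestamp on the Deployment is the uninstall signal.
	return deployment.GetDeletionTimestamp() != nil, nil
}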
62 changes: 43 additions & 19 deletions main.go
@@ -271,6 +271,19 @@ func main() {
 		os.Exit(1)
 	}
 
+	terminatingCtx := ctrl.SetupSignalHandler()
+
+	uninstallingCtx, uninstallingCtxCancel := context.WithCancel(terminatingCtx)
+
+	beingUninstalled, err := controllers.IsBeingUninstalled(mgr.GetClient())
+	if err != nil {
+		log.Error(err, "Failed to determine if the controller is being uninstalled at startup. Will assume it's not.")
+	}
+
+	if beingUninstalled {
+		uninstallingCtxCancel()
+	}
+
 	var targetK8sClient kubernetes.Interface
 	var targetK8sDynamicClient dynamic.Interface
 	var targetK8sConfig *rest.Config
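
Deriving `uninstallingCtx` from `terminatingCtx` with `context.WithCancel` gives `main.go` two independent shutdown signals: firing `uninstallingCtxCancel` (on uninstall) stops only the consumers of `uninstallingCtx`, while a termination signal tears down both. A stripped-down demonstration of that parent/child behavior:

package main

import (
	"context"
	"fmt"
)

func main() {
	terminatingCtx, terminate := context.WithCancel(context.Background())
	defer terminate()

	uninstallingCtx, uninstallDetected := context.WithCancel(terminatingCtx)

	// Cancelling the child context does not touch the parent...
	uninstallDetected()
	fmt.Println(uninstallingCtx.Err() != nil, terminatingCtx.Err() != nil) // true false

	// ...but cancelling the parent always propagates to the child.
	terminate()
	fmt.Println(terminatingCtx.Err() != nil) // true
}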
@@ -296,16 +309,21 @@
 		targetK8sClient = kubernetes.NewForConfigOrDie(targetK8sConfig)
 		targetK8sDynamicClient = dynamic.NewForConfigOrDie(targetK8sConfig)
 
-		nsSelMgr, err = manager.New(targetK8sConfig, manager.Options{
-			NewCache: cache.BuilderWithOptions(cache.Options{
-				TransformByObject: map[client.Object]toolscache.TransformFunc{
-					&corev1.Namespace{}: nsTransform,
-				},
-			}),
-		})
-		if err != nil {
-			log.Error(err, "Unable to create manager from target kube config")
-			os.Exit(1)
+		// The managed cluster's API server is potentially not the same as the hosting cluster and it could be
+		// offline already as part of the uninstall process. In this case, the manager's instantiation will fail.
+		// This controller is not needed in uninstall mode, so just skip it.
+		if !beingUninstalled {
+			nsSelMgr, err = manager.New(targetK8sConfig, manager.Options{
+				NewCache: cache.BuilderWithOptions(cache.Options{
+					TransformByObject: map[client.Object]toolscache.TransformFunc{
+						&corev1.Namespace{}: nsTransform,
+					},
+				}),
+			})
+			if err != nil {
+				log.Error(err, "Unable to create manager from target kube config")
+				os.Exit(1)
+			}
 		}
 
 		log.Info(
@@ -316,12 +334,16 @@ func main() {
 
 	instanceName, _ := os.Hostname() // on an error, instanceName will be empty, which is ok
 
-	nsSelReconciler := common.NamespaceSelectorReconciler{
-		Client: nsSelMgr.GetClient(),
-	}
-	if err = nsSelReconciler.SetupWithManager(nsSelMgr); err != nil {
-		log.Error(err, "Unable to create controller", "controller", "NamespaceSelector")
-		os.Exit(1)
+	var nsSelReconciler common.NamespaceSelectorReconciler
+
+	if !beingUninstalled {
+		nsSelReconciler = common.NamespaceSelectorReconciler{
+			Client: nsSelMgr.GetClient(),
+		}
+		if err = nsSelReconciler.SetupWithManager(nsSelMgr); err != nil {
+			log.Error(err, "Unable to create controller", "controller", "NamespaceSelector")
+			os.Exit(1)
+		}
 	}
 
 	discoveryClient := discovery.NewDiscoveryClientForConfigOrDie(targetK8sConfig)
@@ -355,6 +377,7 @@ func main() {
 		TargetK8sConfig:    targetK8sConfig,
 		SelectorReconciler: &nsSelReconciler,
 		EnableMetrics:      opts.enableMetrics,
+		UninstallMode:      beingUninstalled,
 	}
 	if err = reconciler.SetupWithManager(mgr); err != nil {
 		log.Error(err, "Unable to create controller", "controller", "ConfigurationPolicy")
@@ -372,14 +395,13 @@ func main() {
 		os.Exit(1)
 	}
 
-	terminatingCtx := ctrl.SetupSignalHandler()
 	managerCtx, managerCancel := context.WithCancel(context.Background())
 
 	// PeriodicallyExecConfigPolicies is the go-routine that periodically checks the policies
 	log.Info("Periodically processing Configuration Policies", "frequency", opts.frequency)
 
 	go func() {
-		reconciler.PeriodicallyExecConfigPolicies(terminatingCtx, opts.frequency, mgr.Elected())
+		reconciler.PeriodicallyExecConfigPolicies(terminatingCtx, opts.frequency, mgr.Elected(), uninstallingCtxCancel)
 		managerCancel()
 	}()
 
@@ -440,7 +462,9 @@ func main() {
 	wg.Add(1)
 
 	go func() {
-		if err := nsSelMgr.Start(managerCtx); err != nil {
+		// Use the uninstallingCtx so that this shuts down when the controller is being uninstalled. This is
+		// important since the managed cluster's API server may become unavailable at this time when in hosted mode.
+		if err := nsSelMgr.Start(uninstallingCtx); err != nil {
 			log.Error(err, "Problem running manager")
 
 			managerCancel()
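
Because controller-runtime's `manager.Start` blocks until its context is done, handing `nsSelMgr.Start` the `uninstallingCtx` means the namespace-selector manager exits as soon as the periodic loop reports the uninstall, even if the hosted cluster's API server is already unreachable. A toy stand-in (not controller-runtime's real implementation) showing the wiring with the surrounding `WaitGroup`:

package main

import (
	"context"
	"fmt"
	"sync"
)

// start mimics manager.Start: it blocks until the supplied context is done,
// then returns nil (a real manager would also drain its work queues).
func start(ctx context.Context) error {
	<-ctx.Done()
	return nil
}

func main() {
	uninstallingCtx, uninstallDetected := context.WithCancel(context.Background())

	var wg sync.WaitGroup
	wg.Add(1)

	go func() {
		defer wg.Done()

		if err := start(uninstallingCtx); err != nil {
			fmt.Println("problem running manager:", err)
		}
	}()

	uninstallDetected() // e.g. fired from the periodic evaluation loop
	wg.Wait()
	fmt.Println("namespace-selector manager stopped cleanly")
}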