Skip to content

Commit

Permalink
Trigger uninstalls through a new annotation
Browse files Browse the repository at this point in the history
Previous to this, a finalizer on the Deployment was added so that if the
Deployment was deleted, it would handle immediate clean up. This doesn't
handle the common case where the config-policy-controller ManagedClusterAddOn
is deleted, which causes the ManifestWork to be deleted, which triggers
all Configuration Policy controller deployment artifacts, including the
service account.

A new approach is taken so that a new annotation of
policy.open-cluster-management.io/uninstalling=true is set on the
Deployment to indicate that the Configuration Policy controller should
remove all finalizers because it's getting deleted.

The Policy Addon controller will be updated so that when the
config-policy-controller ManagedClusterAddOn object is deleted, a
finalizer will prevent it and a Pod will run on the managed cluster with
the new `trigger-uninstall` subcommand. This sets the uninstalling
annotation on the Deployment and then waits until all
ConfigurationPolicy finalizers have been removed. Once the command ends,
the Pod exits, and the ManagedClusterAddOn object's finalizer is removed
and the uninstall proceeds.

Relates:
https://issues.redhat.com/browse/ACM-3233
https://issues.redhat.com/browse/ACM-2923

Signed-off-by: mprahl <mprahl@users.noreply.github.com>
  • Loading branch information
mprahl committed Feb 8, 2023
1 parent ff83c38 commit 44acaa4
Show file tree
Hide file tree
Showing 15 changed files with 502 additions and 104 deletions.
15 changes: 10 additions & 5 deletions .github/workflows/kind.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,17 +87,22 @@ jobs:
export GOPATH=$(go env GOPATH)
KUBECONFIG=${PWD}/kubeconfig_managed make e2e-test-hosted-mode-coverage
- name: Verify Deployment Configuration
run: |
make build-images
KUBECONFIG=${PWD}/kubeconfig_managed_e2e make kind-deploy-controller-dev
- name: E2E tests that require the controller running in a cluster
run: |
export GOPATH=$(go env GOPATH)
KUBECONFIG=${PWD}/kubeconfig_managed make e2e-test-running-in-cluster
- name: Test Coverage Verification
if: ${{ github.event_name == 'pull_request' }}
run: |
make test-coverage
make coverage-verify
- name: Verify Deployment Configuration
run: |
make build-images
KUBECONFIG=${PWD}/kubeconfig_managed_e2e make kind-deploy-controller-dev
- name: Debug
if: ${{ failure() }}
run: |
Expand Down
8 changes: 6 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -334,15 +334,19 @@ e2e-test: e2e-dependencies
$(GINKGO) -v --fail-fast --slow-spec-threshold=10s $(E2E_TEST_ARGS) test/e2e

.PHONY: e2e-test-coverage
e2e-test-coverage: E2E_TEST_ARGS = --json-report=report_e2e.json --label-filter='!hosted-mode' --output-dir=.
e2e-test-coverage: E2E_TEST_ARGS = --json-report=report_e2e.json --label-filter='!hosted-mode && !running-in-cluster' --output-dir=.
e2e-test-coverage: e2e-run-instrumented e2e-test e2e-stop-instrumented

.PHONY: e2e-test-hosted-mode-coverage
e2e-test-hosted-mode-coverage: E2E_TEST_ARGS = --json-report=report_e2e_hosted_mode.json --label-filter="hosted-mode" --output-dir=.
e2e-test-hosted-mode-coverage: E2E_TEST_ARGS = --json-report=report_e2e_hosted_mode.json --label-filter="hosted-mode && !running-in-cluster" --output-dir=.
e2e-test-hosted-mode-coverage: COVERAGE_E2E_OUT = coverage_e2e_hosted_mode.out
e2e-test-hosted-mode-coverage: export TARGET_KUBECONFIG_PATH = $(PWD)/kubeconfig_managed2
e2e-test-hosted-mode-coverage: e2e-run-instrumented e2e-test e2e-stop-instrumented

.PHONY: e2e-test-running-in-cluster
e2e-test-running-in-cluster: E2E_TEST_ARGS = --label-filter="running-in-cluster" --coverprofile=coverage_e2e_uninstall.out --coverpkg=open-cluster-management.io/config-policy-controller/pkg/triggeruninstall
e2e-test-running-in-cluster: e2e-test

.PHONY: e2e-build-instrumented
e2e-build-instrumented:
go test -covermode=atomic -coverpkg=$(shell cat go.mod | head -1 | cut -d ' ' -f 2)/... -c -tags e2e ./ -o build/_output/bin/$(IMG)-instrumented
Expand Down
2 changes: 1 addition & 1 deletion build/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ COPY --from=builder ${REPO_PATH}/build/_output/bin/${COMPONENT} ${OPERATOR}
COPY --from=builder ${REPO_PATH}/build/bin /usr/local/bin
RUN /usr/local/bin/user_setup

ENTRYPOINT ["/usr/local/bin/entrypoint"]
ENTRYPOINT ["/usr/local/bin/entrypoint", "controller"]

RUN microdnf update && \
microdnf clean all
Expand Down
104 changes: 25 additions & 79 deletions controllers/configurationpolicy_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,20 +163,11 @@ func (r *ConfigurationPolicyReconciler) PeriodicallyExecConfigPolicies(
}

const waiting = 10 * time.Minute
var exiting bool
// Loop twice after exit condition is received to account for race conditions and retries.
loopsAfterExit := 2

for !exiting || (exiting && loopsAfterExit > 0) {
start := time.Now()

select {
case <-ctx.Done():
exiting = true
loopsAfterExit--
default:
}
exiting := false

for !exiting {
start := time.Now()
policiesList := policyv1.ConfigurationPolicyList{}

var skipLoop bool
Expand Down Expand Up @@ -206,28 +197,6 @@ func (r *ConfigurationPolicyReconciler) PeriodicallyExecConfigPolicies(
skipLoop = true
}

needDeploymentFinalizer := false

for i := range policiesList.Items {
plc := policiesList.Items[i]

if objHasFinalizer(&plc, pruneObjectFinalizer) {
needDeploymentFinalizer = true

break
}
}

if err := r.manageDeploymentFinalizer(needDeploymentFinalizer); err != nil {
if errors.Is(err, common.ErrNoNamespace) || errors.Is(err, common.ErrRunLocal) {
log.Info("Not managing the controller's deployment finalizer because it is running locally")
} else {
log.Error(err, "Failed to manage the controller's deployment finalizer, skipping loop")

skipLoop = true
}
}

cleanupImmediately, err := r.cleanupImmediately()
if err != nil {
log.Error(err, "Failed to determine if it's time to cleanup immediately")
Expand Down Expand Up @@ -281,6 +250,12 @@ func (r *ConfigurationPolicyReconciler) PeriodicallyExecConfigPolicies(
log.V(2).Info("Sleeping before reprocessing the configuration policies", "seconds", sleepTime)
time.Sleep(sleepTime)
}

select {
case <-ctx.Done():
exiting = true
default:
}
}
}

Expand Down Expand Up @@ -618,10 +593,10 @@ func (r *ConfigurationPolicyReconciler) cleanUpChildObjects(plc policyv1.Configu

// cleanupImmediately returns true when the cluster is in a state where configurationpolicies should
// be removed as soon as possible, ignoring the pruneObjectBehavior of the policies. This is the
// case when the CRD or the controller's deployment are already being deleted.
// case when the controller is being uninstalled or the CRD is being deleted.
func (r *ConfigurationPolicyReconciler) cleanupImmediately() (bool, error) {
deployDeleting, deployErr := r.deploymentIsDeleting()
if deployErr == nil && deployDeleting {
beingUninstalled, beingUninstalledErr := r.isBeingUninstalled()
if beingUninstalledErr == nil && beingUninstalled {
return true, nil
}

Expand All @@ -630,36 +605,16 @@ func (r *ConfigurationPolicyReconciler) cleanupImmediately() (bool, error) {
return true, nil
}

if deployErr == nil && defErr == nil {
if beingUninstalledErr == nil && defErr == nil {
// if either was deleting, we would've already returned.
return false, nil
}

// At least one had an unexpected error, so the decision can't be made right now
//nolint:errorlint // we can't choose just one of the errors to "correctly" wrap
return false, fmt.Errorf("deploymentIsDeleting error: '%v', definitionIsDeleting error: '%v'",
deployErr, defErr)
}

func (r *ConfigurationPolicyReconciler) deploymentIsDeleting() (bool, error) {
key, keyErr := common.GetOperatorNamespacedName()
if keyErr != nil {
if errors.Is(keyErr, common.ErrNoNamespace) || errors.Is(keyErr, common.ErrRunLocal) {
// running locally
return false, nil
}

return false, keyErr
}

deployment := appsv1.Deployment{}

err := r.Get(context.TODO(), key, &deployment)
if err != nil {
return false, err
}

return deployment.DeletionTimestamp != nil, nil
return false, fmt.Errorf(
"isBeingUninstalled error: '%v', definitionIsDeleting error: '%v'", beingUninstalledErr, defErr,
)
}

func (r *ConfigurationPolicyReconciler) definitionIsDeleting() (bool, error) {
Expand Down Expand Up @@ -2808,32 +2763,23 @@ func convertPolicyStatusToString(plc *policyv1.ConfigurationPolicy) (results str
return result
}

func (r *ConfigurationPolicyReconciler) manageDeploymentFinalizer(shouldBeSet bool) error {
func (r *ConfigurationPolicyReconciler) isBeingUninstalled() (bool, error) {
key, err := common.GetOperatorNamespacedName()
if err != nil {
return err
// Running locally
if errors.Is(err, common.ErrNoNamespace) || errors.Is(err, common.ErrRunLocal) {
return false, nil
}

return false, err
}

deployment := appsv1.Deployment{}
if err := r.Client.Get(context.TODO(), key, &deployment); err != nil {
return err
}

if objHasFinalizer(&deployment, pruneObjectFinalizer) {
if shouldBeSet {
return nil
}

deployment.SetFinalizers(removeObjFinalizer(&deployment, pruneObjectFinalizer))
} else {
if !shouldBeSet {
return nil
}

deployment.SetFinalizers(addObjFinalizer(&deployment, pruneObjectFinalizer))
return false, err
}

return r.Update(context.TODO(), &deployment)
return deployment.Annotations[common.UninstallingAnnotation] == "true", nil
}

func recoverFlow() {
Expand Down
1 change: 0 additions & 1 deletion controllers/configurationpolicy_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,6 @@ func addObjFinalizer(obj metav1.Object, finalizer string) []string {
return append(obj.GetFinalizers(), finalizer)
}

// nolint: unparam
func removeObjFinalizer(obj metav1.Object, finalizer string) []string {
result := []string{}

Expand Down
1 change: 1 addition & 0 deletions deploy/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ spec:
command:
- config-policy-controller
args:
- "controller"
- "--enable-lease=true"
- "--log-level=2"
- "--v=0"
Expand Down
1 change: 1 addition & 0 deletions deploy/operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ spec:
spec:
containers:
- args:
- controller
- --enable-lease=true
- --log-level=2
- --v=0
Expand Down
Loading

0 comments on commit 44acaa4

Please sign in to comment.