Skip to content

Commit

Permalink
Cleanup conversion webhooks when an operator is uninstalled (#2832)
Browse files Browse the repository at this point in the history
Problem: When uninstalling a CSV, OLM has always avoided deleting the
associated CRD as all CRs on cluster are subsequently deleted, possibly
resulting in user dataloss.

OLM supports defining conversion webhooks within the CSV. On cluster,
conversion webhooks are defined with a CRD and point to a service that
handles conversion.  If the service is unable to fulfill the request,
all requests against the CRs associated with the CRD will fail.

When uninstalling a CSV, OLM does not remove the conversion webhook from
the CRD, meaning that all requests against the CRs associated with the
CRD will fail, resulting in at least two concerns:
1. OLM is unable to subsequently reinstall the operator. When installing
   a CSV, if the CRD already exists and instances of CRs exist as well,
   OLM performs a series of checks which ensure that none of the CRs are
   invalidated against the new schema. The existing CRD's conversion
   webhooks points to a non-existant service, causing the check to fail
   and preventing installs.
2. Broken conversion webhooks causes kubernete's garbage collection to
   fail.

Solution: When a CSV is deleted, if no CSV exists that is replacing it,
set the CRD's conversion strategy to None.

Signed-off-by: Alexander Greene <greene.al1991@gmail.com>
  • Loading branch information
awgreene authored Aug 8, 2022
1 parent f982e2f commit 9437498
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 1 deletion.
41 changes: 41 additions & 0 deletions pkg/controller/operators/olm/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
extinf "k8s.io/apiextensions-apiserver/pkg/client/informers/externalversions"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
Expand Down Expand Up @@ -1121,6 +1122,46 @@ func (a *Operator) handleClusterServiceVersionDeletion(obj interface{}) {
logger.WithError(err).Warnf("failed to requeue gc event: %v", webhook)
}
}

// Conversion webhooks are defined within a CRD.
// In an effort to prevent customer dataloss, OLM does not delete CRDs associated with a CSV when it is deleted.
// Deleting a CSV that introduced a conversion webhook removes the deployment that serviced the conversion webhook calls.
// If a conversion webhook is defined and the service isn't available, all requests against the CR associated with the CRD will fail.
// This ultimately breaks kubernetes garbage collection and prevents OLM from reinstalling the CSV as CR validation against the new CRD's
// openapiv3 schema fails.
// As such, when a CSV is deleted OLM will check if it is being replaced. If the CSV is not being replaced, OLM will remove the conversion
// webhook from the CRD definition.
csvs, err := a.lister.OperatorsV1alpha1().ClusterServiceVersionLister().ClusterServiceVersions(clusterServiceVersion.GetNamespace()).List(labels.Everything())
if err != nil {
logger.Errorf("error listing csvs: %v\n", err)
}
for _, csv := range csvs {
if csv.Spec.Replaces == clusterServiceVersion.GetName() {
return
}
}

for _, desc := range clusterServiceVersion.Spec.WebhookDefinitions {
if desc.Type != v1alpha1.ConversionWebhook || len(desc.ConversionCRDs) == 0 {
continue
}

for i, crdName := range desc.ConversionCRDs {
crd, err := a.lister.APIExtensionsV1().CustomResourceDefinitionLister().Get(crdName)
if err != nil {
logger.Errorf("error getting CRD %v which was defined in CSVs spec.WebhookDefinition[%d]: %v\n", crdName, i, err)
continue
}

copy := crd.DeepCopy()
copy.Spec.Conversion.Strategy = apiextensionsv1.NoneConverter
copy.Spec.Conversion.Webhook = nil

if _, err = a.opClient.ApiextensionsInterface().ApiextensionsV1().CustomResourceDefinitions().Update(context.TODO(), copy, metav1.UpdateOptions{}); err != nil {
logger.Errorf("error updating conversion strategy for CRD %v: %v\n", crdName, err)
}
}
}
}

func (a *Operator) removeDanglingChildCSVs(csv *v1alpha1.ClusterServiceVersion) error {
Expand Down
3 changes: 3 additions & 0 deletions test/e2e/csv_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4206,6 +4206,9 @@ func findLastEvent(events *corev1.EventList) (event corev1.Event) {
func buildCSVCleanupFunc(c operatorclient.ClientInterface, crc versioned.Interface, csv operatorsv1alpha1.ClusterServiceVersion, namespace string, deleteCRDs, deleteAPIServices bool) cleanupFunc {
return func() {
err := crc.OperatorsV1alpha1().ClusterServiceVersions(namespace).Delete(context.TODO(), csv.GetName(), metav1.DeleteOptions{})
if err != nil && apierrors.IsNotFound(err) {
err = nil
}
Expect(err).ShouldNot(HaveOccurred())

if deleteCRDs {
Expand Down
22 changes: 21 additions & 1 deletion test/e2e/webhook_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,7 @@ var _ = Describe("CSVs with a Webhook", func() {
}).Should(Equal(2))
})
When("Installed from a catalog Source", func() {
const csvName = "webhook-operator.v0.0.1"
var cleanupCSV cleanupFunc
var cleanupCatSrc cleanupFunc
var cleanupSubscription cleanupFunc
Expand Down Expand Up @@ -687,7 +688,7 @@ var _ = Describe("CSVs with a Webhook", func() {
defer cleanupSubscription()

// Wait for webhook-operator v2 csv to succeed
csv, err := awaitCSV(crc, source.GetNamespace(), "webhook-operator.v0.0.1", csvSucceededChecker)
csv, err := awaitCSV(crc, source.GetNamespace(), csvName, csvSucceededChecker)
require.NoError(GinkgoT(), err)

cleanupCSV = buildCSVCleanupFunc(c, crc, *csv, source.GetNamespace(), true, true)
Expand Down Expand Up @@ -775,6 +776,25 @@ var _ = Describe("CSVs with a Webhook", func() {
require.True(GinkgoT(), ok, "Unable to get spec.conversion.valid of v2 object")
require.True(GinkgoT(), v2SpecConversionMutate)
require.True(GinkgoT(), v2SpecConversionValid)

// Check that conversion strategies are disabled after uninstalling the operator.
err = crc.OperatorsV1alpha1().ClusterServiceVersions(generatedNamespace.GetName()).Delete(context.TODO(), csvName, metav1.DeleteOptions{})
require.NoError(GinkgoT(), err)

Eventually(func() error {
crd, err := c.ApiextensionsInterface().ApiextensionsV1().CustomResourceDefinitions().Get(context.TODO(), "webhooktests.webhook.operators.coreos.io", metav1.GetOptions{})
if err != nil {
return err
}

if crd.Spec.Conversion.Strategy != apiextensionsv1.NoneConverter {
return fmt.Errorf("conversion strategy is not NoneConverter")
}
if crd.Spec.Conversion.Webhook != nil {
return fmt.Errorf("webhook is not nil")
}
return nil
}).Should(Succeed())
})
})
When("WebhookDescription has conversionCRDs field", func() {
Expand Down

0 comments on commit 9437498

Please sign in to comment.