Skip to content

Commit ac3047a

Browse files
committed
Add operand cleanup feature
This commit introduces the operand cleanup feature which enables a CSV to be configured so the OLM operator can clean up CRs on operator uninstall or CSV deletion. Adds the logic for opt-in/opt-out for operand cleanup and adds a finalizer that runs through the cleanup steps for removing CRs and updating the status.
1 parent 3787874 commit ac3047a

File tree

10 files changed

+425
-162
lines changed

10 files changed

+425
-162
lines changed

go.mod

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ replace (
6464
github.com/openshift/api => github.com/openshift/api v0.0.0-20200331152225-585af27e34fd // release-4.5
6565
github.com/openshift/client-go => github.com/openshift/client-go v0.0.0-20200326155132-2a6cd50aedd0 // release-4.5
6666

67+
// TODO: Remove when operator-framework/api has the cleanup API in a tagged release
68+
// pinned for using APIs from feature branch "add-csv-cleanup-api"
69+
github.com/operator-framework/api => github.com/hasbro17/api v0.3.13-0.20210304212710-93ba7a75566c
70+
6771
// pinned because latest etcd does not yet work with the latest grpc version (1.30.0)
6872
go.etcd.io/etcd => go.etcd.io/etcd v0.5.0-alpha.5.0.20200520232829-54ba9589114f
6973
google.golang.org/grpc => google.golang.org/grpc v1.27.0

go.sum

Lines changed: 2 additions & 121 deletions
Large diffs are not rendered by default.

pkg/controller/operators/olm/operator.go

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"errors"
66
"fmt"
7+
"reflect"
78
"strings"
89
"time"
910

@@ -13,6 +14,7 @@ import (
1314
admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
1415
corev1 "k8s.io/api/core/v1"
1516
rbacv1 "k8s.io/api/rbac/v1"
17+
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
1618
extinf "k8s.io/apiextensions-apiserver/pkg/client/informers/externalversions"
1719
k8serrors "k8s.io/apimachinery/pkg/api/errors"
1820
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -50,6 +52,16 @@ import (
5052
"github.com/operator-framework/operator-lifecycle-manager/pkg/metrics"
5153
)
5254

55+
const (
56+
// CleanupFinalizer is the finalizer name that updateCleanupFinalizer adds to or
// removes from a CSV, and that syncClusterServiceVersion checks on a CSV pending
// deletion before calling runCleanupFinalizer.
CleanupFinalizer = "operatorframework.io/cleanup-apis"
57+
// MaxCRListSize is used to limit the number of CRs displayed
58+
// on the status.cleanup.pendingDeletion block
59+
// This prevents the CSV size from exceeding limits in the event of
60+
// a large number of CRs being cleaned up.
61+
// TODO: Should this be configurable?
62+
MaxCRListSize = 10
63+
)
64+
5365
var (
5466
ErrRequirementsNotMet = errors.New("requirements were not met")
5567
ErrCRDOwnerConflict = errors.New("conflicting CRD owner in namespace")
@@ -1062,6 +1074,189 @@ func (a *Operator) deleteChild(csv *v1alpha1.ClusterServiceVersion, logger *logr
10621074
return a.client.OperatorsV1alpha1().ClusterServiceVersions(csv.GetNamespace()).Delete(context.TODO(), csv.GetName(), *metav1.NewDeleteOptions(0))
10631075
}
10641076

1077+
// updateCleanupFinalizer will set or clear the CSV's cleanup finalizer based on the cleanup spec
1078+
// It also unsets the finalizer when the CSV is in phase=Replacing to prevent CSV deletion from
1079+
// trigerring cleanup during an upgrade.
1080+
// Returns the updated CSV or nil if no update needed
1081+
func (a *Operator) updateCleanupFinalizer(inCSV *v1alpha1.ClusterServiceVersion) *v1alpha1.ClusterServiceVersion {
1082+
outCSV := inCSV.DeepCopy()
1083+
hasFinalizer := inCSV.HasFinalizer(CleanupFinalizer)
1084+
1085+
// If being replaced, remove finalizer and opt-out to prevent cleanup of CRs
1086+
// when this replacing CSV gets deleted
1087+
if hasFinalizer && inCSV.Status.Phase == v1alpha1.CSVPhaseReplacing {
1088+
outCSV.RemoveFinalizer(CleanupFinalizer)
1089+
// We forcefully opt-out of cleanup by updating spec.cleanup to prevent the cleanup finalizer
1090+
// from being attached again on the next reconcile.
1091+
// TODO: Having the controller update the spec is an anti-pattern. So we could avoid updating the spec
1092+
// and always treat a CSV with phase=Replacing as spec.cleanup.enabled=false.
1093+
outCSV.Spec.Cleanup.Enabled = false
1094+
return outCSV
1095+
}
1096+
1097+
// No update if cleanup enabled with finalizer already present, or disabled with no finalizer
1098+
if inCSV.Spec.Cleanup.Enabled && hasFinalizer || !inCSV.Spec.Cleanup.Enabled && !hasFinalizer {
1099+
return nil
1100+
}
1101+
1102+
if inCSV.Spec.Cleanup.Enabled && !hasFinalizer {
1103+
// Add finalizer if missing
1104+
outCSV.ObjectMeta.Finalizers = append(inCSV.ObjectMeta.Finalizers, CleanupFinalizer)
1105+
} else if !inCSV.Spec.Cleanup.Enabled && hasFinalizer {
1106+
// Remove finalizer if not needed
1107+
outCSV.RemoveFinalizer(CleanupFinalizer)
1108+
}
1109+
return outCSV
1110+
}
1111+
1112+
// parseResourceGroup parses a CRD name of the form "resource.group" into its plural
// resource name and API group. The string is split on the first "." only, so the group
// may itself contain dots (e.g. "etcdclusters.etcd.database.coreos.com").
// Returns an error if the name has no "." or if either the resource or the group is empty.
// TODO: Move to some util pkg
func parseResourceGroup(name string) (resourcePlural string, group string, err error) {
	rg := strings.SplitN(name, ".", 2)
	if len(rg) != 2 {
		return "", "", fmt.Errorf("error parsing CRD name %s: should be of the format 'resource.group'", name)
	}
	if rg[0] == "" || rg[1] == "" {
		return "", "", fmt.Errorf("error parsing CRD name %s: resource(%s) and group(%s) cannot be empty", name, rg[0], rg[1])
	}
	return rg[0], rg[1], nil
}
1129+
1130+
// getCRNamespaces returns the list of target namespaces to look at when cleaning up CRs
1131+
// A cluster-scoped CRD returns a single item list that represents no namespace: [""]
1132+
// A namespace-scoped CRD results in a single/multi/all namespaces list e.g [ns1, ns2, ...]
1133+
func (a *Operator) getCRNamespaces(crdName string, csv *v1alpha1.ClusterServiceVersion) ([]string, error) {
1134+
var crNamespaces []string
1135+
1136+
crd, err := a.opClient.ApiextensionsInterface().ApiextensionsV1().CustomResourceDefinitions().Get(context.TODO(), crdName, metav1.GetOptions{})
1137+
if err != nil {
1138+
return nil, fmt.Errorf("error getting CRD %s: %v", crdName, err)
1139+
}
1140+
1141+
// CRD is cluster-scoped
1142+
if crd.Spec.Scope == apiextensionsv1.ClusterScoped {
1143+
// We only return the namespace "" for which the CR client uses cluster-scoped requests
1144+
crNamespaces = append(crNamespaces, "")
1145+
return crNamespaces, nil
1146+
}
1147+
1148+
// Parse the namespaces list from the CSV's target namespaces annotation
1149+
targetNamespacesSet := resolver.NewNamespaceSetFromString(csv.Annotations[v1.OperatorGroupTargetsAnnotationKey])
1150+
if !targetNamespacesSet.IsAllNamespaces() {
1151+
for ns := range targetNamespacesSet {
1152+
crNamespaces = append(crNamespaces, ns)
1153+
}
1154+
return crNamespaces, nil
1155+
}
1156+
1157+
// All namespaces means olm.targetNamespaces=""
1158+
// Translate that into the acutal list of all namespaces
1159+
allNamespaces, err := a.lister.CoreV1().NamespaceLister().List(labels.Everything())
1160+
if err != nil {
1161+
return nil, fmt.Errorf("error listing all namespaces: %v", err)
1162+
}
1163+
1164+
for _, ns := range allNamespaces {
1165+
crNamespaces = append(crNamespaces, ns.Name)
1166+
}
1167+
1168+
return crNamespaces, nil
1169+
}
1170+
1171+
// runCleanupFinalizer runs the process of cleaning up CRs for the operator
1172+
// and is called when a CSV is pending deletion on the cleanup finalizer.
1173+
// The finalizer is cleared once cleanup finishes or there is an opt-out of cleanup.
1174+
// Returns the CSV if there is an update to the status or finalizer, and nil if unchanged.
1175+
func (a *Operator) runCleanupFinalizer(inCSV *v1alpha1.ClusterServiceVersion) (*v1alpha1.ClusterServiceVersion, error) {
1176+
outCSV := inCSV.DeepCopy()
1177+
removeCleanupFinalizer := true
1178+
1179+
// For each owned CRD, list and delete all CRs managed by the operator in the operator's target namespaces
1180+
pendingDeletion := []v1alpha1.ResourceList{}
1181+
for _, ownedCRD := range inCSV.Spec.CustomResourceDefinitions.Owned {
1182+
resourcePlural, group, err := parseResourceGroup(ownedCRD.Name)
1183+
if err != nil {
1184+
return nil, err
1185+
}
1186+
1187+
rl := v1alpha1.ResourceList{
1188+
Group: group,
1189+
Version: ownedCRD.Version,
1190+
Kind: ownedCRD.Kind,
1191+
Instances: []v1alpha1.NamespacedName{},
1192+
}
1193+
1194+
// TODO: Add a GVK string method e.g ResourceList.GVK()
1195+
gvk := rl.Group + "/" + rl.Version + " " + rl.Kind
1196+
1197+
// Get the list of target namespaces to look at for cleaning up CRs
1198+
// This translates olm.targetNamespaces="" to the concrete list of all namespaces
1199+
// For cluster-scoped CRDs we get [""] which is handled by the CR client as cluster-scoped
1200+
crNamespaces, err := a.getCRNamespaces(ownedCRD.Name, inCSV)
1201+
if err != nil {
1202+
return nil, fmt.Errorf("failed to execute finalizer: error getting CR namespaces: %v", err)
1203+
}
1204+
1205+
for _, ns := range crNamespaces {
1206+
crList, err := a.opClient.ListCustomResource(group, rl.Version, ns, resourcePlural)
1207+
if err != nil {
1208+
return nil, fmt.Errorf("failed to execute finalizer: error listing CRs for type %s: %v", gvk, err)
1209+
}
1210+
1211+
for _, cr := range crList.Items {
1212+
// Delete the CR if it isn't already pending deletion
1213+
if cr.GetDeletionTimestamp().IsZero() {
1214+
err := a.opClient.DeleteCustomResource(group, rl.Version, ns, resourcePlural, cr.GetName())
1215+
if err != nil && !k8serrors.IsNotFound(err) {
1216+
return nil, fmt.Errorf("failed to execute finalizer: error deleting CR %s/%s of type %s: %v", cr.GetNamespace(), cr.GetName(), gvk, err)
1217+
}
1218+
}
1219+
1220+
// We only append a list of N CRs per GVK to display in the cleanup status block: status.cleanup.pendingDeletion
1221+
// This to prevent the CSV object size from exceeding the etcd enforced limit when there are a significantly large
1222+
// number of CRs present.
1223+
// DEBUG: Won't the previous ListCustomResource() operation fail for a sufficiently large number of CRs anyway?
1224+
if len(rl.Instances) < MaxCRListSize {
1225+
rl.Instances = append(rl.Instances, v1alpha1.NamespacedName{Name: cr.GetName(), Namespace: cr.GetNamespace()})
1226+
}
1227+
}
1228+
1229+
// Keep the cleanup finalizer if there is any CR still pending deletion
1230+
if len(crList.Items) != 0 {
1231+
removeCleanupFinalizer = false
1232+
}
1233+
1234+
}
1235+
1236+
pendingDeletion = append(pendingDeletion, rl)
1237+
}
1238+
1239+
// Clear the cleanup finalizer and status cleanup block if all CRs are deleted
1240+
if removeCleanupFinalizer {
1241+
outCSV.RemoveFinalizer(CleanupFinalizer)
1242+
outCSV.Status.Cleanup.PendingDeletion = []v1alpha1.ResourceList{}
1243+
return outCSV, nil
1244+
}
1245+
1246+
// TODO: Check if we need to add the cleanup status condition if we haven't already done so?
1247+
1248+
// Check if we need to update the cleanup status
1249+
// DEBUG: Are the instance arrays always ordered the same? If not, this could cause perpetual updates.
1250+
// Is the CR list order from List() and the existing status block always the same?
1251+
outCSV.Status.Cleanup.PendingDeletion = pendingDeletion
1252+
if reflect.DeepEqual(inCSV.Status.Cleanup.PendingDeletion, outCSV.Status.Cleanup.PendingDeletion) {
1253+
return nil, nil
1254+
}
1255+
1256+
// Return for status update
1257+
return outCSV, nil
1258+
}
1259+
10651260
// syncClusterServiceVersion is the method that gets called when we see a CSV event in the cluster
10661261
func (a *Operator) syncClusterServiceVersion(obj interface{}) (syncError error) {
10671262
clusterServiceVersion, ok := obj.(*v1alpha1.ClusterServiceVersion)
@@ -1090,6 +1285,56 @@ func (a *Operator) syncClusterServiceVersion(obj interface{}) (syncError error)
10901285
return
10911286
}
10921287

1288+
// Add or remove the cleanup finalizer based on the cleanup spec
1289+
// This can also abort an in-progress cleanup and unblock CSV deletion
1290+
// If the CSV is being replaced, it will also be opted out of cleanup
1291+
if outCSV := a.updateCleanupFinalizer(clusterServiceVersion); outCSV != nil {
1292+
_, err := a.client.OperatorsV1alpha1().ClusterServiceVersions(outCSV.GetNamespace()).Update(context.TODO(), outCSV, metav1.UpdateOptions{})
1293+
if err != nil {
1294+
syncError = fmt.Errorf("failed to update cleanup finalizer: %v", err)
1295+
}
1296+
return
1297+
}
1298+
1299+
// Check if the CSV is pending deletion
1300+
if !clusterServiceVersion.ObjectMeta.DeletionTimestamp.IsZero() {
1301+
if !clusterServiceVersion.HasFinalizer(CleanupFinalizer) {
1302+
// Stop reconciliation as the deleted CSV is not pending on the cleanup finalizer
1303+
return
1304+
}
1305+
1306+
// CSV deletion is blocked on the cleanup finalizer
1307+
outCSV, err := a.runCleanupFinalizer(clusterServiceVersion)
1308+
if err != nil {
1309+
syncError = fmt.Errorf("failed to run cleanup finalizer: %v", err)
1310+
return
1311+
}
1312+
if outCSV == nil {
1313+
// Still awaiting cleanup and nothing to update on the CSV status
1314+
// TODO: We'll want to requeue again after some time so we can check on the progress of cleanup
1315+
// When the CRs we're waiting for finish deletion we won't see a CSV reconcile event
1316+
// so the CSV finalizer is kept for longer than it needs to be.
1317+
return
1318+
}
1319+
1320+
// Update the CSV if the cleanup finalizer has been removed
1321+
if !outCSV.HasFinalizer(CleanupFinalizer) {
1322+
_, err := a.client.OperatorsV1alpha1().ClusterServiceVersions(outCSV.GetNamespace()).Update(context.TODO(), outCSV, metav1.UpdateOptions{})
1323+
if err != nil {
1324+
syncError = fmt.Errorf("error updating ClusterServiceVersion: %v ", err)
1325+
return
1326+
}
1327+
return
1328+
}
1329+
1330+
// Otherwise update the cleanup status
1331+
_, err = a.client.OperatorsV1alpha1().ClusterServiceVersions(outCSV.GetNamespace()).UpdateStatus(context.TODO(), outCSV, metav1.UpdateOptions{})
1332+
if err != nil {
1333+
syncError = fmt.Errorf("error updating ClusterServiceVersion status: %v ", err)
1334+
}
1335+
return
1336+
}
1337+
10931338
outCSV, syncError := a.transitionCSVState(*clusterServiceVersion)
10941339

10951340
if outCSV == nil {

pkg/lib/operatorclient/client.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,17 +36,17 @@ type ClientInterface interface {
3636

3737
// CustomResourceClient contains methods for the Custom Resource.
3838
type CustomResourceClient interface {
39-
GetCustomResource(apiGroup, version, namespace, resourceKind, resourceName string) (*unstructured.Unstructured, error)
40-
GetCustomResourceRaw(apiGroup, version, namespace, resourceKind, resourceName string) ([]byte, error)
39+
GetCustomResource(apiGroup, version, namespace, resourcePlural, resourceName string) (*unstructured.Unstructured, error)
40+
GetCustomResourceRaw(apiGroup, version, namespace, resourcePlural, resourceName string) ([]byte, error)
4141
CreateCustomResource(item *unstructured.Unstructured) error
4242
CreateCustomResourceRaw(apiGroup, version, namespace, kind string, data []byte) error
4343
CreateCustomResourceRawIfNotFound(apiGroup, version, namespace, kind, name string, data []byte) (bool, error)
4444
UpdateCustomResource(item *unstructured.Unstructured) error
45-
UpdateCustomResourceRaw(apiGroup, version, namespace, resourceKind, resourceName string, data []byte) error
46-
CreateOrUpdateCustomeResourceRaw(apiGroup, version, namespace, resourceKind, resourceName string, data []byte) error
47-
DeleteCustomResource(apiGroup, version, namespace, resourceKind, resourceName string) error
48-
AtomicModifyCustomResource(apiGroup, version, namespace, resourceKind, resourceName string, f CustomResourceModifier, data interface{}) error
49-
ListCustomResource(apiGroup, version, namespace, resourceKind string) (*CustomResourceList, error)
45+
UpdateCustomResourceRaw(apiGroup, version, namespace, resourcePlural, resourceName string, data []byte) error
46+
CreateOrUpdateCustomeResourceRaw(apiGroup, version, namespace, resourcePlural, resourceName string, data []byte) error
47+
DeleteCustomResource(apiGroup, version, namespace, resourcePlural, resourceName string) error
48+
AtomicModifyCustomResource(apiGroup, version, namespace, resourcePlural, resourceName string, f CustomResourceModifier, data interface{}) error
49+
ListCustomResource(apiGroup, version, namespace, resourcePlural string) (*CustomResourceList, error)
5050
}
5151

5252
// APIServiceClient contains methods for manipulating APIServiceBindings.

0 commit comments

Comments
 (0)