From dbd66ed877345629678338ed60d2bc53adb4d86d Mon Sep 17 00:00:00 2001 From: Steve Kuznetsov Date: Wed, 31 Jan 2024 07:45:56 -0700 Subject: [PATCH 1/3] *: don't duplicate owner references Signed-off-by: Steve Kuznetsov --- pkg/lib/ownerutil/util.go | 25 ++++++++++++------------- pkg/lib/testobj/runtime.go | 11 +++++++++-- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/pkg/lib/ownerutil/util.go b/pkg/lib/ownerutil/util.go index dafaeaf8b0..ebdd73a315 100644 --- a/pkg/lib/ownerutil/util.go +++ b/pkg/lib/ownerutil/util.go @@ -167,20 +167,13 @@ func AddNonBlockingOwner(object metav1.Object, owner Owner) { ownerRefs = []metav1.OwnerReference{} } - // Infer TypeMeta for the target - if err := InferGroupVersionKind(owner); err != nil { - log.Warn(err.Error()) - } - gvk := owner.GetObjectKind().GroupVersionKind() - + nonBlockingOwner := NonBlockingOwner(owner) for _, item := range ownerRefs { - if item.Kind == gvk.Kind { - if item.Name == owner.GetName() && item.UID == owner.GetUID() { - return - } + if item.Kind == nonBlockingOwner.Kind && item.Name == nonBlockingOwner.Name && item.UID == nonBlockingOwner.UID { + return } } - ownerRefs = append(ownerRefs, NonBlockingOwner(owner)) + ownerRefs = append(ownerRefs, nonBlockingOwner) object.SetOwnerReferences(ownerRefs) } @@ -284,14 +277,20 @@ func AddOwner(object metav1.Object, owner Owner, blockOwnerDeletion, isControlle } gvk := owner.GetObjectKind().GroupVersionKind() apiVersion, kind := gvk.ToAPIVersionAndKind() - ownerRefs = append(ownerRefs, metav1.OwnerReference{ + ownerRef := metav1.OwnerReference{ APIVersion: apiVersion, Kind: kind, Name: owner.GetName(), UID: owner.GetUID(), BlockOwnerDeletion: &blockOwnerDeletion, Controller: &isController, - }) + } + for _, ref := range ownerRefs { + if ref.Kind == ownerRef.Kind && ref.Name == ownerRef.Name && ref.UID == ownerRef.UID { + return + } + } + ownerRefs = append(ownerRefs, ownerRef) object.SetOwnerReferences(ownerRefs) } diff --git a/pkg/lib/testobj/runtime.go b/pkg/lib/testobj/runtime.go index 70da4f8923..6fd7c8b722 100644 --- a/pkg/lib/testobj/runtime.go +++ b/pkg/lib/testobj/runtime.go @@ -135,14 +135,21 @@ func WithOwner(owner, obj RuntimeMetaObject) RuntimeMetaObject { out := obj.DeepCopyObject().(RuntimeMetaObject) gvk := owner.GetObjectKind().GroupVersionKind() apiVersion, kind := gvk.ToAPIVersionAndKind() - refs := append(out.GetOwnerReferences(), metav1.OwnerReference{ + + nonBlockingOwner := metav1.OwnerReference{ APIVersion: apiVersion, Kind: kind, Name: owner.GetName(), UID: owner.GetUID(), BlockOwnerDeletion: &dontBlockOwnerDeletion, Controller: ¬Controller, - }) + } + for _, item := range out.GetOwnerReferences() { + if item.Kind == nonBlockingOwner.Kind && item.Name == nonBlockingOwner.Name && item.UID == nonBlockingOwner.UID { + return out + } + } + refs := append(out.GetOwnerReferences(), nonBlockingOwner) out.SetOwnerReferences(refs) return out From 3f66a6931c7a7fdd35e77af414df0c3d64d91a15 Mon Sep 17 00:00:00 2001 From: Steve Kuznetsov Date: Wed, 31 Jan 2024 07:58:31 -0700 Subject: [PATCH 2/3] olm,catalog: only validate the resources we label Each controller can see (and, therefore, can label) a separate set of GVRs. When we start up, detecting if any OLM-related resource needs labelling means that one controller may start, detect a need for labelling a resource it cannot itself label, detect that it's labelled everything it can, and restart. If the other operator is stuck for whatever reason, this leads the first controller to enter CrashLoopBackOff and break OCP upgrade. Signed-off-by: Steve Kuznetsov --- pkg/controller/operators/catalog/operator.go | 2 +- pkg/controller/operators/labeller/filters.go | 38 +++++++++++++++++++- pkg/controller/operators/olm/operator.go | 2 +- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/pkg/controller/operators/catalog/operator.go b/pkg/controller/operators/catalog/operator.go index 53b76c6272..375f087c2d 100644 --- a/pkg/controller/operators/catalog/operator.go +++ b/pkg/controller/operators/catalog/operator.go @@ -186,7 +186,7 @@ func NewOperator(ctx context.Context, kubeconfigPath string, clock utilclock.Clo return nil, err } - canFilter, err := labeller.Validate(ctx, logger, metadataClient, crClient) + canFilter, err := labeller.Validate(ctx, logger, metadataClient, crClient, labeller.IdentityCatalogOperator) if err != nil { return nil, err } diff --git a/pkg/controller/operators/labeller/filters.go b/pkg/controller/operators/labeller/filters.go index 2167d34544..333bc2b9ea 100644 --- a/pkg/controller/operators/labeller/filters.go +++ b/pkg/controller/operators/labeller/filters.go @@ -80,7 +80,39 @@ var filters = map[schema.GroupVersionResource]func(metav1.Object) bool{ }, } -func Validate(ctx context.Context, logger *logrus.Logger, metadataClient metadata.Interface, operatorClient operators.Interface) (bool, error) { +type Identity string + +const ( + IdentityCatalogOperator = "catalog-operator" + IdentityOLMOperator = "olm-operator" +) + +func gvrRelevant(identity Identity) func(schema.GroupVersionResource) bool { + switch identity { + case IdentityCatalogOperator: + return sets.New( + rbacv1.SchemeGroupVersion.WithResource("roles"), + rbacv1.SchemeGroupVersion.WithResource("rolebindings"), + corev1.SchemeGroupVersion.WithResource("serviceaccounts"), + corev1.SchemeGroupVersion.WithResource("services"), + corev1.SchemeGroupVersion.WithResource("pods"), + corev1.SchemeGroupVersion.WithResource("configmaps"), + batchv1.SchemeGroupVersion.WithResource("jobs"), + apiextensionsv1.SchemeGroupVersion.WithResource("customresourcedefinitions"), + ).Has + case IdentityOLMOperator: + return sets.New( + appsv1.SchemeGroupVersion.WithResource("deployments"), + rbacv1.SchemeGroupVersion.WithResource("clusterroles"), + rbacv1.SchemeGroupVersion.WithResource("clusterrolebindings"), + apiextensionsv1.SchemeGroupVersion.WithResource("customresourcedefinitions"), + ).Has + default: + panic("programmer error: invalid identity") + } +} + +func Validate(ctx context.Context, logger *logrus.Logger, metadataClient metadata.Interface, operatorClient operators.Interface, identity Identity) (bool, error) { okLock := sync.Mutex{} ok := true g, ctx := errgroup.WithContext(ctx) @@ -125,6 +157,10 @@ func Validate(ctx context.Context, logger *logrus.Logger, metadataClient metadat }) for gvr, filter := range allFilters { + if !gvrRelevant(identity)(gvr) { + logger.WithField("gvr", gvr.String()).Info("skipping irrelevant gvr") + continue + } gvr, filter := gvr, filter g.Go(func() error { list, err := metadataClient.Resource(gvr).List(ctx, metav1.ListOptions{}) diff --git a/pkg/controller/operators/olm/operator.go b/pkg/controller/operators/olm/operator.go index 44d3645ac8..ee75f84c0f 100644 --- a/pkg/controller/operators/olm/operator.go +++ b/pkg/controller/operators/olm/operator.go @@ -183,7 +183,7 @@ func newOperatorWithConfig(ctx context.Context, config *operatorConfig) (*Operat return nil, err } - canFilter, err := labeller.Validate(ctx, config.logger, config.metadataClient, config.externalClient) + canFilter, err := labeller.Validate(ctx, config.logger, config.metadataClient, config.externalClient, labeller.IdentityOLMOperator) if err != nil { return nil, err } From da9593a5791c8e38bccc27e8a0cac26e0606c186 Mon Sep 17 00:00:00 2001 From: Steve Kuznetsov Date: Wed, 31 Jan 2024 09:03:25 -0700 Subject: [PATCH 3/3] catalog: prune duplicate OwnerReferences on Services Signed-off-by: Steve Kuznetsov --- pkg/controller/operators/catalog/operator.go | 47 +++++++++++++++++++ .../operators/catalog/ownerrefs_test.go | 42 +++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 pkg/controller/operators/catalog/ownerrefs_test.go diff --git a/pkg/controller/operators/catalog/operator.go b/pkg/controller/operators/catalog/operator.go index 375f087c2d..18b9d92e64 100644 --- a/pkg/controller/operators/catalog/operator.go +++ b/pkg/controller/operators/catalog/operator.go @@ -541,6 +541,49 @@ func NewOperator(ctx context.Context, kubeconfigPath string, clock utilclock.Clo return nil, err } + { + gvr := servicesgvk + informer := serviceInformer.Informer() + + logger := op.logger.WithFields(logrus.Fields{"gvr": gvr.String()}) + logger.Info("registering owner reference fixer") + + queue := workqueue.NewRateLimitingQueueWithConfig(workqueue.DefaultControllerRateLimiter(), workqueue.RateLimitingQueueConfig{ + Name: gvr.String(), + }) + queueInformer, err := queueinformer.NewQueueInformer( + ctx, + queueinformer.WithQueue(queue), + queueinformer.WithLogger(op.logger), + queueinformer.WithInformer(informer), + queueinformer.WithSyncer(queueinformer.LegacySyncHandler(func(obj interface{}) error { + service, ok := obj.(*corev1.Service) + if !ok { + err := fmt.Errorf("wrong type %T, expected %T: %#v", obj, new(*corev1.Service), obj) + logger.WithError(err).Error("casting failed") + return fmt.Errorf("casting failed: %w", err) + } + + deduped := deduplicateOwnerReferences(service.OwnerReferences) + if len(deduped) != len(service.OwnerReferences) { + localCopy := service.DeepCopy() + localCopy.OwnerReferences = deduped + if _, err := op.opClient.KubernetesInterface().CoreV1().Services(service.Namespace).Update(ctx, localCopy, metav1.UpdateOptions{}); err != nil { + return err + } + } + return nil + }).ToSyncer()), + ) + if err != nil { + return nil, err + } + + if err := op.RegisterQueueInformer(queueInformer); err != nil { + return nil, err + } + } + // Wire Pods for CatalogSource catsrcReq, err := labels.NewRequirement(reconciler.CatalogSourceLabelKey, selection.Exists, nil) if err != nil { @@ -2860,3 +2903,7 @@ func (o *Operator) apiresourceFromGVK(gvk schema.GroupVersionKind) (metav1.APIRe logger.Info("couldn't find GVK in api discovery") return metav1.APIResource{}, olmerrors.GroupVersionKindNotFoundError{Group: gvk.Group, Version: gvk.Version, Kind: gvk.Kind} } + +func deduplicateOwnerReferences(refs []metav1.OwnerReference) []metav1.OwnerReference { + return sets.New(refs...).UnsortedList() +} diff --git a/pkg/controller/operators/catalog/ownerrefs_test.go b/pkg/controller/operators/catalog/ownerrefs_test.go new file mode 100644 index 0000000000..097a8409b3 --- /dev/null +++ b/pkg/controller/operators/catalog/ownerrefs_test.go @@ -0,0 +1,42 @@ +package catalog + +import ( + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestDedupe(t *testing.T) { + yes := true + refs := []metav1.OwnerReference{ + { + APIVersion: "apiversion", + Kind: "kind", + Name: "name", + UID: "uid", + Controller: &yes, + BlockOwnerDeletion: &yes, + }, + { + APIVersion: "apiversion", + Kind: "kind", + Name: "name", + UID: "uid", + Controller: &yes, + BlockOwnerDeletion: &yes, + }, + { + APIVersion: "apiversion", + Kind: "kind", + Name: "name", + UID: "uid", + Controller: &yes, + BlockOwnerDeletion: &yes, + }, + } + deduped := deduplicateOwnerReferences(refs) + t.Logf("got %d deduped from %d", len(deduped), len(refs)) + if len(deduped) == len(refs) { + t.Errorf("didn't dedupe: %#v", deduped) + } +}