From 8f18c31867bdf0f7e54a2bce6f9f196a502146a4 Mon Sep 17 00:00:00 2001 From: Jason Zhang Date: Wed, 31 Jan 2024 11:21:26 -0500 Subject: [PATCH] Enable status reporting for CatalogSource in OperatorPolicy Following the schema of implementing multiple `handle` functions, the CatalogSource status reporting logic utilizes the same `updateStatus` function to facilitate the `conditions` and `relatedObject` field updates on the related operator policy. The controller directly checks if the CatalogSource object exists on the cluster to propagate relevant health information up to the operator policy. The specific field that is checked in the catalog source is `.status.connectionState.lastObservedState`. Any value other than `READY` is considered as failing. ref: https://issues.redhat.com/browse/ACM-9285 Signed-off-by: Jason Zhang --- controllers/operatorpolicy_controller.go | 82 +++++++++- controllers/operatorpolicy_status.go | 94 ++++++++++++ test/e2e/case38_install_operator_test.go | 145 +++++++++++++++++- .../case38_operator_install/case38_OpPlc.yaml | 34 ---- .../case38_OpPlc_default_og.yaml | 28 ---- test/resources/operatorpolicy.yaml | 34 ---- 6 files changed, 317 insertions(+), 100 deletions(-) delete mode 100644 test/resources/case38_operator_install/case38_OpPlc.yaml delete mode 100644 test/resources/case38_operator_install/case38_OpPlc_default_og.yaml delete mode 100644 test/resources/operatorpolicy.yaml diff --git a/controllers/operatorpolicy_controller.go b/controllers/operatorpolicy_controller.go index 9a66f190..45bbd2c7 100644 --- a/controllers/operatorpolicy_controller.go +++ b/controllers/operatorpolicy_controller.go @@ -35,6 +35,7 @@ import ( const ( OperatorControllerName string = "operator-policy-controller" + CatalogSourceReady string = "READY" ) var ( @@ -58,6 +59,11 @@ var ( Version: "v1", Kind: "Deployment", } + catalogSrcGVK = schema.GroupVersionKind{ + Group: "operators.coreos.com", + Version: "v1alpha1", + Kind: "CatalogSource", + } ) // OperatorPolicyReconciler reconciles a OperatorPolicy object @@ -165,9 +171,83 @@ func (r *OperatorPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Reque return reconcile.Result{}, err } + // handle catalogsource + if err := r.handleCatalogSource(ctx, policy, subscription); err != nil { + OpLog.Error(err, "Error handling CatalogSource") + + return reconcile.Result{}, err + } + return reconcile.Result{}, nil } +func (r *OperatorPolicyReconciler) handleCatalogSource( + ctx context.Context, + policy *policyv1beta1.OperatorPolicy, + subscription *operatorv1alpha1.Subscription, +) error { + watcher := opPolIdentifier(policy.Namespace, policy.Name) + + var catalogName string + var catalogNS string + + if subscription == nil { + sub, err := buildSubscription(policy) + if err != nil { + return fmt.Errorf("error building Subscription: %w", err) + } + + catalogName = sub.Spec.CatalogSource + catalogNS = sub.Spec.CatalogSourceNamespace + } else { + catalogName = subscription.Spec.CatalogSource + catalogNS = subscription.Spec.CatalogSourceNamespace + } + + // Check if CatalogSource exists + foundCatalogSrc, err := r.DynamicWatcher.Get(watcher, catalogSrcGVK, + catalogNS, catalogName) + if err != nil { + return fmt.Errorf("error getting CatalogSource: %w", err) + } + + isMissing := foundCatalogSrc == nil + isUnhealthy := isMissing + + if !isMissing { + // CatalogSource is found, initiate health check + catalogSrcUnstruct := foundCatalogSrc.DeepCopy() + catalogSrc := new(operatorv1alpha1.CatalogSource) + + err := runtime.DefaultUnstructuredConverter. + FromUnstructured(catalogSrcUnstruct.Object, catalogSrc) + if err != nil { + return fmt.Errorf("error converting the retrieved CatalogSource to the Go type: %w", err) + } + + if catalogSrc.Status.GRPCConnectionState == nil { + // Unknown State + err := r.updateStatus(ctx, policy, catalogSourceUnknownCond, catalogSrcUnknownObj(catalogName, catalogNS)) + if err != nil { + return fmt.Errorf("error retrieving the status for a CatalogSource: %w", err) + } + + return nil + } + + CatalogSrcState := catalogSrc.Status.GRPCConnectionState.LastObservedState + isUnhealthy = (CatalogSrcState != CatalogSourceReady) + } + + err = r.updateStatus(ctx, policy, catalogSourceFindCond(isUnhealthy, isMissing), + catalogSourceObj(catalogName, catalogNS, isUnhealthy, isMissing)) + if err != nil { + return fmt.Errorf("error updating the status for a CatalogSource: %w", err) + } + + return nil +} + func (r *OperatorPolicyReconciler) handleOpGroup(ctx context.Context, policy *policyv1beta1.OperatorPolicy) error { watcher := opPolIdentifier(policy.Namespace, policy.Name) @@ -453,7 +533,7 @@ func (r *OperatorPolicyReconciler) handleSubscription( // For now, just mark it as compliant err := r.updateStatus(ctx, policy, matchesCond("Subscription"), matchedObj(foundSub)) if err != nil { - return nil, fmt.Errorf("error updating the status for an OperatorGroup that matches: %w", err) + return nil, fmt.Errorf("error updating the status for a Subscription that matches: %w", err) } return mergedSub, nil diff --git a/controllers/operatorpolicy_status.go b/controllers/operatorpolicy_status.go index 4afc25b8..03fada5e 100644 --- a/controllers/operatorpolicy_status.go +++ b/controllers/operatorpolicy_status.go @@ -226,6 +226,17 @@ func calculateComplianceCondition(policy *policyv1beta1.OperatorPolicy) metav1.C } // FUTURE: check additional conditions + idx, cond = policy.Status.GetCondition(catalogSrcConditionType) + if idx == -1 { + messages = append(messages, "the status of the CatalogSource is unknown") + foundNonCompliant = true + } else { + messages = append(messages, cond.Message) + + if cond.Status != metav1.ConditionFalse { + foundNonCompliant = true + } + } if foundNonCompliant { return metav1.Condition{ @@ -305,6 +316,7 @@ const ( subConditionType = "SubscriptionCompliant" csvConditionType = "ClusterServiceVersionCompliant" deploymentConditionType = "DeploymentCompliant" + catalogSrcConditionType = "CatalogSourcesUnhealthy" ) func condType(kind string) string { @@ -468,6 +480,88 @@ var noDeploymentsCond = metav1.Condition{ Message: "The ClusterServiceVersion is missing, thus meaning there are no relevant deployments", } +// catalogSourceFindCond is a conditionally compliant condition with reason +// based on the `isUnhealthy` and `isMissing` parameters +func catalogSourceFindCond(isUnhealthy bool, isMissing bool) metav1.Condition { + status := metav1.ConditionFalse + reason := "CatalogSourcesFound" + message := "CatalogSource was found" + + if isUnhealthy { + status = metav1.ConditionTrue + reason = "CatalogSourcesFoundUnhealthy" + message = "CatalogSource was found but is unhealthy" + } + + if isMissing { + status = metav1.ConditionTrue + reason = "CatalogSourcesNotFound" + message = "CatalogSource was not found" + } + + return metav1.Condition{ + Type: "CatalogSourcesUnhealthy", + Status: status, + Reason: reason, + Message: message, + } +} + +// catalogSourceUnknownCond is a NonCompliant condition +var catalogSourceUnknownCond = metav1.Condition{ + Type: "CatalogSourcesUnknownState", + Status: metav1.ConditionTrue, + Reason: "LastObservedUnknown", + Message: "Could not determine last observed state of CatalogSource", +} + +// catalogSourceObj returns a conditionally compliant RelatedObject with reason based on the +// `isUnhealthy` and `isMissing` parameters +func catalogSourceObj(catalogName string, catalogNS string, isUnhealthy bool, isMissing bool) policyv1.RelatedObject { + compliance := string(policyv1.Compliant) + reason := reasonWantFoundExists + + if isUnhealthy { + compliance = string(policyv1.NonCompliant) + reason = reasonWantFoundExists + " but is unhealthy" + } + + if isMissing { + compliance = string(policyv1.NonCompliant) + reason = reasonWantFoundDNE + } + + return policyv1.RelatedObject{ + Object: policyv1.ObjectResource{ + Kind: catalogSrcGVK.Kind, + APIVersion: catalogSrcGVK.GroupVersion().String(), + Metadata: policyv1.ObjectMetadata{ + Name: catalogName, + Namespace: catalogNS, + }, + }, + Compliant: compliance, + Reason: reason, + } +} + +// catalogSrcUnknownObj returns a NonCompliant RelatedObject with +// reason = 'Resource found but current state is unknown' +func catalogSrcUnknownObj(catalogName string, catalogNS string) policyv1.RelatedObject { + return policyv1.RelatedObject{ + Object: policyv1.ObjectResource{ + Kind: catalogSrcGVK.Kind, + APIVersion: catalogSrcGVK.GroupVersion().String(), + Metadata: policyv1.ObjectMetadata{ + Name: catalogName, + Namespace: catalogNS, + }, + }, + Compliant: string(policyv1.NonCompliant), + Reason: "Resource found but current state is unknown", + } +} + // missingWantedObj returns a NonCompliant RelatedObject with reason = 'Resource not found but should exist' func missingWantedObj(obj client.Object) policyv1.RelatedObject { return policyv1.RelatedObject{ diff --git a/test/e2e/case38_install_operator_test.go b/test/e2e/case38_install_operator_test.go index 0406679d..277d6b39 100644 --- a/test/e2e/case38_install_operator_test.go +++ b/test/e2e/case38_install_operator_test.go @@ -496,7 +496,6 @@ var _ = Describe("Test installing an operator from OperatorPolicy", Ordered, fun createObjWithParent(parentPolicyYAML, parentPolicyName, opPolYAML, opPolTestNS, gvrPolicy, gvrOperatorPolicy) }) - It("Should initially notice the matching Subscription", func() { check( opPolName, @@ -550,7 +549,6 @@ var _ = Describe("Test installing an operator from OperatorPolicy", Ordered, fun ) }) }) - Describe("Test health checks on OLM resources after OperatorPolicy operator installation", Ordered, func() { const ( opPolYAML = "../resources/case38_operator_install/operator-policy-no-group-enforce.yaml" @@ -610,7 +608,6 @@ var _ = Describe("Test installing an operator from OperatorPolicy", Ordered, fun ) }) }) - Describe("Test health checks on OLM resources on OperatorPolicy with failed CSV", Ordered, func() { const ( opPolYAML = "../resources/case38_operator_install/operator-policy-no-group-csv-fail.yaml" @@ -672,4 +669,146 @@ var _ = Describe("Test installing an operator from OperatorPolicy", Ordered, fun ) }) }) + Describe("Test status reporting for CatalogSource", Ordered, func() { + const ( + OpPlcYAML = "../resources/case38_operator_install/operator-policy-with-group.yaml" + OpPlcName = "oppol-with-group" + subName = "project-quay" + catSrcName = "operatorhubio-catalog" + ) + + BeforeAll(func() { + By("Applying creating a ns and the test policy") + utils.Kubectl("create", "ns", opPolTestNS) + DeferCleanup(func() { + utils.Kubectl("patch", "catalogsource", catSrcName, "-n", "olm", "--type=json", "-p", + `[{"op": "replace", "path": "/spec/image", "value": "quay.io/operatorhubio/catalog:latest"}]`) + utils.Kubectl("delete", "ns", opPolTestNS) + }) + + createObjWithParent(parentPolicyYAML, parentPolicyName, + OpPlcYAML, opPolTestNS, gvrPolicy, gvrOperatorPolicy) + }) + + It("Should initially show the CatalogSource is compliant", func() { + By("Checking the condition fields") + check( + OpPlcName, + false, + []policyv1.RelatedObject{{ + Object: policyv1.ObjectResource{ + Kind: "CatalogSource", + APIVersion: "operators.coreos.com/v1alpha1", + Metadata: policyv1.ObjectMetadata{ + Name: catSrcName, + Namespace: opPolTestNS, + }, + }, + Compliant: "Compliant", + Reason: "Resource found as expected", + }}, + metav1.Condition{ + Type: "CatalogSourcesUnhealthy", + Status: metav1.ConditionFalse, + Reason: "CatalogSourcesFound", + Message: "CatalogSource was found", + }, + "CatalogSource was found", + ) + }) + It("Should remain compliant when policy is enforced", func() { + By("Enforcing the policy") + utils.Kubectl("patch", "operatorpolicy", OpPlcName, "-n", opPolTestNS, "--type=json", "-p", + `[{"op": "replace", "path": "/spec/remediationAction", "value": "enforce"}]`) + + By("Checking the condition fields") + check( + OpPlcName, + false, + []policyv1.RelatedObject{{ + Object: policyv1.ObjectResource{ + Kind: "CatalogSource", + APIVersion: "operators.coreos.com/v1alpha1", + Metadata: policyv1.ObjectMetadata{ + Name: catSrcName, + Namespace: opPolTestNS, + }, + }, + Compliant: "Compliant", + Reason: "Resource found as expected", + }}, + metav1.Condition{ + Type: "CatalogSourcesUnhealthy", + Status: metav1.ConditionFalse, + Reason: "CatalogSourcesFound", + Message: "CatalogSource was found", + }, + "CatalogSource was found", + ) + }) + It("Should become NonCompliant when CatalogSource DNE", func() { + By("Patching the policy to reference a CatalogSource that DNE to emulate failure") + utils.Kubectl("patch", "operatorpolicy", OpPlcName, "-n", opPolTestNS, "--type=json", "-p", + `[{"op": "replace", "path": "/spec/subscription/source", "value": "fakeName"}]`) + + By("Checking the conditions and relatedObj in the policy") + check( + OpPlcName, + true, + []policyv1.RelatedObject{{ + Object: policyv1.ObjectResource{ + Kind: "CatalogSource", + APIVersion: "operators.coreos.com/v1alpha1", + Metadata: policyv1.ObjectMetadata{ + Name: "fakeName", + Namespace: opPolTestNS, + }, + }, + Compliant: "NonCompliant", + Reason: "Resource not found but should exist", + }}, + metav1.Condition{ + Type: "CatalogSourcesUnhealthy", + Status: metav1.ConditionTrue, + Reason: "CatalogSourcesNotFound", + Message: "CatalogSource was not found", + }, + "CatalogSource was not found", + ) + }) + It("Should remain NonCompliant when CatalogSource fails", func() { + By("Patching the policy to point to an existing CatalogSource") + utils.Kubectl("patch", "operatorpolicy", OpPlcName, "-n", opPolTestNS, "--type=json", "-p", + `[{"op": "replace", "path": "/spec/subscription/source", "value": "operatorhubio-catalog"}]`) + + By("Patching the CatalogSource to reference a broken image link") + utils.Kubectl("patch", "catalogsource", catSrcName, "-n", "olm", "--type=json", "-p", + `[{"op": "replace", "path": "/spec/image", "value": "quay.io/operatorhubio/fakecatalog:latest"}]`) + + By("Checking the conditions and relatedObj in the policy") + check( + OpPlcName, + true, + []policyv1.RelatedObject{{ + Object: policyv1.ObjectResource{ + Kind: "CatalogSource", + APIVersion: "operators.coreos.com/v1alpha1", + Metadata: policyv1.ObjectMetadata{ + Name: catSrcName, + Namespace: opPolTestNS, + }, + }, + Compliant: "NonCompliant", + Reason: "Resource found as expected but is unhealthy", + }}, + metav1.Condition{ + Type: "CatalogSourcesUnhealthy", + Status: metav1.ConditionTrue, + Reason: "CatalogSourcesFoundUnhealthy", + Message: "CatalogSource was found but is unhealthy", + }, + "CatalogSource was found but is unhealthy", + ) + }) + }) }) diff --git a/test/resources/case38_operator_install/case38_OpPlc.yaml b/test/resources/case38_operator_install/case38_OpPlc.yaml deleted file mode 100644 index 4a643b12..00000000 --- a/test/resources/case38_operator_install/case38_OpPlc.yaml +++ /dev/null @@ -1,34 +0,0 @@ -apiVersion: policy.open-cluster-management.io/v1beta1 -kind: OperatorPolicy -metadata: - name: test-operator-policy -spec: - remediationAction: enforce # or inform - severity: medium - complianceType: musthave # or mustnothave - operatorGroup: # optional - name: og-single - namespace: op-1 - target: - namespaces: - - op-1 - subscription: - channel: stable-3.8 - name: project-quay - namespace: op-1 - installPlanApproval: Automatic # may be overridden to Manual by other settings - source: operatorhubio-catalog - sourceNamespace: olm - startingCSV: quay-operator.v3.8.1 - removalBehavior: - operatorGroups: DeleteIfUnused - subscriptions: Delete - clusterServiceVersions: Delete - installPlans: Keep - customResourceDefinitions: Keep - apiServiceDefinitions: Keep - statusConfig: - catalogSourceUnhealthy: StatusMessageOnly - deploymentsUnavailable: NonCompliant - upgradesAvailable: StatusMessageOnly - upgradesProgressing: NonCompliant \ No newline at end of file diff --git a/test/resources/case38_operator_install/case38_OpPlc_default_og.yaml b/test/resources/case38_operator_install/case38_OpPlc_default_og.yaml deleted file mode 100644 index cd57496d..00000000 --- a/test/resources/case38_operator_install/case38_OpPlc_default_og.yaml +++ /dev/null @@ -1,28 +0,0 @@ -apiVersion: policy.open-cluster-management.io/v1beta1 -kind: OperatorPolicy -metadata: - name: test-operator-policy-no-og -spec: - remediationAction: enforce # or inform - severity: medium - complianceType: musthave # or mustnothave - subscription: - channel: stable-3.8 - name: project-quay - namespace: op-2 - installPlanApproval: Automatic # may be overridden to Manual by other settings - source: operatorhubio-catalog - sourceNamespace: olm - startingCSV: quay-operator.v3.8.1 - removalBehavior: - operatorGroups: DeleteIfUnused - subscriptions: Delete - clusterServiceVersions: Delete - installPlans: Keep - customResourceDefinitions: Keep - apiServiceDefinitions: Keep - statusConfig: - catalogSourceUnhealthy: StatusMessageOnly - deploymentsUnavailable: NonCompliant - upgradesAvailable: StatusMessageOnly - upgradesProgressing: NonCompliant \ No newline at end of file diff --git a/test/resources/operatorpolicy.yaml b/test/resources/operatorpolicy.yaml deleted file mode 100644 index eb6209ce..00000000 --- a/test/resources/operatorpolicy.yaml +++ /dev/null @@ -1,34 +0,0 @@ -apiVersion: policy.open-cluster-management.io/v1beta1 -kind: OperatorPolicy -metadata: - name: test-operator-policy -spec: - remediationAction: enforce # or inform - severity: medium - complianceType: musthave # or mustnothave - operatorGroup: # optional - name: og-single - namespace: default - target: - namespaces: - - default - subscription: - channel: stable-3.8 - name: project-quay - namespace: default - installPlanApproval: Automatic # may be overridden to Manual by other settings - source: operatorhubio-catalog - sourceNamespace: olm - startingCSV: quay-operator.v3.8.1 - removalBehavior: - operatorGroups: DeleteIfUnused - subscriptions: Delete - clusterServiceVersions: Delete - installPlans: Keep - customResourceDefinitions: Keep - apiServiceDefinitions: Keep - statusConfig: - catalogSourceUnhealthy: StatusMessageOnly - deploymentsUnavailable: NonCompliant - upgradesAvailable: StatusMessageOnly - upgradesProgressing: NonCompliant