From ceb7b63920fbf3a6a3cfa665580038c601c04c3e Mon Sep 17 00:00:00 2001 From: Jeffrey Luo Date: Mon, 5 Feb 2024 04:23:51 -0500 Subject: [PATCH] Implement OperatorPolicy health checks for Deployments Following the 'handle*' function pattern that is used by the subscription and operatorgroup checks, similar functionality was added to handle Deployments. Ref: https://issues.redhat.com/browse/ACM-6598 Signed-off-by: Jeffrey Luo --- controllers/operatorpolicy_controller.go | 80 ++++++++++++++- controllers/operatorpolicy_status.go | 118 +++++++++++++++++++---- test/e2e/case38_install_operator_test.go | 24 ++++- 3 files changed, 202 insertions(+), 20 deletions(-) diff --git a/controllers/operatorpolicy_controller.go b/controllers/operatorpolicy_controller.go index ae55925c..bc56ef3b 100644 --- a/controllers/operatorpolicy_controller.go +++ b/controllers/operatorpolicy_controller.go @@ -13,6 +13,7 @@ import ( operatorv1 "github.com/operator-framework/api/pkg/operators/v1" operatorv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1" depclient "github.com/stolostron/kubernetes-dependency-watches/client" + appsv1 "k8s.io/api/apps/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -28,6 +29,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" "sigs.k8s.io/controller-runtime/pkg/source" + v1 "open-cluster-management.io/config-policy-controller/api/v1" policyv1beta1 "open-cluster-management.io/config-policy-controller/api/v1beta1" ) @@ -48,6 +50,10 @@ var ( Group: "operators.coreos.com", Version: "v1alpha1", Kind: "ClusterServiceVersion", } + deploymentGVK = schema.GroupVersionKind{ + Group: "apps", + Version: "v1", Kind: "Deployment", + } ) // OperatorPolicyReconciler reconciles a OperatorPolicy object @@ -142,16 +148,88 @@ func (r *OperatorPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Reque return reconcile.Result{}, err } - _, err = r.handleCSV(ctx, policy, subscription) + csv, err := r.handleCSV(ctx, policy, subscription) if err != nil { OpLog.Error(err, "Error handling CSVs") return reconcile.Result{}, err } + if err := r.handleDeployment(ctx, policy, csv); err != nil { + OpLog.Error(err, "Error handling Deployments") + + return reconcile.Result{}, err + } + return reconcile.Result{}, nil } +func (r *OperatorPolicyReconciler) handleDeployment( + ctx context.Context, + policy *policyv1beta1.OperatorPolicy, + csv *operatorv1alpha1.ClusterServiceVersion, +) error { + // case where csv is nil + if csv == nil { + return nil + } + + watcher := opPolIdentifier(policy.Namespace, policy.Name) + + var deployments []appsv1.Deployment + + for _, dep := range csv.Spec.InstallStrategy.StrategySpec.DeploymentSpecs { + foundDep, err := r.DynamicWatcher.Get(watcher, deploymentGVK, csv.Namespace, dep.Name) + if err != nil { + return fmt.Errorf("error getting the Deployment: %w", err) + } + + // report missing deployment + if foundDep == nil { + err := r.updateStatus(ctx, policy, missingWantedCond("Deployment"), + missingDeploymentObj(dep.Name, csv.Namespace)) + if err != nil { + return fmt.Errorf("error updating the status for a missing Deployment: %w", err) + } + + continue + } + + unstructured := foundDep.UnstructuredContent() + var dep appsv1.Deployment + err = runtime.DefaultUnstructuredConverter.FromUnstructured(unstructured, &dep) + + if err != nil { + return err + } + + deployments = append(deployments, dep) + } + + unavailable := false + var unavailableDeployments []appsv1.Deployment + var relatedObjects []v1.RelatedObject + + // check for unavailable deployments and build relatedobjects list + for _, dep := range deployments { + if dep.Status.UnavailableReplicas > 0 { + unavailable = true + + unavailableDeployments = append(unavailableDeployments, dep) + } + + relatedObjects = append(relatedObjects, existingDeploymentObj(&dep)) + } + + err := r.updateStatus(ctx, policy, + buildDeploymentCond(unavailable, unavailableDeployments), relatedObjects...) + if err != nil { + return fmt.Errorf("error updating the status for Deployments: %w", err) + } + + return nil +} + func (r *OperatorPolicyReconciler) handleOpGroup(ctx context.Context, policy *policyv1beta1.OperatorPolicy) error { watcher := opPolIdentifier(policy.Namespace, policy.Name) diff --git a/controllers/operatorpolicy_status.go b/controllers/operatorpolicy_status.go index 309ca8ea..0338d979 100644 --- a/controllers/operatorpolicy_status.go +++ b/controllers/operatorpolicy_status.go @@ -8,6 +8,7 @@ import ( "time" operatorv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -212,6 +213,18 @@ func calculateComplianceCondition(policy *policyv1beta1.OperatorPolicy) metav1.C } } + idx, cond = policy.Status.GetCondition(deploymentConditionType) + if idx == -1 { + messages = append(messages, "the status of the Deployments are unknown") + foundNonCompliant = true + } else { + messages = append(messages, cond.Message) + + if cond.Status != metav1.ConditionTrue { + foundNonCompliant = true + } + } + // FUTURE: check additional conditions if foundNonCompliant { @@ -287,10 +300,11 @@ func (r *OperatorPolicyReconciler) emitComplianceEvent( } const ( - compliantConditionType = "Compliant" - opGroupConditionType = "OperatorGroupCompliant" - subConditionType = "SubscriptionCompliant" - csvConditionType = "CSVCompliant" + compliantConditionType = "Compliant" + opGroupConditionType = "OperatorGroupCompliant" + subConditionType = "SubscriptionCompliant" + csvConditionType = "CSVCompliant" + deploymentConditionType = "DeploymentCompliant" ) func condType(kind string) string { @@ -301,6 +315,8 @@ func condType(kind string) string { return subConditionType case "ClusterServiceVersion": return csvConditionType + case "Deployment": + return deploymentConditionType default: panic("Unknown condition type for kind " + kind) } @@ -372,6 +388,23 @@ func updatedCond(kind string) metav1.Condition { } } +var opGroupPreexistingCond = metav1.Condition{ + Type: opGroupConditionType, + Status: metav1.ConditionTrue, + Reason: "PreexistingOperatorGroupFound", + Message: "the policy does not specify an OperatorGroup but one already exists in the namespace - " + + "assuming that OperatorGroup is correct", +} + +// opGroupTooManyCond is a NonCompliant condition with a Reason like 'TooManyOperatorGroups', +// and a Message like 'there is more than one OperatorGroup in the namespace' +var opGroupTooManyCond = metav1.Condition{ + Type: opGroupConditionType, + Status: metav1.ConditionFalse, + Reason: "TooManyOperatorGroups", + Message: "there is more than one OperatorGroup in the namespace", +} + // buildCSVCond takes a csv and returns a shortened version of its most recent Condition func buildCSVCond(csv *operatorv1alpha1.ClusterServiceVersion) metav1.Condition { status := metav1.ConditionFalse @@ -387,21 +420,29 @@ func buildCSVCond(csv *operatorv1alpha1.ClusterServiceVersion) metav1.Condition } } -var opGroupPreexistingCond = metav1.Condition{ - Type: opGroupConditionType, - Status: metav1.ConditionTrue, - Reason: "PreexistingOperatorGroupFound", - Message: "the policy does not specify an OperatorGroup but one already exists in the namespace - " + - "assuming that OperatorGroup is correct", -} +func buildDeploymentCond(unavailable bool, unavailableDeps []appsv1.Deployment) metav1.Condition { + status := metav1.ConditionTrue + reason := "DeploymentsAvailable" + message := "All operator Deployments have their minimum availability" -// opGroupTooManyCond is a NonCompliant condition with a Reason like 'TooManyOperatorGroups', -// and a Message like 'there is more than one OperatorGroup in the namespace' -var opGroupTooManyCond = metav1.Condition{ - Type: opGroupConditionType, - Status: metav1.ConditionFalse, - Reason: "TooManyOperatorGroups", - Message: "there is more than one OperatorGroup in the namespace", + if unavailable { + status = metav1.ConditionFalse + reason = "DeploymentsUnavailable" + + message = "" + for _, dep := range unavailableDeps { + message = message + dep.Name + ", " + } + + message += "do not have their minimum availability" + } + + return metav1.Condition{ + Type: condType(deploymentGVK.Kind), + Status: status, + Reason: reason, + Message: message, + } } // missingWantedObj returns a NonCompliant RelatedObject with reason = 'Resource not found but should exist' @@ -520,3 +561,44 @@ func existingCSVObj(csv *operatorv1alpha1.ClusterServiceVersion) policyv1.Relate }, } } + +func missingDeploymentObj(name string, namespace string) policyv1.RelatedObject { + return policyv1.RelatedObject{ + Object: policyv1.ObjectResource{ + Kind: deploymentGVK.Kind, + APIVersion: deploymentGVK.GroupVersion().String(), + Metadata: policyv1.ObjectMetadata{ + Name: name, + Namespace: namespace, + }, + }, + Compliant: string(policyv1.NonCompliant), + Reason: reasonWantFoundDNE, + } +} + +func existingDeploymentObj(dep *appsv1.Deployment) policyv1.RelatedObject { + compliance := policyv1.NonCompliant + reason := "Deployment Unavailable" + + if dep.Status.UnavailableReplicas == 0 { + compliance = policyv1.Compliant + reason = "Deployment Available" + } + + return policyv1.RelatedObject{ + Object: policyv1.ObjectResource{ + Kind: deploymentGVK.Kind, + APIVersion: deploymentGVK.GroupVersion().String(), + Metadata: policyv1.ObjectMetadata{ + Name: dep.Name, + Namespace: dep.Namespace, + }, + }, + Compliant: string(compliance), + Reason: reason, + Properties: &policyv1.ObjectProperties{ + UID: string(dep.GetUID()), + }, + } +} diff --git a/test/e2e/case38_install_operator_test.go b/test/e2e/case38_install_operator_test.go index 0ef4fed9..f6bfbbc5 100644 --- a/test/e2e/case38_install_operator_test.go +++ b/test/e2e/case38_install_operator_test.go @@ -551,7 +551,7 @@ var _ = Describe("Test installing an operator from OperatorPolicy", Ordered, fun }) }) - Describe("Test health checks on CSV after installing an operator with OperatorPolicy", Ordered, func() { + Describe("Test health checks on OLM resources after OperatorPolicy operator installation", Ordered, func() { const ( opPolYAML = "../resources/case38_operator_install/operator-policy-no-group-enforce.yaml" opPolName = "oppol-no-group-enforce" @@ -587,5 +587,27 @@ var _ = Describe("Test installing an operator from OperatorPolicy", Ordered, fun "ClusterServiceVersion - install strategy completed with no errors", ) }) + + It("Should generate conditions and relatedobjects of Deployments", func() { + check( + opPolName, + false, + []policyv1.RelatedObject{{ + Object: policyv1.ObjectResource{ + Kind: "Deployment", + APIVersion: "apps/v1", + }, + Compliant: "Compliant", + Reason: "Deployment Available", + }}, + metav1.Condition{ + Type: "DeploymentCompliant", + Status: metav1.ConditionTrue, + Reason: "DeploymentsAvailable", + Message: "All operator Deployments have their minimum availability", + }, + "All operator Deployments have their minimum availability", + ) + }) }) })