Skip to content

Commit

Permalink
Implement OperatorPolicy health checks for Deployments
Browse files Browse the repository at this point in the history
Following the 'handle*' function pattern that is
used by the subscription and operatorgroup checks,
similar functionality was added to handle Deployments.

Ref: https://issues.redhat.com/browse/ACM-6598

Signed-off-by: Jeffrey Luo <jeluo@redhat.com>
  • Loading branch information
JeffeyL committed Feb 5, 2024
1 parent 666fc41 commit badaf01
Show file tree
Hide file tree
Showing 3 changed files with 202 additions and 20 deletions.
80 changes: 79 additions & 1 deletion controllers/operatorpolicy_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
operatorv1 "github.com/operator-framework/api/pkg/operators/v1"
operatorv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1"
depclient "github.com/stolostron/kubernetes-dependency-watches/client"
appsv1 "k8s.io/api/apps/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
Expand All @@ -28,6 +29,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/source"

v1 "open-cluster-management.io/config-policy-controller/api/v1"
policyv1beta1 "open-cluster-management.io/config-policy-controller/api/v1beta1"
)

Expand All @@ -48,6 +50,10 @@ var (
Group: "operators.coreos.com",
Version: "v1alpha1", Kind: "ClusterServiceVersion",
}
deploymentGVK = schema.GroupVersionKind{
Group: "apps",
Version: "v1", Kind: "Deployment",
}
)

// OperatorPolicyReconciler reconciles a OperatorPolicy object
Expand Down Expand Up @@ -142,16 +148,88 @@ func (r *OperatorPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Reque
return reconcile.Result{}, err
}

_, err = r.handleCSV(ctx, policy, subscription)
csv, err := r.handleCSV(ctx, policy, subscription)
if err != nil {
OpLog.Error(err, "Error handling CSVs")

return reconcile.Result{}, err
}

if err := r.handleDeployment(ctx, policy, csv); err != nil {
OpLog.Error(err, "Error handling Deployments")

return reconcile.Result{}, err
}

return reconcile.Result{}, nil
}

func (r *OperatorPolicyReconciler) handleDeployment(
ctx context.Context,
policy *policyv1beta1.OperatorPolicy,
csv *operatorv1alpha1.ClusterServiceVersion,
) error {
// case where csv is nil
if csv == nil {
return nil
}

watcher := opPolIdentifier(policy.Namespace, policy.Name)

var deployments []appsv1.Deployment

for _, dep := range csv.Spec.InstallStrategy.StrategySpec.DeploymentSpecs {
foundDep, err := r.DynamicWatcher.Get(watcher, deploymentGVK, csv.Namespace, dep.Name)
if err != nil {
return fmt.Errorf("error getting the Deployment: %w", err)
}

// report missing deployment
if foundDep == nil {
err := r.updateStatus(ctx, policy, missingWantedCond("Deployment"),
missingDeploymentObj(dep.Name, csv.Namespace))
if err != nil {
return fmt.Errorf("error updating the status for a missing Deployment: %w", err)
}

continue
}

unstructured := foundDep.UnstructuredContent()
var dep appsv1.Deployment
err = runtime.DefaultUnstructuredConverter.FromUnstructured(unstructured, &dep)

if err != nil {
return err
}

deployments = append(deployments, dep)
}

unavailable := false
var unavailableDeployments []appsv1.Deployment
var relatedObjects []v1.RelatedObject

// check for unavailable deployments and build relatedobjects list
for _, dep := range deployments {
if dep.Status.UnavailableReplicas > 0 {
unavailable = true

unavailableDeployments = append(unavailableDeployments, dep)
}

relatedObjects = append(relatedObjects, existingDeploymentObj(&dep))
}

err := r.updateStatus(ctx, policy,
buildDeploymentCond(unavailable, unavailableDeployments), relatedObjects...)
if err != nil {
return fmt.Errorf("error updating the status for Deployments: %w", err)
}

return nil
}

func (r *OperatorPolicyReconciler) handleOpGroup(ctx context.Context, policy *policyv1beta1.OperatorPolicy) error {
watcher := opPolIdentifier(policy.Namespace, policy.Name)

Expand Down
118 changes: 100 additions & 18 deletions controllers/operatorpolicy_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"time"

operatorv1alpha1 "github.com/operator-framework/api/pkg/operators/v1alpha1"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
Expand Down Expand Up @@ -212,6 +213,18 @@ func calculateComplianceCondition(policy *policyv1beta1.OperatorPolicy) metav1.C
}
}

idx, cond = policy.Status.GetCondition(deploymentConditionType)
if idx == -1 {
messages = append(messages, "the status of the Deployments are unknown")
foundNonCompliant = true
} else {
messages = append(messages, cond.Message)

if cond.Status != metav1.ConditionTrue {
foundNonCompliant = true
}
}

// FUTURE: check additional conditions

if foundNonCompliant {
Expand Down Expand Up @@ -287,10 +300,11 @@ func (r *OperatorPolicyReconciler) emitComplianceEvent(
}

const (
compliantConditionType = "Compliant"
opGroupConditionType = "OperatorGroupCompliant"
subConditionType = "SubscriptionCompliant"
csvConditionType = "CSVCompliant"
compliantConditionType = "Compliant"
opGroupConditionType = "OperatorGroupCompliant"
subConditionType = "SubscriptionCompliant"
csvConditionType = "CSVCompliant"
deploymentConditionType = "DeploymentCompliant"
)

func condType(kind string) string {
Expand All @@ -301,6 +315,8 @@ func condType(kind string) string {
return subConditionType
case "ClusterServiceVersion":
return csvConditionType
case "Deployment":
return deploymentConditionType
default:
panic("Unknown condition type for kind " + kind)
}
Expand Down Expand Up @@ -372,6 +388,23 @@ func updatedCond(kind string) metav1.Condition {
}
}

var opGroupPreexistingCond = metav1.Condition{
Type: opGroupConditionType,
Status: metav1.ConditionTrue,
Reason: "PreexistingOperatorGroupFound",
Message: "the policy does not specify an OperatorGroup but one already exists in the namespace - " +
"assuming that OperatorGroup is correct",
}

// opGroupTooManyCond is a NonCompliant condition with a Reason like 'TooManyOperatorGroups',
// and a Message like 'there is more than one OperatorGroup in the namespace'
var opGroupTooManyCond = metav1.Condition{
Type: opGroupConditionType,
Status: metav1.ConditionFalse,
Reason: "TooManyOperatorGroups",
Message: "there is more than one OperatorGroup in the namespace",
}

// buildCSVCond takes a csv and returns a shortened version of its most recent Condition
func buildCSVCond(csv *operatorv1alpha1.ClusterServiceVersion) metav1.Condition {
status := metav1.ConditionFalse
Expand All @@ -387,21 +420,29 @@ func buildCSVCond(csv *operatorv1alpha1.ClusterServiceVersion) metav1.Condition
}
}

var opGroupPreexistingCond = metav1.Condition{
Type: opGroupConditionType,
Status: metav1.ConditionTrue,
Reason: "PreexistingOperatorGroupFound",
Message: "the policy does not specify an OperatorGroup but one already exists in the namespace - " +
"assuming that OperatorGroup is correct",
}
func buildDeploymentCond(unavailable bool, unavailableDeps []appsv1.Deployment) metav1.Condition {
status := metav1.ConditionTrue
reason := "DeploymentsAvailable"
message := "All operator Deployments have their minimum availability"

// opGroupTooManyCond is a NonCompliant condition with a Reason like 'TooManyOperatorGroups',
// and a Message like 'there is more than one OperatorGroup in the namespace'
var opGroupTooManyCond = metav1.Condition{
Type: opGroupConditionType,
Status: metav1.ConditionFalse,
Reason: "TooManyOperatorGroups",
Message: "there is more than one OperatorGroup in the namespace",
if unavailable {
status = metav1.ConditionFalse
reason = "DeploymentsUnavailable"

message = ""
for _, dep := range unavailableDeps {
message = message + dep.Name + ", "
}

message += "do not have their minimum availability"
}

return metav1.Condition{
Type: condType(deploymentGVK.Kind),
Status: status,
Reason: reason,
Message: message,
}
}

// missingWantedObj returns a NonCompliant RelatedObject with reason = 'Resource not found but should exist'
Expand Down Expand Up @@ -520,3 +561,44 @@ func existingCSVObj(csv *operatorv1alpha1.ClusterServiceVersion) policyv1.Relate
},
}
}

func missingDeploymentObj(name string, namespace string) policyv1.RelatedObject {
return policyv1.RelatedObject{
Object: policyv1.ObjectResource{
Kind: deploymentGVK.Kind,
APIVersion: deploymentGVK.GroupVersion().String(),
Metadata: policyv1.ObjectMetadata{
Name: name,
Namespace: namespace,
},
},
Compliant: string(policyv1.NonCompliant),
Reason: reasonWantFoundDNE,
}
}

func existingDeploymentObj(dep *appsv1.Deployment) policyv1.RelatedObject {
compliance := policyv1.NonCompliant
reason := "Deployment Unavailable"

if dep.Status.UnavailableReplicas == 0 {
compliance = policyv1.Compliant
reason = "Deployment Available"
}

return policyv1.RelatedObject{
Object: policyv1.ObjectResource{
Kind: deploymentGVK.Kind,
APIVersion: deploymentGVK.GroupVersion().String(),
Metadata: policyv1.ObjectMetadata{
Name: dep.Name,
Namespace: dep.Namespace,
},
},
Compliant: string(compliance),
Reason: reason,
Properties: &policyv1.ObjectProperties{
UID: string(dep.GetUID()),
},
}
}
24 changes: 23 additions & 1 deletion test/e2e/case38_install_operator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,7 @@ var _ = Describe("Test installing an operator from OperatorPolicy", Ordered, fun
})
})

Describe("Test health checks on CSV after installing an operator with OperatorPolicy", Ordered, func() {
FDescribe("Test health checks on OLM resources after OperatorPolicy operator installation", Ordered, func() {
const (
opPolYAML = "../resources/case38_operator_install/operator-policy-no-group-enforce.yaml"
opPolName = "oppol-no-group-enforce"
Expand Down Expand Up @@ -587,5 +587,27 @@ var _ = Describe("Test installing an operator from OperatorPolicy", Ordered, fun
"ClusterServiceVersion - install strategy completed with no errors",
)
})

It("Should generate conditions and relatedobjects of Deployments", func() {
check(
opPolName,
false,
[]policyv1.RelatedObject{{
Object: policyv1.ObjectResource{
Kind: "Deployment",
APIVersion: "apps/v1",
},
Compliant: "Compliant",
Reason: "Deployment Available",
}},
metav1.Condition{
Type: "DeploymentCompliant",
Status: metav1.ConditionTrue,
Reason: "DeploymentsAvailable",
Message: "All operator Deployments have their minimum availability",
},
"All operator Deployments have their minimum availability",
)
})
})
})

0 comments on commit badaf01

Please sign in to comment.