diff --git a/.github/workflows/kind.yml b/.github/workflows/kind.yml index ee0e33f5..441dfb5d 100644 --- a/.github/workflows/kind.yml +++ b/.github/workflows/kind.yml @@ -47,11 +47,12 @@ jobs: run: | go mod verify - - name: Verify format + - name: Some quality checks run: | make fmt git diff --exit-code make lint + make gosec-scan - name: Verify deploy/operator.yaml run: | diff --git a/Makefile b/Makefile index d9a02fac..433c8264 100644 --- a/Makefile +++ b/Makefile @@ -177,7 +177,7 @@ gosec: .PHONY: gosec-scan gosec-scan: gosec - $(GOSEC) -fmt sonarqube -out gosec.json -no-fail -exclude-dir=.go ./... + $(GOSEC) -fmt sonarqube -out gosec.json -stdout -exclude-dir=.go -exclude-dir=test ./... ############################################################ # build section @@ -337,6 +337,15 @@ e2e-stop-instrumented: e2e-debug: @echo local controller log: -cat build/_output/controller.log + @echo pods on hub cluster + -kubectl get pods -A --kubeconfig=$(HUB_CONFIG) + -kubectl get pods -A -o yaml --kubeconfig=$(HUB_CONFIG) + @echo pods on managed cluster + -kubectl get pods -A --kubeconfig=$(MANAGED_CONFIG) + -kubectl get pods -A -o yaml --kubeconfig=$(MANAGED_CONFIG) + @echo gatekeeper logs on managed cluster + -kubectl logs -n gatekeeper-system -l control-plane=audit-controller --prefix=true --since=5m --kubeconfig=$(MANAGED_CONFIG) + -kubectl logs -n gatekeeper-system -l control-plane=controller-manager --prefix=true --since=5m --kubeconfig=$(MANAGED_CONFIG) @echo remote controller log: -kubectl logs $$(kubectl get pods -n $(KIND_NAMESPACE) -o name --kubeconfig=$(MANAGED_CONFIG) | grep $(IMG)) -n $(KIND_NAMESPACE) --kubeconfig=$(MANAGED_CONFIG) diff --git a/controllers/gatekeepersync/gatekeeper_constraint_sync.go b/controllers/gatekeepersync/gatekeeper_constraint_sync.go index e94751a2..6d6ee2f6 100644 --- a/controllers/gatekeepersync/gatekeeper_constraint_sync.go +++ b/controllers/gatekeepersync/gatekeeper_constraint_sync.go @@ -396,7 +396,9 @@ func (r *GatekeeperConstraintReconciler) sendComplianceEvent( return nil } - err := r.SendEvent(ctx, constraint, owner, msg, compliance) + reason := utils.EventReason(constraint.GetNamespace(), constraint.GetName()) + + err := r.SendEvent(ctx, constraint, owner, reason, msg, compliance) if err != nil { return err } diff --git a/controllers/templatesync/template_sync.go b/controllers/templatesync/template_sync.go index 88850a0a..bdb1c25b 100644 --- a/controllers/templatesync/template_sync.go +++ b/controllers/templatesync/template_sync.go @@ -80,6 +80,8 @@ type PolicyReconciler struct { Config *rest.Config Recorder record.EventRecorder ClusterNamespace string + Clientset *kubernetes.Clientset + InstanceName string DisableGkSync bool createdGkConstraint *bool } @@ -135,8 +137,7 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ var dClient dynamic.Interface if len(instance.Spec.PolicyTemplates) > 0 { - clientset := kubernetes.NewForConfigOrDie(r.Config) - discoveryClient = clientset.Discovery() + discoveryClient = r.Clientset.Discovery() // initialize dynamic client dClient, err = dynamic.NewForConfig(r.Config) @@ -262,7 +263,8 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ resultError = err errMsg := fmt.Sprintf("Failed to decode policy template with err: %s", err) - r.emitTemplateError(instance, tIndex, fmt.Sprintf("[template %v]", tIndex), false, errMsg) + _ = r.emitTemplateError(ctx, instance, tIndex, fmt.Sprintf("[template %v]", tIndex), false, errMsg) + reqLogger.Error(resultError, "Failed to decode the policy template", "templateIndex", tIndex) policyUserErrorsCounter.WithLabelValues(instance.Name, "", "format-error").Inc() @@ -310,7 +312,9 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ resultError = fmt.Errorf("dependency on %s has conflicting compliance states", dep.Name) errMsg := fmt.Sprintf("Failed to decode policy template with err: %s", resultError) - r.emitTemplateError(instance, tIndex, fmt.Sprintf("[template %v]", tIndex), isClusterScoped, errMsg) + _ = r.emitTemplateError(ctx, instance, tIndex, + fmt.Sprintf("[template %v]", tIndex), isClusterScoped, errMsg) + reqLogger.Error(resultError, "Failed to decode the policy template", "templateIndex", tIndex) depConflictErr = true @@ -338,7 +342,9 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ errMsg := fmt.Sprintf("Failed to get name from policy template at index %v", tIndex) resultError = k8serrors.NewBadRequest(errMsg) - r.emitTemplateError(instance, tIndex, fmt.Sprintf("[template %v]", tIndex), isClusterScoped, errMsg) + _ = r.emitTemplateError(ctx, instance, tIndex, + fmt.Sprintf("[template %v]", tIndex), isClusterScoped, errMsg) + reqLogger.Error(resultError, "Failed to process the policy template", "templateIndex", tIndex) policyUserErrorsCounter.WithLabelValues(instance.Name, "", "format-error").Inc() @@ -367,7 +373,8 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ errMsg += fmt.Sprintf(": %s", err) - r.emitTemplateError(instance, tIndex, tName, isClusterScoped, errMsg) + _ = r.emitTemplateError(ctx, instance, tIndex, tName, isClusterScoped, errMsg) + tLogger.Error(err, "Could not find an API mapping for the object definition", "group", gvk.Group, "version", gvk.Version, @@ -415,7 +422,8 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ resultError = err - r.emitTemplateError(instance, tIndex, tName, isClusterScoped, errMsg) + _ = r.emitTemplateError(ctx, instance, tIndex, tName, isClusterScoped, errMsg) + tLogger.Error(err, "Unsupported policy-template kind found in object definition", "group", gvk.Group, "version", gvk.Version, @@ -435,7 +443,8 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ errMsg := fmt.Sprintf("Templates are not supported for kind : %s", gvk.Kind) resultError = k8serrors.NewBadRequest(errMsg) - r.emitTemplateError(instance, tIndex, tName, isClusterScoped, errMsg) + _ = r.emitTemplateError(ctx, instance, tIndex, tName, isClusterScoped, errMsg) + tLogger.Error(resultError, "Failed to process the policy template") policyUserErrorsCounter.WithLabelValues(instance.Name, tName, "format-error").Inc() @@ -464,7 +473,8 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ resultError = err errMsg := fmt.Sprintf("Failed to unmarshal the policy template: %s", err) - r.emitTemplateError(instance, tIndex, tName, isClusterScoped, errMsg) + _ = r.emitTemplateError(ctx, instance, tIndex, tName, isClusterScoped, errMsg) + tLogger.Error(resultError, "Failed to unmarshal the policy template") policySystemErrorsCounter.WithLabelValues(instance.Name, tName, "unmarshal-error").Inc() @@ -486,7 +496,14 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ if err != nil { if len(dependencyFailures) > 0 { // template must be pending, do not create it - r.emitTemplatePending(instance, tIndex, tName, isClusterScoped, generatePendingMsg(dependencyFailures)) + emitErr := r.emitTemplatePending(ctx, instance, tIndex, tName, isClusterScoped, + generatePendingMsg(dependencyFailures)) + if emitErr != nil { + resultError = emitErr + + continue + } + tLogger.Info("Dependencies were not satisfied for the policy template", "namespace", instance.GetNamespace(), "kind", gvk.Kind, @@ -530,7 +547,8 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ resultError = err errMsg := fmt.Sprintf("Failed to create policy template: %s", err) - r.emitTemplateError(instance, tIndex, tName, isClusterScoped, errMsg) + _ = r.emitTemplateError(ctx, instance, tIndex, tName, isClusterScoped, errMsg) + tLogger.Error(resultError, "Failed to create policy template") policySystemErrorsCounter.WithLabelValues(instance.Name, tName, "create-error").Inc() @@ -555,7 +573,11 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ if isGkConstraintTemplate { tLogger.Info("Emitting status event for " + gvk.Kind) msg := fmt.Sprintf("%s %s was created successfully", gvk.Kind, tName) - r.emitTemplateSuccess(instance, tIndex, tName, isClusterScoped, msg) + + emitErr := r.emitTemplateSuccess(ctx, instance, tIndex, tName, isClusterScoped, msg) + if emitErr != nil { + resultError = emitErr + } } } @@ -574,7 +596,8 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ resultError = err errMsg := fmt.Sprintf("Failed to get the object in the policy template: %s", err) - r.emitTemplateError(instance, tIndex, tName, isClusterScoped, errMsg) + _ = r.emitTemplateError(ctx, instance, tIndex, tName, isClusterScoped, errMsg) + tLogger.Error(err, "Failed to get the object in the policy template", "namespace", instance.GetNamespace(), "kind", gvk.Kind, @@ -588,12 +611,17 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ if len(dependencyFailures) > 0 { // template must be pending, need to delete it and error - r.emitTemplatePending(instance, tIndex, tName, isClusterScoped, generatePendingMsg(dependencyFailures)) tLogger.Info("Dependencies were not satisfied for the policy template", "namespace", instance.GetNamespace(), "kind", gvk.Kind, ) + emitErr := r.emitTemplatePending(ctx, instance, tIndex, tName, + isClusterScoped, generatePendingMsg(dependencyFailures)) + if emitErr != nil { + resultError = err + } + err = res.Delete(ctx, tName, metav1.DeleteOptions{}) if err != nil { tLogger.Error(err, "Failed to delete a template that entered pending state", @@ -665,7 +693,8 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ resultError = k8serrors.NewBadRequest(errMsg) - r.emitTemplateError(instance, tIndex, tName, isClusterScoped, errMsg) + _ = r.emitTemplateError(ctx, instance, tIndex, tName, isClusterScoped, errMsg) + tLogger.Error(resultError, "Failed to create the policy template") policyUserErrorsCounter.WithLabelValues(instance.Name, tName, "format-error").Inc() @@ -711,7 +740,8 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ resultError = err errMsg := fmt.Sprintf("Failed to update policy template %s: %s", tName, err) - r.emitTemplateError(instance, tIndex, tName, isClusterScoped, errMsg) + _ = r.emitTemplateError(ctx, instance, tIndex, tName, isClusterScoped, errMsg) + tLogger.Error(err, "Failed to update the policy template") policySystemErrorsCounter.WithLabelValues(instance.Name, tName, "patch-error").Inc() @@ -734,7 +764,11 @@ func (r *PolicyReconciler) Reconcile(ctx context.Context, request reconcile.Requ if isGkConstraintTemplate { tLogger.Info("Emitting status event for " + gvk.Kind) msg := fmt.Sprintf("%s %s was updated successfully", gvk.Kind, tName) - r.emitTemplateSuccess(instance, tIndex, tName, isClusterScoped, msg) + + emitErr := r.emitTemplateSuccess(ctx, instance, tIndex, tName, isClusterScoped, msg) + if emitErr != nil { + resultError = emitErr + } } } @@ -1186,26 +1220,39 @@ func overrideRemediationAction(instance *policiesv1.Policy, tObjectUnstructured // policy framework. If the policy's status already reflects the current message, then no actions // are taken. func (r *PolicyReconciler) emitTemplateSuccess( - pol *policiesv1.Policy, tIndex int, tName string, clusterScoped bool, msg string, -) { - r.emitTemplateEvent(pol, tIndex, tName, clusterScoped, "Normal", "Compliant; ", msg) + ctx context.Context, pol *policiesv1.Policy, tIndex int, tName string, clusterScoped bool, msg string, +) error { + err := r.emitTemplateEvent(ctx, pol, tIndex, tName, clusterScoped, "Normal", "Compliant; ", msg) + if err != nil { + tlog := log.WithValues("Policy.Namespace", pol.Namespace, "Policy.Name", pol.Name, "template", tName) + tlog.Error(err, "Failed to emit template success event") + } + + return err } // emitTemplateError performs actions that ensure correct reporting of template errors in the // policy framework. If the policy's status already reflects the current error, then no actions // are taken. func (r *PolicyReconciler) emitTemplateError( - pol *policiesv1.Policy, tIndex int, tName string, clusterScoped bool, errMsg string, -) { - r.emitTemplateEvent(pol, tIndex, tName, clusterScoped, "Warning", "NonCompliant; template-error; ", errMsg) + ctx context.Context, pol *policiesv1.Policy, tIndex int, tName string, clusterScoped bool, errMsg string, +) error { + err := r.emitTemplateEvent(ctx, pol, tIndex, tName, clusterScoped, + "Warning", "NonCompliant; template-error; ", errMsg) + if err != nil { + tlog := log.WithValues("Policy.Namespace", pol.Namespace, "Policy.Name", pol.Name, "template", tName) + tlog.Error(err, "Failed to emit template error event") + } + + return err } // emitTemplatePending performs actions that ensure correct reporting of pending dependencies in the // policy framework. If the policy's status already reflects the current status, then no actions // are taken. func (r *PolicyReconciler) emitTemplatePending( - pol *policiesv1.Policy, tIndex int, tName string, clusterScoped bool, msg string, -) { + ctx context.Context, pol *policiesv1.Policy, tIndex int, tName string, clusterScoped bool, msg string, +) error { msgMeta := "Pending; " eventType := "Warning" @@ -1215,33 +1262,62 @@ func (r *PolicyReconciler) emitTemplatePending( eventType = "Normal" } - r.emitTemplateEvent(pol, tIndex, tName, clusterScoped, eventType, msgMeta, msg) + err := r.emitTemplateEvent(ctx, pol, tIndex, tName, clusterScoped, eventType, msgMeta, msg) + if err != nil { + tlog := log.WithValues("Policy.Namespace", pol.Namespace, "Policy.Name", pol.Name, "template", tName) + tlog.Error(err, "Failed to emit template pending event") + } + + return err } // emitTemplateEvent performs actions that ensure correct reporting of template sync events. If the // policy's status already reflects the current status, then no actions are taken. The msgMeta and // msg are concatenated without spaces, so any spacing should be included inside the msgMeta string. func (r *PolicyReconciler) emitTemplateEvent( - pol *policiesv1.Policy, tIndex int, tName string, clusterScoped bool, + ctx context.Context, pol *policiesv1.Policy, tIndex int, tName string, clusterScoped bool, eventType string, msgMeta string, msg string, -) { +) error { // check if the error is already present in the policy status - if so, return early if strings.Contains(getLatestStatusMessage(pol, tIndex), msgMeta+msg) { - return + return nil } - // emit the non-compliance event + // emit an informational event + r.Recorder.Event(pol, eventType, "PolicyTemplateSync", msg) + + // emit the compliance event var policyComplianceReason string if clusterScoped { - policyComplianceReason = fmt.Sprintf(utils.PolicyClusterScopedFmtStr, tName) + policyComplianceReason = utils.EventReason("", tName) } else { - policyComplianceReason = fmt.Sprintf(utils.PolicyFmtStr, pol.GetNamespace(), tName) + policyComplianceReason = utils.EventReason(pol.GetNamespace(), tName) } - r.Recorder.Event(pol, eventType, policyComplianceReason, msgMeta+msg) + sender := utils.ComplianceEventSender{ + ClusterNamespace: pol.Namespace, + InstanceName: r.InstanceName, + ClientSet: r.Clientset, + ControllerName: ControllerName, + } - // emit an informational event - r.Recorder.Event(pol, eventType, "PolicyTemplateSync", msg) + ownerref := metav1.OwnerReference{ + APIVersion: pol.APIVersion, + Kind: pol.Kind, + Name: pol.Name, + UID: pol.UID, + } + + var compState policiesv1.ComplianceState + if strings.HasPrefix(msgMeta, "Compliant") { + compState = policiesv1.Compliant + } else if strings.HasPrefix(msgMeta, "NonCompliant") { + compState = policiesv1.NonCompliant + } else if strings.HasPrefix(msgMeta, "Pending") { + compState = policiesv1.Pending + } + + return sender.SendEvent(ctx, nil, ownerref, policyComplianceReason, msgMeta+msg, compState) } // handleSyncSuccess performs common actions that should be run whenever a template is in sync, diff --git a/controllers/utils/events.go b/controllers/utils/events.go index a55c421e..d9a31c32 100644 --- a/controllers/utils/events.go +++ b/controllers/utils/events.go @@ -28,6 +28,7 @@ func (c *ComplianceEventSender) SendEvent( ctx context.Context, instance client.Object, owner metav1.OwnerReference, + reason string, msg string, compliance policyv1.ComplianceState, ) error { @@ -37,15 +38,6 @@ func (c *ComplianceEventSender) SendEvent( } now := time.Now() - var reason string - - if instance.GetNamespace() == "" { - reason = "policy: " + instance.GetName() - } else { - reason = fmt.Sprintf("policy: %s/%s", instance.GetNamespace(), instance.GetName()) - } - - gvk := instance.GetObjectKind().GroupVersionKind() event := &corev1.Event{ ObjectMeta: metav1.ObjectMeta{ @@ -60,31 +52,36 @@ func (c *ComplianceEventSender) SendEvent( UID: owner.UID, APIVersion: owner.APIVersion, }, - Reason: reason, Message: msg, Source: corev1.EventSource{ Component: c.ControllerName, Host: c.InstanceName, }, - FirstTimestamp: metav1.NewTime(now), - LastTimestamp: metav1.NewTime(now), - Count: 1, - Type: "Normal", - EventTime: metav1.NewMicroTime(now), - Action: "ComplianceStateUpdate", - Related: &corev1.ObjectReference{ + FirstTimestamp: metav1.NewTime(now), + LastTimestamp: metav1.NewTime(now), + Count: 1, + EventTime: metav1.NewMicroTime(now), + Action: "ComplianceStateUpdate", + ReportingController: c.ControllerName, + ReportingInstance: c.InstanceName, + } + + if instance != nil { + gvk := instance.GetObjectKind().GroupVersionKind() + + event.Related = &corev1.ObjectReference{ Kind: gvk.Kind, Name: instance.GetName(), Namespace: instance.GetNamespace(), UID: instance.GetUID(), APIVersion: gvk.GroupVersion().String(), - }, - ReportingController: c.ControllerName, - ReportingInstance: c.InstanceName, + } } - if compliance == policyv1.NonCompliant { + if compliance == policyv1.Compliant { + event.Type = "Normal" + } else { event.Type = "Warning" } @@ -93,3 +90,11 @@ func (c *ComplianceEventSender) SendEvent( return err } + +func EventReason(ns, name string) string { + if ns == "" { + return fmt.Sprintf(PolicyClusterScopedFmtStr, name) + } + + return fmt.Sprintf(PolicyFmtStr, ns, name) +} diff --git a/main.go b/main.go index 27142d77..cfedc4e7 100644 --- a/main.go +++ b/main.go @@ -451,6 +451,8 @@ func getManager( os.Exit(1) } + instanceName, _ := os.Hostname() // on an error, instanceName will be empty, which is ok + templateReconciler := &templatesync.PolicyReconciler{ Client: mgr.GetClient(), DynamicWatcher: watcher, @@ -458,6 +460,8 @@ func getManager( Config: mgr.GetConfig(), Recorder: mgr.GetEventRecorderFor(templatesync.ControllerName), ClusterNamespace: tool.Options.ClusterNamespace, + Clientset: kubernetes.NewForConfigOrDie(mgr.GetConfig()), + InstanceName: instanceName, DisableGkSync: tool.Options.DisableGkSync, } diff --git a/test/e2e/case12_ordering_test.go b/test/e2e/case12_ordering_test.go index 8a4177a2..981c80d1 100644 --- a/test/e2e/case12_ordering_test.go +++ b/test/e2e/case12_ordering_test.go @@ -13,6 +13,8 @@ import ( "k8s.io/apimachinery/pkg/util/uuid" policiesv1 "open-cluster-management.io/governance-policy-propagator/api/v1" "open-cluster-management.io/governance-policy-propagator/test/utils" + + fwutils "open-cluster-management.io/governance-policy-framework-addon/controllers/utils" ) // Helper function to create events @@ -47,6 +49,7 @@ func generateEventOnPolicy(plcName string, cfgPlcName string, msg string, compli Name: managedPlc.GetName(), UID: managedPlc.GetUID(), }, + fwutils.EventReason(clusterNamespace, cfgPlcName), msg, policiesv1.ComplianceState(complianceState), ) diff --git a/test/e2e/case17_gatekeeper_sync_test.go b/test/e2e/case17_gatekeeper_sync_test.go index fdb52d0a..cefe9328 100644 --- a/test/e2e/case17_gatekeeper_sync_test.go +++ b/test/e2e/case17_gatekeeper_sync_test.go @@ -297,9 +297,9 @@ var _ = Describe("Test Gatekeeper ConstraintTemplate and constraint sync", Order fmt.Sprintf("Got %s but expected one of %v", history[0].Message, validMsgs), ) - // Verify that there are no duplicate status messages. + // Verify that there are no duplicate gatekeeper status messages. for i, historyEvent := range managedPolicy.Status.Details[1].History { - if i == 0 || strings.HasPrefix(historyEvent.Message, "NonCompliant; template-error;") { + if i == 0 || strings.Contains(historyEvent.Message, "NonCompliant; template-error;") { continue }