Skip to content

Commit

Permalink
Add common_related_objects
Browse files Browse the repository at this point in the history
This gauge records any related objects monitored by multiple policies.

ref: stolostron/backlog#25357

Signed-off-by: Dale Haiducek <dhaiduce@redhat.com>
  • Loading branch information
dhaiducek committed Nov 17, 2022
1 parent ff9c306 commit 98d1586
Show file tree
Hide file tree
Showing 7 changed files with 268 additions and 6 deletions.
49 changes: 47 additions & 2 deletions controllers/configurationpolicy_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ type ConfigurationPolicyReconciler struct {
// where the controller is running.
TargetK8sClient kubernetes.Interface
TargetK8sConfig *rest.Config
// Whether custom metrics collection is enabled
EnableMetrics bool
discoveryInfo
// This is used to fetch and parse OpenAPI documents to perform client-side validation of object definitions.
openAPIParser *openapi.CachedOpenAPIParser
Expand All @@ -127,6 +129,8 @@ func (r *ConfigurationPolicyReconciler) Reconcile(ctx context.Context, request c
_ = policyEvalCounter.DeleteLabelValues(request.Name)
_ = compareObjEvalCounter.DeletePartialMatch(prometheus.Labels{"config_policy_name": request.Name})
_ = compareObjSecondsCounter.DeletePartialMatch(prometheus.Labels{"config_policy_name": request.Name})
_ = policyRelatedObjectGauge.DeletePartialMatch(
prometheus.Labels{"policy": fmt.Sprintf("%s/%s", request.Namespace, request.Name)})
}

return reconcile.Result{}, nil
Expand Down Expand Up @@ -180,6 +184,9 @@ func (r *ConfigurationPolicyReconciler) PeriodicallyExecConfigPolicies(freq uint
if !skipLoop {
log.Info("Processing the policies", "count", len(policiesList.Items))

// Initialize the related object map
policyRelatedObjectMap = map[string][]string{}

for i := 0; i < int(r.EvaluationConcurrency); i++ {
wg.Add(1)

Expand All @@ -200,6 +207,11 @@ func (r *ConfigurationPolicyReconciler) PeriodicallyExecConfigPolicies(freq uint
close(policyQueue)
wg.Wait()

// Update the related object metric after policy processing
if r.EnableMetrics {
updateRelatedObjectMetric()
}
// Update the evaluation histogram with the elapsed time
elapsed := time.Since(start).Seconds()
evalLoopHistogram.Observe(elapsed)
// making sure that if processing is > freq we don't sleep
Expand Down Expand Up @@ -919,14 +931,14 @@ func (r *ConfigurationPolicyReconciler) handleObjectTemplates(plc policyv1.Confi
func (r *ConfigurationPolicyReconciler) checkRelatedAndUpdate(
plc policyv1.ConfigurationPolicy, related, oldRelated []policyv1.RelatedObject, sendEvent bool,
) {
sortRelatedObjectsAndUpdate(&plc, related, oldRelated)
sortRelatedObjectsAndUpdate(&plc, related, oldRelated, r.EnableMetrics)
// An update always occurs to account for the lastEvaluated status field
r.addForUpdate(&plc, sendEvent)
}

// helper function to check whether related objects has changed
func sortRelatedObjectsAndUpdate(
plc *policyv1.ConfigurationPolicy, related, oldRelated []policyv1.RelatedObject,
plc *policyv1.ConfigurationPolicy, related, oldRelated []policyv1.RelatedObject, collectMetrics bool,
) {
sort.SliceStable(related, func(i, j int) bool {
if related[i].Object.Kind != related[j].Object.Kind {
Expand All @@ -941,7 +953,26 @@ func sortRelatedObjectsAndUpdate(

update := false

// Instantiate found objects for the related object metric
found := map[string]bool{}

if collectMetrics {
for _, obj := range oldRelated {
found[getObjectString(obj)] = false
}
}

// Format policy for related object metric
policyVal := fmt.Sprintf("%s/%s", plc.Namespace, plc.Name)

for i, newEntry := range related {
var objKey string
// Collect the policy and related object for related object metric
if collectMetrics {
objKey = getObjectString(newEntry)
policyRelatedObjectMap[objKey] = append(policyRelatedObjectMap[objKey], policyVal)
}

for _, oldEntry := range oldRelated {
// Get matching objects
if gocmp.Equal(newEntry.Object, oldEntry.Object) {
Expand All @@ -952,12 +983,26 @@ func sortRelatedObjectsAndUpdate(
// Use the old properties if they existed and this is not a newly created resource
related[i].Properties = oldEntry.Properties

if collectMetrics {
found[objKey] = true
}

break
}
}
}
}

// Clean up old related object metrics if the related object list changed
if collectMetrics {
for _, obj := range oldRelated {
objString := getObjectString(obj)
if !found[objString] {
_ = policyRelatedObjectGauge.DeleteLabelValues(objString, policyVal)
}
}
}

if len(oldRelated) == len(related) {
for i, entry := range oldRelated {
if !gocmp.Equal(entry, related[i]) {
Expand Down
6 changes: 3 additions & 3 deletions controllers/configurationpolicy_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -412,14 +412,14 @@ func TestSortRelatedObjectsAndUpdate(t *testing.T) {

empty := []policyv1.RelatedObject{}

sortRelatedObjectsAndUpdate(policy, relatedList, empty)
sortRelatedObjectsAndUpdate(policy, relatedList, empty, false)
assert.True(t, relatedList[0].Object.Metadata.Name == "bar")

// append another object named bar but also with namespace bar
relatedList = append(relatedList, addRelatedObjects(true, rsrc, "ConfigurationPolicy", "bar",
true, []string{name}, "reason", nil)...)

sortRelatedObjectsAndUpdate(policy, relatedList, empty)
sortRelatedObjectsAndUpdate(policy, relatedList, empty, false)
assert.True(t, relatedList[0].Object.Metadata.Namespace == "bar")

// clear related objects and test sorting with no namespace
Expand All @@ -430,7 +430,7 @@ func TestSortRelatedObjectsAndUpdate(t *testing.T) {
relatedList = append(relatedList, addRelatedObjects(true, rsrc, "ConfigurationPolicy", "",
false, []string{name}, "reason", nil)...)

sortRelatedObjectsAndUpdate(policy, relatedList, empty)
sortRelatedObjectsAndUpdate(policy, relatedList, empty, false)
assert.True(t, relatedList[0].Object.Metadata.Name == "bar")
}

Expand Down
50 changes: 50 additions & 0 deletions controllers/metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (

"github.com/prometheus/client_golang/prometheus"
"sigs.k8s.io/controller-runtime/pkg/metrics"

policyv1 "open-cluster-management.io/config-policy-controller/api/v1"
)

var (
Expand Down Expand Up @@ -47,6 +49,20 @@ var (
},
[]string{"config_policy_name", "namespace", "object"},
)
// The policyRelatedObjectMap collects a map of related objects to policies
// in order to populate the gauge:
// <kind.version/namespace/name>: []<policy-namespace/policy-name>
policyRelatedObjectMap map[string][]string
policyRelatedObjectGauge = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "common_related_objects",
Help: "A gauge vector of related objects managed by multiple policies.",
},
[]string{
"relatedObject",
"policy",
},
)
)

func init() {
Expand All @@ -58,3 +74,37 @@ func init() {
metrics.Registry.MustRegister(compareObjEvalCounter)
metrics.Registry.MustRegister(policyRelatedObjectGauge)
}

// updateRelatedObjectMetric iterates through the collected related object map, deletes any metrics
// that aren't duplications, and sets a metric for any related object that is handled by multiple
// policies to the number of policies that currently handles it.
func updateRelatedObjectMetric() {
log.V(3).Info("Updating common_related_objects metric ...")

for relatedObj, policies := range policyRelatedObjectMap {
for _, policy := range policies {
if len(policies) == 1 {
policyRelatedObjectGauge.DeleteLabelValues(relatedObj, policy)

continue
}

gaugeInstance, err := policyRelatedObjectGauge.GetMetricWithLabelValues(relatedObj, policy)
if err != nil {
log.V(3).Error(err, "Failed to retrieve related object gauge")

continue
}

gaugeInstance.Set(float64(len(policies)))
}
}
}

// getObjectString returns a string formatted as:
// <kind>.<version>/<namespace>/<name>
func getObjectString(obj policyv1.RelatedObject) string {
return fmt.Sprintf("%s.%s/%s/%s",
obj.Object.Kind, obj.Object.APIVersion,
obj.Object.Metadata.Namespace, obj.Object.Metadata.Name)
}
4 changes: 3 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ func main() {
var clusterName, hubConfigPath, targetKubeConfig, metricsAddr, probeAddr string
var frequency uint
var decryptionConcurrency, evaluationConcurrency uint8
var enableLease, enableLeaderElection, legacyLeaderElection bool
var enableLease, enableLeaderElection, legacyLeaderElection, enableMetrics bool

pflag.UintVar(&frequency, "update-frequency", 10,
"The status update frequency (in seconds) of a mutation policy")
Expand Down Expand Up @@ -109,6 +109,7 @@ func main() {
2,
"The max number of concurrent configuration policy evaluations",
)
pflag.BoolVar(&enableMetrics, "enable-metrics", true, "Disable custom metrics collection")

pflag.Parse()

Expand Down Expand Up @@ -252,6 +253,7 @@ func main() {
InstanceName: instanceName,
TargetK8sClient: targetK8sClient,
TargetK8sConfig: targetK8sConfig,
EnableMetrics: enableMetrics,
}
if err = reconciler.SetupWithManager(mgr); err != nil {
log.Error(err, "Unable to create controller", "controller", "ConfigurationPolicy")
Expand Down
78 changes: 78 additions & 0 deletions test/e2e/case25_related_object_metric_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package e2e

import (
"fmt"
"os/exec"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"open-cluster-management.io/config-policy-controller/test/utils"
)

var _ = Describe("Test related object metrics", Ordered, func() {
const (
policy1Name = "case25-test-policy-1"
policy2Name = "case25-test-policy-2"
relatedObject = "case25-configmap"
policyYaml = "../resources/case25_related_object_metric/case25-test-policy.yaml"
)
It("should create policies and related objects", func() {
By("Creating " + policyYaml)
utils.Kubectl("apply",
"-f", policyYaml,
"-n", testNamespace)
By("Verifying the policies were created")
plc1 := utils.GetWithTimeout(
clientManagedDynamic, gvrConfigPolicy, policy1Name, testNamespace, true, defaultTimeoutSeconds,
)
Expect(plc1).NotTo(BeNil())
plc2 := utils.GetWithTimeout(
clientManagedDynamic, gvrConfigPolicy, policy2Name, testNamespace, true, defaultTimeoutSeconds,
)
By("Verifying the related object was created")
Expect(plc2).NotTo(BeNil())
obj := utils.GetWithTimeout(
clientManagedDynamic, gvrConfigMap, relatedObject, "default", true, defaultTimeoutSeconds,
)
Expect(obj).NotTo(BeNil())
})

It("should correctly report common related objects", func() {
By("Checking metric endpoint for relate object gauge for policy " + policy1Name)
Eventually(func() interface{} {
return utils.GetMetrics(
"common_related_objects", fmt.Sprintf(`policy=\"%s/%s\"`, testNamespace, policy1Name))
}, defaultTimeoutSeconds, 1).Should(Equal([]string{"2"}))
By("Checking metric endpoint for relate object gauge for policy " + policy2Name)
Eventually(func() interface{} {
return utils.GetMetrics(
"common_related_objects", fmt.Sprintf(`policy=\"%s/%s\"`, testNamespace, policy2Name))
}, defaultTimeoutSeconds, 1).Should(Equal([]string{"2"}))
})

cleanup := func() {
// Delete the policies and ignore any errors (in case it was deleted previously)
cmd := exec.Command("kubectl", "delete",
"-f", policyYaml,
"-n", testNamespace)
_, _ = cmd.CombinedOutput()
opt := metav1.ListOptions{}
utils.ListWithTimeout(
clientManagedDynamic, gvrConfigPolicy, opt, 0, false, defaultTimeoutSeconds)
utils.GetWithTimeout(
clientManagedDynamic, gvrConfigMap, relatedObject, "default", false, defaultTimeoutSeconds)
}

It("should clean up", cleanup)

It("should have no common related object metrics after clean up", func() {
By("Checking metric endpoint for related object gauges")
Eventually(func() interface{} {
return utils.GetMetrics("common_related_objects")
}, defaultTimeoutSeconds, 1).Should(Equal([]string{}))
})

AfterAll(cleanup)
})
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
---
apiVersion: policy.open-cluster-management.io/v1
kind: ConfigurationPolicy
metadata:
name: case25-test-policy-1
spec:
remediationAction: enforce
pruneObjectBehavior: DeleteAll
object-templates:
- complianceType: musthave
objectDefinition:
apiVersion: v1
kind: ConfigMap
metadata:
name: case25-configmap
namespace: default
data:
name: testvalue
---
apiVersion: policy.open-cluster-management.io/v1
kind: ConfigurationPolicy
metadata:
name: case25-test-policy-2
spec:
remediationAction: enforce
pruneObjectBehavior: DeleteAll
namespaceSelector:
include: ["default"]
object-templates:
- complianceType: musthave
objectDefinition:
apiVersion: v1
kind: ConfigMap
metadata:
name: case25-configmap
data:
name: testvalue
Loading

0 comments on commit 98d1586

Please sign in to comment.