Skip to content

Commit

Permalink
Add related_object_gauge
Browse files Browse the repository at this point in the history
This gauge records any related objects monitored by multiple policies.

ref: stolostron/backlog#25357
Signed-off-by: Dale Haiducek <dhaiduce@redhat.com>
  • Loading branch information
dhaiducek committed Nov 4, 2022
1 parent e1d796b commit a7030a1
Show file tree
Hide file tree
Showing 5 changed files with 236 additions and 0 deletions.
30 changes: 30 additions & 0 deletions controllers/configurationpolicy_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ func (r *ConfigurationPolicyReconciler) Reconcile(ctx context.Context, request c
_ = policyEvalCounter.DeleteLabelValues(request.Name)
_ = compareObjEvalCounter.DeletePartialMatch(prometheus.Labels{"config_policy_name": request.Name})
_ = compareObjSecondsCounter.DeletePartialMatch(prometheus.Labels{"config_policy_name": request.Name})
_ = policyRelatedObjectGauge.DeletePartialMatch(
prometheus.Labels{"policy": fmt.Sprintf("%s/%s", request.Namespace, request.Name)})
}

return reconcile.Result{}, nil
Expand Down Expand Up @@ -180,6 +182,9 @@ func (r *ConfigurationPolicyReconciler) PeriodicallyExecConfigPolicies(freq uint
if !skipLoop {
log.Info("Processing the policies", "count", len(policiesList.Items))

// Initialize the related object map
policyRelatedObjectMap = map[string][]string{}

for i := 0; i < int(r.EvaluationConcurrency); i++ {
wg.Add(1)

Expand Down Expand Up @@ -221,6 +226,8 @@ func (r *ConfigurationPolicyReconciler) PeriodicallyExecConfigPolicies(freq uint
func (r *ConfigurationPolicyReconciler) handlePolicyWorker(
policyQueue <-chan *policyv1.ConfigurationPolicy, wg *sync.WaitGroup,
) {
// Update the related object metric after policy processing
defer updateRelatedObjectMetric()
defer wg.Done()

for policy := range policyQueue {
Expand Down Expand Up @@ -941,7 +948,21 @@ func sortRelatedObjectsAndUpdate(

update := false

// Instantiate found objects for the related object metric
found := map[string]bool{}

for _, obj := range oldRelated {
found[getObjectString(obj)] = false
}

// Format policy for related object metric
policyVal := fmt.Sprintf("%s/%s", plc.Namespace, plc.Name)

for i, newEntry := range related {
// Collect the policy and related object for related object metric
objKey := getObjectString(newEntry)
policyRelatedObjectMap[objKey] = append(policyRelatedObjectMap[objKey], policyVal)

for _, oldEntry := range oldRelated {
// Get matching objects
if gocmp.Equal(newEntry.Object, oldEntry.Object) {
Expand All @@ -951,13 +972,22 @@ func sortRelatedObjectsAndUpdate(
!(*newEntry.Properties.CreatedByPolicy) {
// Use the old properties if they existed and this is not a newly created resource
related[i].Properties = oldEntry.Properties
found[objKey] = true

break
}
}
}
}

// Clean up old related object metrics if the related object list changed
for _, obj := range oldRelated {
objString := getObjectString(obj)
if !found[objString] {
_ = policyRelatedObjectGauge.DeleteLabelValues(objString, policyVal)
}
}

if len(oldRelated) == len(related) {
for i, entry := range oldRelated {
if !gocmp.Equal(entry, related[i]) {
Expand Down
44 changes: 44 additions & 0 deletions controllers/metric.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"

"github.com/prometheus/client_golang/prometheus"
policyv1 "open-cluster-management.io/config-policy-controller/api/v1"
"sigs.k8s.io/controller-runtime/pkg/metrics"
)

Expand Down Expand Up @@ -47,6 +48,20 @@ var (
},
[]string{"config_policy_name", "namespace", "object"},
)
// The policyRelatedObjectMap collects a map of related objects to policies
// in order to populate the gauge:
// <kind.version/namespace/name>: []<policy-namespace/policy-name>
policyRelatedObjectMap map[string][]string
policyRelatedObjectGauge = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "related_object_gauge",
Help: "A gauge vector of related objects managed by multiple policies.",
},
[]string{
"relatedObject",
"policy",
},
)
)

func init() {
Expand All @@ -58,3 +73,32 @@ func init() {
metrics.Registry.MustRegister(compareObjEvalCounter)
metrics.Registry.MustRegister(policyRelatedObjectGauge)
}

// updateRelatedObjectMetric synchronizes policyRelatedObjectGauge with the contents of
// policyRelatedObjectMap. For every related object key in the map:
//   - if exactly one policy entry was recorded, the gauge series for that (object, policy)
//     pair is deleted, since the object is not shared;
//   - otherwise each recorded (object, policy) series is set to the number of entries
//     recorded for that object.
//
// Failures to look up a gauge series are logged and skipped.
//
// NOTE(review): this reads the package-level map without synchronization; if it runs while
// another goroutine is still appending to the map, that is a data race — confirm with callers.
func updateRelatedObjectMetric() {
	log.Info("Updating related_object_gauge metric ...")

	for objKey, owners := range policyRelatedObjectMap {
		// An object tracked by a single policy is not a duplicate: drop its series.
		if len(owners) == 1 {
			// The boolean result only reports whether a series existed, so it is ignored.
			_ = policyRelatedObjectGauge.DeleteLabelValues(objKey, owners[0])

			continue
		}

		ownerCount := float64(len(owners))

		for _, owner := range owners {
			series, err := policyRelatedObjectGauge.GetMetricWithLabelValues(objKey, owner)
			if err != nil {
				log.Error(err, "Failed to retrieve related object gauge")

				continue
			}

			series.Set(ownerCount)
		}
	}
}

// getObjectString builds the key used to identify a related object in the related object
// metric. The result is formatted as:
// <kind>.<apiVersion>/<namespace>/<name>
// All four components are taken verbatim from obj.Object.
// NOTE(review): presumably an apiVersion that includes a group (e.g. "apps/v1") adds an extra
// "/" to the key, and a cluster-scoped object yields an empty namespace segment — confirm.
func getObjectString(obj policyv1.RelatedObject) string {
	kind := obj.Object.Kind
	apiVersion := obj.Object.APIVersion
	namespace := obj.Object.Metadata.Namespace
	name := obj.Object.Metadata.Name

	return fmt.Sprintf("%s.%s/%s/%s", kind, apiVersion, namespace, name)
}
76 changes: 76 additions & 0 deletions test/e2e/case25_related_object_metric_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package e2e

import (
"fmt"
"os/exec"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"open-cluster-management.io/config-policy-controller/test/utils"
)

// This ordered suite applies two ConfigurationPolicies from policyYaml and checks the
// related_object_gauge metric: while both policies exist the gauge for each policy is expected
// to report 2, and after the policies are removed no related_object_gauge series should remain.
var _ = Describe("Test policy template metrics", Ordered, func() {
	const (
		policy1Name   = "case25-test-policy-1"
		policy2Name   = "case25-test-policy-2"
		relatedObject = "case25-configmap"
		policyYaml    = "../resources/case25_related_object_metric/case25-test-policy.yaml"
	)

	Describe("Create policy, placement and referenced resource in ns:"+testNamespace, func() {
		It("should be created in user ns", func() {
			By("Creating " + policyYaml)
			utils.Kubectl("apply",
				"-f", policyYaml,
				"-n", testNamespace)

			By("Verifying the policies were created")
			plc1 := utils.GetWithTimeout(
				clientManagedDynamic, gvrConfigPolicy, policy1Name, testNamespace, true, defaultTimeoutSeconds,
			)
			Expect(plc1).NotTo(BeNil())
			plc2 := utils.GetWithTimeout(
				clientManagedDynamic, gvrConfigPolicy, policy2Name, testNamespace, true, defaultTimeoutSeconds,
			)
			// Assert on the second policy before announcing the next step so that a failure
			// here is reported under the policy-verification step.
			Expect(plc2).NotTo(BeNil())

			By("Verifying the related object was created")
			obj := utils.GetWithTimeout(
				clientManagedDynamic, gvrConfigMap, relatedObject, "default", true, defaultTimeoutSeconds,
			)
			Expect(obj).NotTo(BeNil())
		})

		It("should correctly report common related objects", func() {
			// The patterns are passed to grep through a shell, hence the escaped quotes.
			By("Checking metric endpoint for relate object gauge for policy " + policy1Name)
			Eventually(func() interface{} {
				return utils.GetMetrics("related_object_gauge", fmt.Sprintf(`policy=\"%s/%s\"`, testNamespace, policy1Name))
			}, defaultTimeoutSeconds, 1).Should(Equal([]string{"2"}))
			By("Checking metric endpoint for relate object gauge for policy " + policy2Name)
			Eventually(func() interface{} {
				return utils.GetMetrics("related_object_gauge", fmt.Sprintf(`policy=\"%s/%s\"`, testNamespace, policy2Name))
			}, defaultTimeoutSeconds, 1).Should(Equal([]string{"2"}))
		})

		// cleanup removes the policies created from policyYaml. It is registered both as a spec
		// and as the AfterAll hook, so it must tolerate the resources already being gone.
		cleanup := func() {
			// Delete the policies and ignore any errors (in case it was deleted previously)
			cmd := exec.Command("kubectl", "delete",
				"-f", policyYaml,
				"-n", testNamespace)
			_, _ = cmd.CombinedOutput()
			opt := metav1.ListOptions{}
			// NOTE(review): presumably these wait until no ConfigurationPolicy is listed and
			// the related ConfigMap no longer exists — confirm against the utils helpers.
			utils.ListWithTimeout(clientManagedDynamic, gvrConfigPolicy, opt, 0, false, defaultTimeoutSeconds)
			utils.GetWithTimeout(clientManagedDynamic, gvrConfigMap, relatedObject, "default", false, defaultTimeoutSeconds)
		}

		It("should clean up", cleanup)

		It("should have no common related object metrics after clean up", func() {
			By("Checking metric endpoint for related object gauges")
			Eventually(func() interface{} {
				return utils.GetMetrics("related_object_gauge")
			}, defaultTimeoutSeconds, 1).Should(Equal([]string{}))
		})

		AfterAll(cleanup)
	})
})
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
---
apiVersion: policy.open-cluster-management.io/v1
kind: ConfigurationPolicy
metadata:
name: case25-test-policy-1
spec:
remediationAction: enforce
pruneObjectBehavior: DeleteAll
object-templates:
- complianceType: musthave
objectDefinition:
apiVersion: v1
kind: ConfigMap
metadata:
name: case25-configmap
namespace: default
data:
name: testvalue
---
apiVersion: policy.open-cluster-management.io/v1
kind: ConfigurationPolicy
metadata:
name: case25-test-policy-2
spec:
remediationAction: enforce
pruneObjectBehavior: DeleteAll
namespaceSelector:
include: ["default"]
object-templates:
- complianceType: musthave
objectDefinition:
apiVersion: v1
kind: ConfigMap
metadata:
name: case25-configmap
data:
name: testvalue
49 changes: 49 additions & 0 deletions test/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"os"
"os/exec"
"regexp"
"strings"
"time"

"github.com/ghodss/yaml"
Expand Down Expand Up @@ -242,3 +243,51 @@ func GetLastEvaluated(configPolicy *unstructured.Unstructured) (string, int64) {

return lastEvaluated, lastEvaluatedGeneration
}

// GetMetrics curls the metrics endpoint (localhost:8383/metrics), filters the response through
// one `grep` per entry of metricPatterns, and returns the last whitespace-separated field of
// every matching line (the sample value for a metric line).
//
// The command runs inside the config-policy-controller pod found in the
// open-cluster-management-agent-addon namespace. When no such pod is listed, the command is
// run locally instead, on the assumption that the controller is running on the local machine.
//
// Each pattern is interpolated verbatim into a `bash -c` command line, so callers must supply
// trusted, shell-escaped patterns. With no patterns, the response is not filtered at all.
//
// Return values:
//   - an empty, non-nil slice when the command exits with status 1 (grep found no match);
//   - a single-element slice holding the error text when a command fails for any other reason;
//   - otherwise one value per matching line.
func GetMetrics(metricPatterns ...string) []string {
	podCmd := exec.Command("kubectl", "get", "pod", "-n=open-cluster-management-agent-addon",
		"-l=name=config-policy-controller", "--no-headers")

	podInfo, err := podCmd.Output()
	if err != nil {
		return []string{err.Error()}
	}

	metricsCmd := `curl localhost:8383/metrics`
	// Only append the grep pipeline when there is something to filter on; a dangling
	// "| grep " with no pattern is a usage error rather than "no match".
	if len(metricPatterns) > 0 {
		metricsCmd += " | grep " + strings.Join(metricPatterns, " | grep ")
	}

	// The pod name is the first column of the output. It is "No" when stdout carries the
	// "No resources found" message, and empty when stdout is empty (Output captures stdout
	// only, so a message written to stderr leaves nothing to parse).
	podName := ""
	if columns := strings.Fields(string(podInfo)); len(columns) > 0 {
		podName = columns[0]
	}

	var cmd *exec.Cmd

	if podName == "" || podName == "No" {
		// A missing pod could mean the controller is running locally
		cmd = exec.Command("bash", "-c", metricsCmd)
	} else {
		cmd = exec.Command("kubectl", "exec", "-n=open-cluster-management-agent-addon", podName, "-c",
			"config-policy-controller", "--", "bash", "-c", metricsCmd)
	}

	matchingMetricsRaw, err := cmd.Output()
	if err != nil {
		if err.Error() == "exit status 1" {
			// exit 1 indicates that grep couldn't find a match. Return an empty (non-nil)
			// slice so callers can compare against []string{}.
			return []string{}
		}

		return []string{err.Error()}
	}

	matchingMetrics := strings.Split(strings.TrimSpace(string(matchingMetricsRaw)), "\n")
	values := make([]string, len(matchingMetrics))

	for i, metric := range matchingMetrics {
		// The value is the last field on the line; blank lines keep an empty string.
		fields := strings.Fields(metric)
		if len(fields) > 0 {
			values[i] = fields[len(fields)-1]
		}
	}

	return values
}

0 comments on commit a7030a1

Please sign in to comment.