Skip to content

Commit

Permalink
Merge pull request #46 from heliapb/feat/add_prom_agent
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolastakashi authored Dec 21, 2024
2 parents a68c961 + 1f2037a commit 07e44b9
Show file tree
Hide file tree
Showing 6 changed files with 784 additions and 144 deletions.
18 changes: 17 additions & 1 deletion Documentation/commands/analyze/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ The Prometheus server requires proper RBAC (Role-Based Access Control) rules to

Because Prometheus only reads objects from the Kubernetes API, it requires the get, list, and watch verbs. Because Prometheus can also be used to scrape metrics from the Kubernetes API server, it additionally requires access to the API server's /metrics endpoint. Finally, in addition to these rules, Prometheus must be able to get configmaps so that it can pull in rule files from ConfigMap objects.

### Prometheus Namespace Selectors and Service Selectors
### Prometheus Namespace Selectors and Monitors Selectors

The Prometheus server relies on proper service discovery to function correctly. To achieve this, any defined Namespace Selector must correspond to an existing namespace. Similarly, every resource selector must match existing resources: whether it selects ServiceMonitor, PodMonitor, ScrapeConfig, Probe, or PrometheusRule objects, the respective Custom Resource (CR) must exist and be matched by the selector.

Expand All @@ -91,3 +91,19 @@ Alertmanager configuration must be provided in one of the following ways:
* As a Kubernetes secret provided by the user, that needs to ensure the data is stored in a file called alertmanager.yaml
* The Operator will provide a default generated Kubernetes secret to use
* Via the AlertmanagerConfig CRDs (Custom Resource Definitions), that should be matched by a Namespace selector in a given namespace, a ConfigSelector or the ConfigSelector Name

## Analyze Prometheus Agent

### Prometheus Agent Existence

The Prometheus Agent object must exist in the Kubernetes cluster, which can be confirmed by checking for the presence of the Prometheus Agent CR (Custom Resource) in the specified namespace and under the given name.

### Prometheus Agent RBAC Rules

The Prometheus Agent server requires proper RBAC (Role-Based Access Control) rules to function correctly. This means the service account associated with the Prometheus Agent must have permissions aligned with the Prometheus Agent CRDs (Custom Resource Definitions) present in the cluster.

Because Prometheus Agent only reads objects from the Kubernetes API, it requires the get, list, and watch verbs. Because Prometheus Agent can also be used to scrape metrics from the Kubernetes API server, it additionally requires access to the API server's /metrics endpoint. Finally, in addition to these rules, Prometheus Agent must be able to get configmaps so that it can pull in rule files from ConfigMap objects.

### Prometheus Agent Namespace Selectors and Monitors Selectors

The Prometheus Agent server relies on proper service discovery to function correctly. To achieve this, any defined Namespace Selector must correspond to an existing namespace. Similarly, every resource selector must match existing resources: whether it selects ServiceMonitor, PodMonitor, ScrapeConfig, or Probe objects, the respective Custom Resource (CR) must exist and be matched by the selector.
11 changes: 7 additions & 4 deletions cmd/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,11 @@ import (
type AnalyzeKind string

const (
ServiceMonitor AnalyzeKind = "servicemonitor"
Operator AnalyzeKind = "operator"
Prometheus AnalyzeKind = "prometheus"
Alertmanager AnalyzeKind = "alertmanager"
ServiceMonitor AnalyzeKind = "servicemonitor"
Operator AnalyzeKind = "operator"
Prometheus AnalyzeKind = "prometheus"
Alertmanager AnalyzeKind = "alertmanager"
PrometheusAgent AnalyzeKind = "prometheusagent"
)

type AnalyzeFlags struct {
Expand Down Expand Up @@ -84,6 +85,8 @@ func run(cmd *cobra.Command, _ []string) error {
return analyzers.RunPrometheusAnalyzer(cmd.Context(), clientSets, analyzerFlags.Name, analyzerFlags.Namespace)
case Alertmanager:
return analyzers.RunAlertmanagerAnalyzer(cmd.Context(), clientSets, analyzerFlags.Name, analyzerFlags.Namespace)
case PrometheusAgent:
return analyzers.RunPrometheusAgentAnalyzer(cmd.Context(), clientSets, analyzerFlags.Name, analyzerFlags.Namespace)
default:
return fmt.Errorf("kind %s not supported", analyzerFlags.Kind)
}
Expand Down
145 changes: 6 additions & 139 deletions internal/analyzers/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,10 @@ import (
"context"
"fmt"
"log/slog"
"strings"

"github.com/prometheus-operator/poctl/internal/k8sutil"
v1 "k8s.io/api/rbac/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
)

// Resource kind names accepted by checkResourceLabelSelectors; each one
// selects which monitoring custom resource a label selector is checked against.
const (
ServiceMonitor = "ServiceMonitor"
PodMonitor = "PodMonitor"
Probe = "Probe"
ScrapeConfig = "ScrapeConfig"
PrometheusRule = "PrometheusRule"
)

func RunPrometheusAnalyzer(ctx context.Context, clientSets *k8sutil.ClientSets, name, namespace string) error {
Expand Down Expand Up @@ -61,7 +50,7 @@ func RunPrometheusAnalyzer(ctx context.Context, clientSets *k8sutil.ClientSets,
return fmt.Errorf("failed to get ClusterRole %s", crb.RoleRef.Name)
}

err = checkClusterRoleRules(crb, cr)
err = k8sutil.CheckPrometheusClusterRoleRules(crb, cr)
if err != nil {
return err
}
Expand All @@ -87,148 +76,26 @@ func RunPrometheusAnalyzer(ctx context.Context, clientSets *k8sutil.ClientSets,
return fmt.Errorf("ruleNamespaceSelector is not properly defined: %s", err)
}

if err := checkResourceLabelSelectors(ctx, clientSets, prometheus.Spec.ServiceMonitorSelector, ServiceMonitor, namespace); err != nil {
if err := k8sutil.CheckResourceLabelSelectors(ctx, *clientSets, prometheus.Spec.ServiceMonitorSelector, k8sutil.ServiceMonitor, namespace); err != nil {
return fmt.Errorf("serviceMonitorSelector is not properly defined: %s", err)
}

if err := checkResourceLabelSelectors(ctx, clientSets, prometheus.Spec.PodMonitorSelector, PodMonitor, namespace); err != nil {
if err := k8sutil.CheckResourceLabelSelectors(ctx, *clientSets, prometheus.Spec.PodMonitorSelector, k8sutil.PodMonitor, namespace); err != nil {
return fmt.Errorf("podMonitorSelector is not properly defined: %s", err)
}

if err := checkResourceLabelSelectors(ctx, clientSets, prometheus.Spec.ProbeSelector, Probe, namespace); err != nil {
if err := k8sutil.CheckResourceLabelSelectors(ctx, *clientSets, prometheus.Spec.ProbeSelector, k8sutil.Probe, namespace); err != nil {
return fmt.Errorf("probeSelector is not properly defined: %s", err)
}

if err := checkResourceLabelSelectors(ctx, clientSets, prometheus.Spec.ScrapeConfigSelector, ScrapeConfig, namespace); err != nil {
if err := k8sutil.CheckResourceLabelSelectors(ctx, *clientSets, prometheus.Spec.ScrapeConfigSelector, k8sutil.ScrapeConfig, namespace); err != nil {
return fmt.Errorf("scrapeConfigSelector is not properly defined: %s", err)
}

if err := checkResourceLabelSelectors(ctx, clientSets, prometheus.Spec.RuleSelector, PrometheusRule, namespace); err != nil {
if err := k8sutil.CheckResourceLabelSelectors(ctx, *clientSets, prometheus.Spec.RuleSelector, k8sutil.PrometheusRule, namespace); err != nil {
return fmt.Errorf("ruleSelector is not properly defined: %s", err)
}

slog.Info("Prometheus is compliant, no issues found", "name", name, "namespace", namespace)
return nil
}

// checkClusterRoleRules inspects every rule of the ClusterRole cr and collects
// a message for each permission problem it finds:
//
//   - a rule that lists the "configmaps" resource without the "get" verb;
//   - a rule that covers at least one other resource and at least one API
//     group but lacks any of the "get", "list" or "watch" verbs;
//   - a rule that lists the "/metrics" non-resource URL without the "get" verb.
//
// crb is only used to name the role (crb.RoleRef.Name) in the messages. Verbs
// are compared literally; a wildcard verb such as "*" is not expanded.
//
// It returns nil when no problem is found, otherwise a single error whose
// text joins all collected messages, one per line.
func checkClusterRoleRules(crb v1.ClusterRoleBinding, cr *v1.ClusterRole) error {
	var errs []string
	verbsToCheck := []string{"get", "list", "watch"}

	// hasVerb reports whether want appears verbatim in verbs.
	hasVerb := func(verbs []string, want string) bool {
		for _, verb := range verbs {
			if verb == want {
				return true
			}
		}
		return false
	}

	for _, rule := range cr.Rules {
		// The missing verbs depend only on this rule's verb list, so they are
		// computed once per rule. Keeping the slice local to the iteration
		// prevents verbs missing from an earlier rule from being reported
		// against later rules, and prevents the same verb from being listed
		// several times.
		var missingVerbs []string
		for _, requiredVerb := range verbsToCheck {
			if !hasVerb(rule.Verbs, requiredVerb) {
				missingVerbs = append(missingVerbs, requiredVerb)
			}
		}

		// The read-verb requirement applies only to rules that name at least
		// one resource other than configmaps and at least one API group.
		needsReadVerbs := false
		for _, resource := range rule.Resources {
			if resource == "configmaps" {
				if !hasVerb(rule.Verbs, "get") {
					errs = append(errs, fmt.Sprintf("ClusterRole %s does not include 'configmaps' with 'get' in its verbs", crb.RoleRef.Name))
				}
				continue
			}
			if len(rule.APIGroups) > 0 {
				needsReadVerbs = true
			}
		}
		if needsReadVerbs && len(missingVerbs) > 0 {
			errs = append(errs, fmt.Sprintf("ClusterRole %s is missing necessary verbs for APIGroups: %v", crb.RoleRef.Name, missingVerbs))
		}

		for _, nonResource := range rule.NonResourceURLs {
			if nonResource == "/metrics" && !hasVerb(rule.Verbs, "get") {
				errs = append(errs, fmt.Sprintf("ClusterRole %s does not include 'get' verb for NonResourceURL '/metrics'", crb.RoleRef.Name))
			}
		}
	}

	if len(errs) > 0 {
		return fmt.Errorf("multiple errors found:\n%s", strings.Join(errs, "\n"))
	}
	return nil
}

// checkResourceLabelSelectors verifies that labelSelector matches at least one
// object of the kind named by resourceName in namespace.
//
// resourceName must be one of ServiceMonitor, PodMonitor, Probe, ScrapeConfig
// or PrometheusRule; any other value yields an "unknown selector type" error.
//
// A nil selector is reported as an error. A selector with neither match
// labels nor match expressions returns nil without querying the cluster.
// Otherwise the selector is converted with metav1.LabelSelectorAsMap (a
// conversion failure is returned as an error), the objects of the requested
// kind are listed with it, and an error is returned when the list call fails
// or the result is empty.
func checkResourceLabelSelectors(ctx context.Context, clientSets *k8sutil.ClientSets, labelSelector *metav1.LabelSelector, resourceName, namespace string) error {
	if labelSelector == nil {
		return fmt.Errorf("%s selector is not defined", resourceName)
	}

	if len(labelSelector.MatchLabels) == 0 && len(labelSelector.MatchExpressions) == 0 {
		return nil
	}

	labelMap, err := metav1.LabelSelectorAsMap(labelSelector)
	if err != nil {
		return fmt.Errorf("invalid label selector format in %s: %w", resourceName, err)
	}

	// Every branch lists with the same selector, so build the options once.
	listOpts := metav1.ListOptions{LabelSelector: labels.SelectorFromSet(labelMap).String()}

	switch resourceName {
	case ServiceMonitor:
		serviceMonitors, err := clientSets.MClient.MonitoringV1().ServiceMonitors(namespace).List(ctx, listOpts)
		if err != nil {
			return fmt.Errorf("failed to list ServiceMonitors in %s: %w", namespace, err)
		}
		if len(serviceMonitors.Items) == 0 {
			return fmt.Errorf("no ServiceMonitors match the provided selector in Prometheus %s", namespace)
		}
	case PodMonitor:
		podMonitors, err := clientSets.MClient.MonitoringV1().PodMonitors(namespace).List(ctx, listOpts)
		if err != nil {
			return fmt.Errorf("failed to list PodMonitors in %s: %w", namespace, err)
		}
		if len(podMonitors.Items) == 0 {
			return fmt.Errorf("no PodMonitors match the provided selector in Prometheus %s", namespace)
		}
	case Probe:
		probes, err := clientSets.MClient.MonitoringV1().Probes(namespace).List(ctx, listOpts)
		if err != nil {
			return fmt.Errorf("failed to list Probes in %s: %w", namespace, err)
		}
		if len(probes.Items) == 0 {
			return fmt.Errorf("no Probes match the provided selector in Prometheus %s", namespace)
		}
	case ScrapeConfig:
		scrapeConfigs, err := clientSets.MClient.MonitoringV1alpha1().ScrapeConfigs(namespace).List(ctx, listOpts)
		if err != nil {
			return fmt.Errorf("failed to list ScrapeConfigs in %s: %w", namespace, err)
		}
		if len(scrapeConfigs.Items) == 0 {
			return fmt.Errorf("no ScrapeConfigs match the provided selector in Prometheus %s", namespace)
		}
	case PrometheusRule:
		promRules, err := clientSets.MClient.MonitoringV1().PrometheusRules(namespace).List(ctx, listOpts)
		if err != nil {
			// Previously reported as "failed to list Probes", which pointed
			// at the wrong resource kind.
			return fmt.Errorf("failed to list PrometheusRules in %s: %w", namespace, err)
		}
		if len(promRules.Items) == 0 {
			return fmt.Errorf("no PrometheusRules match the provided selector in Prometheus %s", namespace)
		}
	default:
		return fmt.Errorf("unknown selector type: %s", resourceName)
	}

	return nil
}
93 changes: 93 additions & 0 deletions internal/analyzers/prometheusagent.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Copyright 2024 The prometheus-operator Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package analyzers

import (
"context"
"fmt"
"log/slog"

"github.com/prometheus-operator/poctl/internal/k8sutil"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// RunPrometheusAgentAnalyzer checks that the PrometheusAgent custom resource
// called name in namespace exists and that its RBAC and selector
// configuration pass the analyzer's checks.
//
// The checks run in the order below and the first failure is returned:
//
//   - the PrometheusAgent object can be fetched;
//   - the ClusterRoleBindings labelled "name=prometheus-agent" include the
//     agent's service account, as reported by
//     k8sutil.IsServiceAccountBoundToRoleBindingList;
//   - the ClusterRole referenced by each of those bindings can be fetched and
//     passes k8sutil.CheckPrometheusClusterRoleRules;
//   - the pod monitor, probe, service monitor and scrape config namespace
//     selectors pass k8sutil.CheckResourceNamespaceSelectors;
//   - the service monitor, pod monitor, probe and scrape config label
//     selectors pass k8sutil.CheckResourceLabelSelectors for namespace.
//
// When every check passes it logs an informational message and returns nil.
func RunPrometheusAgentAnalyzer(ctx context.Context, clientSets *k8sutil.ClientSets, name, namespace string) error {
	agent, err := clientSets.MClient.MonitoringV1alpha1().PrometheusAgents(namespace).Get(ctx, name, metav1.GetOptions{})
	if err != nil {
		if errors.IsNotFound(err) {
			return fmt.Errorf("prometheus agent %s not found in namespace %s", name, namespace)
		}
		return fmt.Errorf("error while getting Prometheus Agent: %w", err)
	}

	// NOTE(review): only bindings carrying this exact label are considered;
	// confirm that deployments are expected to label them this way.
	bindings, err := clientSets.KClient.RbacV1().ClusterRoleBindings().List(ctx, metav1.ListOptions{
		LabelSelector: "name=prometheus-agent",
	})
	if err != nil {
		return fmt.Errorf("failed to list ClusterRoleBindings: %w", err)
	}

	if !k8sutil.IsServiceAccountBoundToRoleBindingList(bindings, agent.Spec.ServiceAccountName) {
		return fmt.Errorf("serviceAccount %s is not bound to any ClusterRoleBindings", agent.Spec.ServiceAccountName)
	}

	for _, crb := range bindings.Items {
		cr, err := clientSets.KClient.RbacV1().ClusterRoles().Get(ctx, crb.RoleRef.Name, metav1.GetOptions{})
		if err != nil {
			// Keep the cause so the caller can tell "not found" from
			// "forbidden" or a transport failure.
			return fmt.Errorf("failed to get ClusterRole %s: %w", crb.RoleRef.Name, err)
		}

		if err := k8sutil.CheckPrometheusClusterRoleRules(crb, cr); err != nil {
			return err
		}
	}

	if err := k8sutil.CheckResourceNamespaceSelectors(ctx, *clientSets, agent.Spec.PodMonitorNamespaceSelector); err != nil {
		return fmt.Errorf("podMonitorNamespaceSelector is not properly defined: %w", err)
	}

	if err := k8sutil.CheckResourceNamespaceSelectors(ctx, *clientSets, agent.Spec.ProbeNamespaceSelector); err != nil {
		return fmt.Errorf("probeNamespaceSelector is not properly defined: %w", err)
	}

	if err := k8sutil.CheckResourceNamespaceSelectors(ctx, *clientSets, agent.Spec.ServiceMonitorNamespaceSelector); err != nil {
		return fmt.Errorf("serviceMonitorNamespaceSelector is not properly defined: %w", err)
	}

	if err := k8sutil.CheckResourceNamespaceSelectors(ctx, *clientSets, agent.Spec.ScrapeConfigNamespaceSelector); err != nil {
		return fmt.Errorf("scrapeConfigNamespaceSelector is not properly defined: %w", err)
	}

	if err := k8sutil.CheckResourceLabelSelectors(ctx, *clientSets, agent.Spec.ServiceMonitorSelector, k8sutil.ServiceMonitor, namespace); err != nil {
		return fmt.Errorf("serviceMonitorSelector is not properly defined: %w", err)
	}

	if err := k8sutil.CheckResourceLabelSelectors(ctx, *clientSets, agent.Spec.PodMonitorSelector, k8sutil.PodMonitor, namespace); err != nil {
		return fmt.Errorf("podMonitorSelector is not properly defined: %w", err)
	}

	if err := k8sutil.CheckResourceLabelSelectors(ctx, *clientSets, agent.Spec.ProbeSelector, k8sutil.Probe, namespace); err != nil {
		return fmt.Errorf("probeSelector is not properly defined: %w", err)
	}

	if err := k8sutil.CheckResourceLabelSelectors(ctx, *clientSets, agent.Spec.ScrapeConfigSelector, k8sutil.ScrapeConfig, namespace); err != nil {
		return fmt.Errorf("scrapeConfigSelector is not properly defined: %w", err)
	}

	slog.Info("Prometheus Agent is compliant, no issues found", "name", name, "namespace", namespace)
	return nil
}
Loading

0 comments on commit 07e44b9

Please sign in to comment.