Skip to content

Commit

Permalink
add initial ideas
Browse files Browse the repository at this point in the history
  • Loading branch information
NiniOak committed Aug 12, 2024
1 parent ede17f0 commit eb18887
Show file tree
Hide file tree
Showing 11 changed files with 341 additions and 42 deletions.
3 changes: 3 additions & 0 deletions .changelog/4212.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:enhancement
Expose prometheus scrape metrics on sync-catalog pods
```
26 changes: 25 additions & 1 deletion charts/consul/templates/sync-catalog-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,13 @@ spec:
"vault.hashicorp.com/namespace": "{{ .Values.global.secretsBackend.vault.vaultNamespace }}"
{{- end }}
{{- end }}
{{- if or .Values.global.metrics.enabled .Values.syncCatalog.metrics.enabled }}
"prometheus.io/scrape": "true"
{{- if not (hasKey (default "" .Values.syncCatalog.annotations | fromYaml) "prometheus.io/path")}}
"prometheus.io/path": {{ default "/metrics" .Values.syncCatalog.metrics.path }}
{{- end }}
"prometheus.io/port": {{ .Values.syncCatalog.metrics.port | default "20200" | quote }}
{{- end }}
spec:
serviceAccountName: {{ template "consul.fullname" . }}-sync-catalog
volumes:
Expand Down Expand Up @@ -196,6 +203,18 @@ spec:
{{- if .Values.syncCatalog.syncLoadBalancerEndpoints }}
-sync-lb-services-endpoints=true \
{{- end }}
{{- if (or (and (ne (.Values.syncCatalog.metrics.enabled | toString) "-") .Values.syncCatalog.metrics.enabled) (and (eq (.Values.syncCatalog.metrics.enabled | toString) "-") .Values.global.metrics.enabled)) }}
-enable-metrics=true \
{{- else }}
-enable-metrics=false \
{{- end }}
{{- if .Values.syncCatalog.metrics.path }}
-metrics-path={{ .Values.syncCatalog.metrics.path }} \
{{- end }}
{{- if .Values.syncCatalog.metrics.port }}
-metrics-port={{ .Values.syncCatalog.metrics.port }} \
{{- end }}
-prometheus-retention-time={{ .Values.global.metrics.agentMetricsRetentionTime }} \
livenessProbe:
httpGet:
path: /health/ready
Expand All @@ -215,11 +234,16 @@ spec:
initialDelaySeconds: 10
periodSeconds: 5
successThreshold: 1
timeoutSeconds: 5
timeoutSeconds: 5
{{- with .Values.syncCatalog.resources }}
resources:
{{- toYaml . | nindent 10 }}
{{- end }}
{{- if or (eq (.Values.syncCatalog.metrics.enabled | toString) "-") .Values.syncCatalog.metrics.enabled .Values.global.metrics.enabled }}
ports:
- name: prometheus
containerPort: {{ .Values.syncCatalog.metrics.port | default "20200" | int }}
{{- end }}
{{- if .Values.syncCatalog.priorityClassName }}
priorityClassName: {{ .Values.syncCatalog.priorityClassName | quote }}
{{- end }}
Expand Down
13 changes: 13 additions & 0 deletions charts/consul/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2203,6 +2203,19 @@ syncCatalog:
# If false, LoadBalancer endpoints are not synced to Consul.
syncLoadBalancerEndpoints: false

# Metrics settings for syncCatalog
metrics:
# This value enables or disables metrics collection for the sync catalog process, overriding the global metrics collection settings. When left as "-", the global setting (`global.metrics.enabled`) is used.
# @type: boolean
enabled: "-"
# This value sets the port to use for scraping syncCatalog metrics via prometheus, defaults to 20200 if not set. Must be in the port
# range of 1024-65535.
# @type: int
port: null
# This value sets the path to use for scraping syncCatalog metrics via prometheus, defaults to /metrics if not set.
# @type: string
path: null

ingress:
# Syncs the hostname from a Kubernetes Ingress resource to service registrations
# when a rule matched a service. Currently only supports host based routing and
Expand Down
67 changes: 67 additions & 0 deletions control-plane/catalog/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package metrics

import (
"strconv"

metricsutil "github.com/hashicorp/consul-k8s/control-plane/subcommand/common"
)

const (
	// defaultScrapePort is the port returned by syncCatalogMetricsPort when the
	// requested port cannot be parsed or is outside the 1024-65535 range.
	defaultScrapePort = 20200
	// defaultScrapePath is the path returned by syncCatalogMetricsPath when no
	// scrape path is set.
	defaultScrapePath = "/metrics"
)

// Config holds the Prometheus metrics settings for the sync-catalog process.
// Values originate from the helm chart and reach the controller as
// command-line flags.
type Config struct {
	// EnableSyncCatalogMetrics indicates whether or not SyncCatalog metrics should be enabled
	// by default on a deployed consul-sync-catalog, passed from the helm chart via command-line flags to our controller.
	EnableSyncCatalogMetrics bool

	// DefaultPrometheusScrapePath is the default path to use for scraping prometheus metrics,
	// passed from the helm chart via command-line flags to our controller.
	DefaultPrometheusScrapePath string

	// DefaultPrometheusScrapePort is the default port to use for scraping prometheus metrics,
	// passed from the helm chart via command-line flags to our controller.
	DefaultPrometheusScrapePort int

	// PrometheusMetricsRetentionTime configures the retention time for metrics in the metrics store,
	// passed from the helm chart via command-line flags to our controller.
	// NOTE(review): SyncCatalogMetricsConfig in this file does not populate this
	// field; confirm that a caller sets it separately.
	PrometheusMetricsRetentionTime string
}

// syncCatalogMetricsEnabled reports whether sync-catalog metrics are turned on.
//
// Despite its name, the path argument carries the raw enable-metrics flag
// value (see SyncCatalogMetricsConfig). The value is handed to
// metricsutil.GetMetricsEnabled; when that helper reports the value as unset,
// metrics are treated as disabled. No global setting is consulted here.
func syncCatalogMetricsEnabled(path string) bool {
	value, ok := metricsutil.GetMetricsEnabled(path)
	return ok && value
}

// syncCatalogMetricsPort converts portString into the port used for scraping
// metrics.
//
// defaultScrapePort is returned when portString is not a base-10 integer or
// when the parsed value lies outside the inclusive range 1024-65535 (that is,
// privileged ports and values that are not valid TCP ports are rejected).
func syncCatalogMetricsPort(portString string) int {
	parsed, parseErr := strconv.Atoi(portString)

	switch {
	case parseErr != nil:
		// Empty or non-numeric input: silently fall back to the default.
		return defaultScrapePort
	case parsed < 1024, parsed > 65535:
		// Privileged or out-of-range port: fall back to the default.
		return defaultScrapePort
	default:
		return parsed
	}
}

// syncCatalogMetricsPath resolves the path used for scraping metrics.
//
// The supplied path is passed to metricsutil.GetScrapePath; when that helper
// reports the path as unset, defaultScrapePath is returned instead.
func syncCatalogMetricsPath(path string) string {
	scrapePath, ok := metricsutil.GetScrapePath(path)
	if !ok {
		return defaultScrapePath
	}

	return scrapePath
}

// SyncCatalogMetricsConfig builds a Config from the raw flag values for
// enabling metrics, the metrics port, and the metrics path.
//
// Each value is normalised by its helper (syncCatalogMetricsEnabled,
// syncCatalogMetricsPort, syncCatalogMetricsPath). PrometheusMetricsRetentionTime
// is not set here and keeps its zero value.
func SyncCatalogMetricsConfig(enableMetrics, metricsPort, metricsPath string) Config {
	var cfg Config

	cfg.EnableSyncCatalogMetrics = syncCatalogMetricsEnabled(enableMetrics)
	cfg.DefaultPrometheusScrapePort = syncCatalogMetricsPort(metricsPort)
	cfg.DefaultPrometheusScrapePath = syncCatalogMetricsPath(metricsPath)

	return cfg
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ type RegistrationsController struct {

func (r *RegistrationsController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
log := r.Log.V(1).WithValues("registration", req.NamespacedName)
log.Info("Reconciling Registaration")
log.Info("Reconciling Registration")

registration := &v1alpha1.Registration{}
// get the registration
Expand Down
6 changes: 6 additions & 0 deletions control-plane/catalog/to-consul/resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"sync"

mapset "github.com/deckarep/golang-set"
"github.com/hashicorp/consul-k8s/control-plane/catalog/metrics"
"github.com/hashicorp/consul-k8s/control-plane/helper/controller"
"github.com/hashicorp/consul-k8s/control-plane/helper/parsetags"
"github.com/hashicorp/consul-k8s/control-plane/namespaces"
Expand Down Expand Up @@ -102,6 +103,11 @@ type ServiceResource struct {
// LoadBalancerEndpointsSync set to true (default false) will sync ServiceTypeLoadBalancer endpoints.
LoadBalancerEndpointsSync bool

// MetricsConfig contains metrics configuration and has methods to determine whether
// configuration should come from the default flags or annotations. The syncCatalog uses this to configure prometheus
// annotations.
MetricsConfig metrics.Config

// NodeExternalIPSync set to true (the default) syncs NodePort services
// using the node's external ip address. When false, the node's internal
// ip address will be used instead.
Expand Down
58 changes: 57 additions & 1 deletion control-plane/catalog/to-consul/syncer.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import (
"sync"
"time"

"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/cenkalti/backoff"
mapset "github.com/deckarep/golang-set"
"github.com/hashicorp/consul-k8s/control-plane/consul"
Expand All @@ -17,6 +19,25 @@ import (
"github.com/hashicorp/go-hclog"
)

// SyncToConsulCounters defines the Prometheus counters that ConsulSyncer
// increments through its PrometheusSink while registering and deregistering
// Kubernetes services in the Consul catalog.
var SyncToConsulCounters = []prometheus.CounterDefinition{
	{
		Name: []string{"consul", "sync_catalog", "to_consul"},
		Help: "Increments for each service registered to Consul via catalog sync",
	},
	{
		Name: []string{"consul", "sync_catalog", "to_consul", "deregister"},
		Help: "Increments for each service deregistered from Consul via catalog sync",
	},
	{
		Name: []string{"consul", "sync_catalog", "to_consul", "error"},
		Help: "Increments whenever a Consul api client returns an error for a catalog sync register request",
	},
	{
		Name: []string{"consul", "sync_catalog", "to_consul", "deregister", "error"},
		Help: "Increments whenever a Consul api client returns an error for a catalog sync deregister request",
	},
}

const (
// ConsulSyncPeriod is how often the syncer will attempt to
// reconcile the expected service states with the remote Consul server.
Expand Down Expand Up @@ -101,6 +122,8 @@ type ConsulSyncer struct {
// watchers is all namespaces mapped to a map of Consul service
// names mapped to a cancel function for watcher routines
watchers map[string]map[string]context.CancelFunc

PrometheusSink *prometheus.PrometheusSink
}

// Sync implements Syncer.
Expand Down Expand Up @@ -206,7 +229,7 @@ func (s *ConsulSyncer) watchReapableServices(ctx context.Context) {
if err != nil {
s.Log.Warn("error querying services, will retry", "err", err)
} else {
s.Log.Debug("[watchReapableServices] services returned from catalog",
s.Log.Debug("[watchReapableServices] servicePrometheusSinks returned from catalog",
"services", services)
}

Expand Down Expand Up @@ -433,14 +456,28 @@ func (s *ConsulSyncer) syncFull(ctx context.Context) {
"node-name", r.Node,
"service-id", r.ServiceID,
"service-consul-namespace", r.Namespace)

_, err = consulClient.Catalog().Deregister(r, nil)
if err != nil {
// metric count for errors deregistering K8S services from Consul via catalog sync
labels := []metrics.Label{
{Name: "error", Value: err.Error()},
}
s.PrometheusSink.IncrCounterWithLabels([]string{"consul", "sync_catalog", "to_consul", "deregister", "error"}, 1, labels)

s.Log.Warn("error deregistering service",
"node-name", r.Node,
"service-id", r.ServiceID,
"service-consul-namespace", r.Namespace,
"err", err)
continue
}

// metric count for K8S services deregistered from Consul by the catalog sync process
labels := []metrics.Label{
{Name: "id", Value: r.ServiceID},
}
s.PrometheusSink.IncrCounterWithLabels([]string{"consul", "sync_catalog", "to_consul", "deregister"}, 1, labels)
}

// Always clear deregistrations, they'll repopulate if we had errors
Expand All @@ -465,6 +502,12 @@ func (s *ConsulSyncer) syncFull(ctx context.Context) {
// Register the service.
_, err = consulClient.Catalog().Register(r, nil)
if err != nil {
// metric count for error syncing K8S services to Consul
labels := []metrics.Label{
{Name: "error", Value: err.Error()},
}
s.PrometheusSink.IncrCounterWithLabels([]string{"consul", "sync_catalog", "to_consul", "error"}, 1, labels)

s.Log.Warn("error registering service",
"node-name", r.Node,
"service-name", r.Service.Service,
Expand All @@ -478,6 +521,19 @@ func (s *ConsulSyncer) syncFull(ctx context.Context) {
"service-name", r.Service.Service,
"consul-namespace-name", r.Service.Namespace,
"service", r.Service)

// metric count and service metadata for the catalog sync process syncing K8S services to Consul
labels := []metrics.Label{
{Name: "id", Value: r.Service.ID},
{Name: "service", Value: r.Service.Service},
{Name: "external_k8s_ref_name", Value: r.Service.Meta["external-k8s-ref-name"]},
{Name: "namespace", Value: r.Service.Namespace},
{Name: "datacenter", Value: r.Datacenter},
}
if r.Check != nil {
labels = append(labels, metrics.Label{Name: "status", Value: r.Check.Status})
}
s.PrometheusSink.IncrCounterWithLabels([]string{"consul", "sync_catalog", "to_consul"}, 1, labels)
}
}
}
Expand Down
43 changes: 43 additions & 0 deletions control-plane/catalog/to-k8s/sink.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import (
"sync"
"time"

"github.com/armon/go-metrics"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/consul-k8s/control-plane/helper/coalesce"
"github.com/hashicorp/go-hclog"
apiv1 "k8s.io/api/core/v1"
Expand All @@ -19,6 +21,25 @@ import (
"k8s.io/client-go/tools/cache"
)

// SyncToK8sCounters defines the Prometheus counters that K8SSink increments
// through its PrometheusSink while creating and deleting Kubernetes services
// that mirror Consul services.
var SyncToK8sCounters = []prometheus.CounterDefinition{
	{
		Name: []string{"consul", "sync_catalog", "to_k8s"},
		Help: "Increments for each service registered to kubernetes from Consul via catalog sync",
	},
	{
		Name: []string{"consul", "sync_catalog", "to_k8s", "deregister"},
		Help: "Increments for each service deregistered from kubernetes via catalog sync",
	},
	{
		// The errors counted here come from the Kubernetes service client, not
		// from a Consul api client.
		Name: []string{"consul", "sync_catalog", "to_k8s", "error"},
		Help: "Increments whenever a Kubernetes api client returns an error for a catalog sync register request",
	},
	{
		Name: []string{"consul", "sync_catalog", "to_k8s", "deregister", "error"},
		Help: "Increments whenever a Kubernetes api client returns an error for a catalog sync deregister request",
	},
}

const (
// K8SQuietPeriod is the time to wait for no service changes before syncing.
K8SQuietPeriod = 1 * time.Second
Expand Down Expand Up @@ -83,6 +104,8 @@ type K8SSink struct {
// It's populated from Kubernetes data.
serviceMapConsul map[string]*apiv1.Service
triggerCh chan struct{}

PrometheusSink *prometheus.PrometheusSink
}

// SetServices implements Sink.
Expand Down Expand Up @@ -232,7 +255,18 @@ func (s *K8SSink) Run(ch <-chan struct{}) {
for _, name := range delete {
if err := svcClient.Delete(s.Ctx, name, metav1.DeleteOptions{}); err != nil {
s.Log.Warn("error deleting service", "name", name, "error", err)

// metric count for errors deleting synced Consul services from K8s
labels := []metrics.Label{
{Name: "error", Value: err.Error()},
}
s.PrometheusSink.IncrCounterWithLabels([]string{"consul", "sync_catalog", "to_k8s", "deregister", "error"}, 1, labels)
}
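// metric count for each K8s service delete issued by catalog sync. NOTE(review): this is also incremented when the delete above returned an error; confirm that is intended.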
labels := []metrics.Label{
{Name: "service_name", Value: name},
}
s.PrometheusSink.IncrCounterWithLabels([]string{"consul", "sync_catalog", "to_k8s", "deregister"}, 1, labels)
}

for _, svc := range update {
Expand All @@ -246,7 +280,16 @@ func (s *K8SSink) Run(ch <-chan struct{}) {
_, err := svcClient.Create(s.Ctx, svc, metav1.CreateOptions{})
if err != nil {
s.Log.Warn("error creating service", "name", svc.Name, "error", err)
labels := []metrics.Label{
{Name: "error", Value: err.Error()},
}
s.PrometheusSink.IncrCounterWithLabels([]string{"consul", "sync_catalog", "to_k8s", "error"}, 1, labels)
}
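// metric count for each Consul service created in K8s by catalog sync. NOTE(review): this is also incremented when the create above returned an error; confirm that is intended.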
labels := []metrics.Label{
{Name: "service_name", Value: svc.Name},
}
s.PrometheusSink.IncrCounterWithLabels([]string{"consul", "sync_catalog", "to_k8s"}, 1, labels)
}
}
}
Expand Down
Loading

0 comments on commit eb18887

Please sign in to comment.