Skip to content

Commit

Permalink
Merged metrics with consul-dataplane (#1635)
Browse files Browse the repository at this point in the history
Changes proposed in this PR:
- Add support for merged metrics with consul-dataplane.
- Merging metrics now takes place in the consul-dataplane itself instead of relying on the consul-sidecar
- Pairs with a new flag added to consul-dataplane in PR: Add telemetry-prom-merge-port flag for merged metrics
  • Loading branch information
curtbushko authored and Thomas Eckert committed Oct 31, 2022
1 parent 1526251 commit a5d7115
Show file tree
Hide file tree
Showing 22 changed files with 293 additions and 2,110 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ IMPROVEMENTS:
* Remove deprecated annotation `service.alpha.kubernetes.io/tolerate-unready-endpoints: "true"` in the `server-service` template. [[GH-1619](https://github.com/hashicorp/consul-k8s/pull/1619)]
* Support `minAvailable` on connect injector `PodDisruptionBudget`. [[GH-1557](https://github.com/hashicorp/consul-k8s/pull/1557)]
* Add `tolerations` and `nodeSelector` to Server ACL init jobs and `nodeSelector` to Webhook cert manager. [[GH-1581](https://github.com/hashicorp/consul-k8s/pull/1581)]
* Control plane
* Support merged metrics with consul-dataplane. [[GH-1635](https://github.com/hashicorp/consul-k8s/pull/1635)]

BREAKING_CHANGES:

* Helm:
* Removal of `global.consulSidecarContainer` from values file as there is no longer a consul sidecar. [[GH-1635](https://github.com/hashicorp/consul-k8s/pull/1635)]

## 1.0.0-beta3 (October 12, 2022)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ spec:
- name: METRICS_ENABLE_PROMETHEUS
value: "true"
ports:
- containerPort: 9090
- containerPort: 9090
43 changes: 23 additions & 20 deletions acceptance/tests/metrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/hashicorp/consul/sdk/testutil/retry"

"github.com/hashicorp/consul-k8s/acceptance/framework/consul"
"github.com/hashicorp/consul-k8s/acceptance/framework/environment"
"github.com/hashicorp/consul-k8s/acceptance/framework/helpers"
"github.com/hashicorp/consul-k8s/acceptance/framework/k8s"
"github.com/hashicorp/consul-k8s/acceptance/framework/logger"
Expand All @@ -21,7 +22,6 @@ const StaticClientName = "static-client"
// Test that prometheus metrics, when enabled, are accessible from the
// endpoints that have been exposed on the server, client and gateways.
func TestComponentMetrics(t *testing.T) {
t.Skipf("Skipping this test because it's not yet supported with agentless")
env := suite.Environment()
cfg := suite.Config()
ctx := env.DefaultContext(t)
Expand All @@ -31,6 +31,10 @@ func TestComponentMetrics(t *testing.T) {
"global.datacenter": "dc1",
"global.metrics.enabled": "true",
"global.metrics.enableAgentMetrics": "true",
// Agents have been removed but there could potentially be customers that are still running them. We
// are using client.enabled to cover that scenario and to make sure agent metrics still works with
// consul-dataplane.
"client.enabled": "true",

"connectInject.enabled": "true",
"controller.enabled": "true",
Expand Down Expand Up @@ -77,20 +81,19 @@ func TestComponentMetrics(t *testing.T) {
require.NoError(t, err)
require.Contains(t, metricsOutput, `consul_acl_ResolveToken{quantile="0.5"}`)

//// Ingress Gateway Metrics
//assertGatewayMetricsEnabled(t, ctx, ns, "ingress-gateway", `envoy_cluster_assignment_stale{local_cluster="ingress-gateway",consul_source_service="ingress-gateway"`)
//
//// Terminating Gateway Metrics
//assertGatewayMetricsEnabled(t, ctx, ns, "terminating-gateway", `envoy_cluster_assignment_stale{local_cluster="terminating-gateway",consul_source_service="terminating-gateway"`)
//
//// Mesh Gateway Metrics
//assertGatewayMetricsEnabled(t, ctx, ns, "mesh-gateway", `envoy_cluster_assignment_stale{local_cluster="mesh-gateway",consul_source_service="mesh-gateway"`)
logger.Log(t, "ingress gateway metrics")
assertGatewayMetricsEnabled(t, ctx, ns, "ingress-gateway", `envoy_cluster_assignment_stale{local_cluster="ingress-gateway",consul_source_service="ingress-gateway"`)

logger.Log(t, "terminating gateway metrics")
assertGatewayMetricsEnabled(t, ctx, ns, "terminating-gateway", `envoy_cluster_assignment_stale{local_cluster="terminating-gateway",consul_source_service="terminating-gateway"`)

logger.Log(t, "mesh gateway metrics")
assertGatewayMetricsEnabled(t, ctx, ns, "mesh-gateway", `envoy_cluster_assignment_stale{local_cluster="mesh-gateway",consul_source_service="mesh-gateway"`)
}

// Test that merged service and envoy metrics are accessible from the
// endpoints that have been exposed on the service.
func TestAppMetrics(t *testing.T) {
t.Skipf("Skipping this test because it's not yet supported with agentless")
env := suite.Environment()
cfg := suite.Config()
ctx := env.DefaultContext(t)
Expand Down Expand Up @@ -138,13 +141,13 @@ func TestAppMetrics(t *testing.T) {
})
}

//func assertGatewayMetricsEnabled(t *testing.T, ctx environment.TestContext, ns, label, metricsAssertion string) {
// pods, err := ctx.KubernetesClient(t).CoreV1().Pods(ns).List(context.Background(), metav1.ListOptions{LabelSelector: fmt.Sprintf("component=%s", label)})
// require.NoError(t, err)
// for _, pod := range pods.Items {
// podIP := pod.Status.PodIP
// metricsOutput, err := k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptions(t), "exec", "deploy/"+StaticClientName, "--", "curl", "--silent", "--show-error", fmt.Sprintf("http://%s:20200/metrics", podIP))
// require.NoError(t, err)
// require.Contains(t, metricsOutput, metricsAssertion)
// }
//}
func assertGatewayMetricsEnabled(t *testing.T, ctx environment.TestContext, ns, label, metricsAssertion string) {
pods, err := ctx.KubernetesClient(t).CoreV1().Pods(ns).List(context.Background(), metav1.ListOptions{LabelSelector: fmt.Sprintf("component=%s", label)})
require.NoError(t, err)
for _, pod := range pods.Items {
podIP := pod.Status.PodIP
metricsOutput, err := k8s.RunKubectlAndGetOutputE(t, ctx.KubectlOptions(t), "exec", "deploy/"+StaticClientName, "--", "curl", "--silent", "--show-error", fmt.Sprintf("http://%s:20200/metrics", podIP))
require.NoError(t, err)
require.Contains(t, metricsOutput, metricsAssertion)
}
}
5 changes: 4 additions & 1 deletion charts/consul/templates/ingress-gateways-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,10 @@ spec:
-service-partition={{ $root.Values.global.adminPartitions.name }} \
{{- end }}
-log-level={{ default $root.Values.global.logLevel }} \
-log-json={{ $root.Values.global.logJSON }}
-log-json={{ $root.Values.global.logJSON }} \
{{- if (and $root.Values.global.metrics.enabled $root.Values.global.metrics.enableGatewayMetrics) }}
-telemetry-prom-scrape-path="/metrics"
{{- end }}
livenessProbe:
tcpSocket:
port: 21000
Expand Down
5 changes: 4 additions & 1 deletion charts/consul/templates/mesh-gateway-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,10 @@ spec:
-service-partition={{ .Values.global.adminPartitions.name }} \
{{- end }}
-log-level={{ default .Values.global.logLevel }} \
-log-json={{ .Values.global.logJSON }}
-log-json={{ .Values.global.logJSON }} \
{{- if (and .Values.global.metrics.enabled .Values.global.metrics.enableGatewayMetrics) }}
-telemetry-prom-scrape-path="/metrics"
{{- end }}
livenessProbe:
tcpSocket:
port: {{ .Values.meshGateway.containerPort }}
Expand Down
5 changes: 4 additions & 1 deletion charts/consul/templates/terminating-gateways-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,10 @@ spec:
-service-partition={{ $root.Values.global.adminPartitions.name }} \
{{- end }}
-log-level={{ default $root.Values.global.logLevel }} \
-log-json={{ $root.Values.global.logJSON }}
-log-json={{ $root.Values.global.logJSON }} \
{{- if (and $root.Values.global.metrics.enabled $root.Values.global.metrics.enableGatewayMetrics) }}
-telemetry-prom-scrape-path="/metrics"
{{- end }}
livenessProbe:
tcpSocket:
port: 8443
Expand Down
141 changes: 0 additions & 141 deletions charts/consul/test/unit/connect-inject-deployment.bats
Original file line number Diff line number Diff line change
Expand Up @@ -1075,147 +1075,6 @@ load _helpers
[ "${actual}" = "false" ]
}

#--------------------------------------------------------------------
# consul sidecar resources

@test "connectInject/Deployment: default consul sidecar container resources" {
cd `chart_dir`
local cmd=$(helm template \
-s templates/connect-inject-deployment.yaml \
--set 'connectInject.enabled=true' \
. | tee /dev/stderr |
yq '.spec.template.spec.containers[0].command' | tee /dev/stderr)

local actual=$(echo "$cmd" |
yq 'any(contains("-default-consul-sidecar-memory-request=25Mi"))' | tee /dev/stderr)
[ "${actual}" = "true" ]

local actual=$(echo "$cmd" |
yq 'any(contains("-default-consul-sidecar-cpu-request=20m"))' | tee /dev/stderr)
[ "${actual}" = "true" ]

local actual=$(echo "$cmd" |
yq 'any(contains("-default-consul-sidecar-memory-limit=50Mi"))' | tee /dev/stderr)
[ "${actual}" = "true" ]

local actual=$(echo "$cmd" |
yq 'any(contains("-default-consul-sidecar-cpu-limit=20m"))' | tee /dev/stderr)
[ "${actual}" = "true" ]
}

@test "connectInject/Deployment: consul sidecar container resources can be set" {
cd `chart_dir`
local cmd=$(helm template \
-s templates/connect-inject-deployment.yaml \
--set 'connectInject.enabled=true' \
--set 'global.consulSidecarContainer.resources.requests.memory=100Mi' \
--set 'global.consulSidecarContainer.resources.requests.cpu=100m' \
--set 'global.consulSidecarContainer.resources.limits.memory=200Mi' \
--set 'global.consulSidecarContainer.resources.limits.cpu=200m' \
. | tee /dev/stderr |
yq '.spec.template.spec.containers[0].command' | tee /dev/stderr)

local actual=$(echo "$cmd" |
yq 'any(contains("-default-consul-sidecar-memory-request=100Mi"))' | tee /dev/stderr)
[ "${actual}" = "true" ]

local actual=$(echo "$cmd" |
yq 'any(contains("-default-consul-sidecar-cpu-request=100m"))' | tee /dev/stderr)
[ "${actual}" = "true" ]

local actual=$(echo "$cmd" |
yq 'any(contains("-default-consul-sidecar-memory-limit=200Mi"))' | tee /dev/stderr)
[ "${actual}" = "true" ]

local actual=$(echo "$cmd" |
yq 'any(contains("-default-consul-sidecar-cpu-limit=200m"))' | tee /dev/stderr)
[ "${actual}" = "true" ]
}

@test "connectInject/Deployment: consul sidecar container resources can be set explicitly to 0" {
cd `chart_dir`
local cmd=$(helm template \
-s templates/connect-inject-deployment.yaml \
--set 'connectInject.enabled=true' \
--set 'global.consulSidecarContainer.resources.requests.memory=0' \
--set 'global.consulSidecarContainer.resources.requests.cpu=0' \
--set 'global.consulSidecarContainer.resources.limits.memory=0' \
--set 'global.consulSidecarContainer.resources.limits.cpu=0' \
. | tee /dev/stderr |
yq '.spec.template.spec.containers[0].command' | tee /dev/stderr)

local actual=$(echo "$cmd" |
yq 'any(contains("-default-consul-sidecar-memory-request=0"))' | tee /dev/stderr)
[ "${actual}" = "true" ]

local actual=$(echo "$cmd" |
yq 'any(contains("-default-consul-sidecar-cpu-request=0"))' | tee /dev/stderr)
[ "${actual}" = "true" ]

local actual=$(echo "$cmd" |
yq 'any(contains("-default-consul-sidecar-memory-limit=0"))' | tee /dev/stderr)
[ "${actual}" = "true" ]

local actual=$(echo "$cmd" |
yq 'any(contains("-default-consul-sidecar-cpu-limit=0"))' | tee /dev/stderr)
[ "${actual}" = "true" ]
}

@test "connectInject/Deployment: consul sidecar container resources can be individually set to null" {
cd `chart_dir`
local cmd=$(helm template \
-s templates/connect-inject-deployment.yaml \
--set 'connectInject.enabled=true' \
--set 'global.consulSidecarContainer.resources.requests.memory=null' \
--set 'global.consulSidecarContainer.resources.requests.cpu=null' \
--set 'global.consulSidecarContainer.resources.limits.memory=null' \
--set 'global.consulSidecarContainer.resources.limits.cpu=null' \
. | tee /dev/stderr |
yq '.spec.template.spec.containers[0].command' | tee /dev/stderr)

local actual=$(echo "$cmd" |
yq 'any(contains("-default-consul-sidecar-memory-request"))' | tee /dev/stderr)
[ "${actual}" = "false" ]

local actual=$(echo "$cmd" |
yq 'any(contains("-default-consul-sidecar-cpu-request"))' | tee /dev/stderr)
[ "${actual}" = "false" ]

local actual=$(echo "$cmd" |
yq 'any(contains("-default-consul-sidecar-memory-limit"))' | tee /dev/stderr)
[ "${actual}" = "false" ]

local actual=$(echo "$cmd" |
yq 'any(contains("-default-consul-sidecar-cpu-limit"))' | tee /dev/stderr)
[ "${actual}" = "false" ]
}

@test "connectInject/Deployment: consul sidecar container resources can be set to null" {
cd `chart_dir`
local cmd=$(helm template \
-s templates/connect-inject-deployment.yaml \
--set 'connectInject.enabled=true' \
--set 'global.consulSidecarContainer.resources=null' \
. | tee /dev/stderr |
yq '.spec.template.spec.containers[0].command' | tee /dev/stderr)

local actual=$(echo "$cmd" |
yq 'any(contains("-default-consul-sidecar-memory-request"))' | tee /dev/stderr)
[ "${actual}" = "false" ]

local actual=$(echo "$cmd" |
yq 'any(contains("-default-consul-sidecar-cpu-request"))' | tee /dev/stderr)
[ "${actual}" = "false" ]

local actual=$(echo "$cmd" |
yq 'any(contains("-default-consul-sidecar-memory-limit"))' | tee /dev/stderr)
[ "${actual}" = "false" ]

local actual=$(echo "$cmd" |
yq 'any(contains("-default-consul-sidecar-cpu-limit"))' | tee /dev/stderr)
[ "${actual}" = "false" ]
}

#--------------------------------------------------------------------
# sidecarProxy.resources

Expand Down
28 changes: 3 additions & 25 deletions charts/consul/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -574,28 +574,6 @@ global:
# @type: boolean
enableGatewayMetrics: true

# For connect-injected pods, the consul sidecar is responsible for metrics merging. For ingress/mesh/terminating
# gateways, it additionally ensures the Consul services are always registered with their local Consul client.
# @type: map
consulSidecarContainer:
# Set default resources for consul sidecar. If null, that resource won't
# be set.
# These settings can be overridden on a per-pod basis via these annotations:
#
# - `consul.hashicorp.com/consul-sidecar-cpu-limit`
# - `consul.hashicorp.com/consul-sidecar-cpu-request`
# - `consul.hashicorp.com/consul-sidecar-memory-limit`
# - `consul.hashicorp.com/consul-sidecar-memory-request`
# @recurse: false
# @type: map
resources:
requests:
memory: "25Mi"
cpu: "20m"
limits:
memory: "50Mi"
cpu: "20m"

# The name (and tag) of the Envoy Docker image used for the
# connect-injected sidecar proxies and mesh, terminating, and ingress gateways.
# See https://www.consul.io/docs/connect/proxies/envoy for full compatibility matrix between Consul and Envoy.
Expand Down Expand Up @@ -2073,18 +2051,18 @@ connectInject:
# add a listener on the Envoy sidecar to expose metrics. The exposed
# metrics will depend on whether metrics merging is enabled:
# - If metrics merging is enabled:
# the Consul sidecar will run a merged metrics server
# the consul-dataplane will run a merged metrics server
# combining Envoy sidecar and Connect service metrics,
# i.e. if your service exposes its own Prometheus metrics.
# - If metrics merging is disabled:
# the listener will just expose Envoy sidecar metrics.
# This will inherit from `global.metrics.enabled`.
defaultEnabled: "-"
# Configures the Consul sidecar to run a merged metrics server
# Configures the consul-dataplane to run a merged metrics server
# to combine and serve both Envoy and Connect service metrics.
# This feature is available only in Consul v1.10.0 or greater.
defaultEnableMerging: false
# Configures the port at which the Consul sidecar will listen on to return
# Configures the port at which the consul-dataplane will listen on to return
# combined metrics. This port only needs to be changed if it conflicts with
# the application's ports.
defaultMergedMetricsPort: 20100
Expand Down
5 changes: 0 additions & 5 deletions control-plane/commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
cmdACLInit "github.com/hashicorp/consul-k8s/control-plane/subcommand/acl-init"
cmdConnectInit "github.com/hashicorp/consul-k8s/control-plane/subcommand/connect-init"
cmdConsulLogout "github.com/hashicorp/consul-k8s/control-plane/subcommand/consul-logout"
cmdConsulSidecar "github.com/hashicorp/consul-k8s/control-plane/subcommand/consul-sidecar"
cmdController "github.com/hashicorp/consul-k8s/control-plane/subcommand/controller"
cmdCreateFederationSecret "github.com/hashicorp/consul-k8s/control-plane/subcommand/create-federation-secret"
cmdDeleteCompletedJob "github.com/hashicorp/consul-k8s/control-plane/subcommand/delete-completed-job"
Expand Down Expand Up @@ -43,10 +42,6 @@ func init() {
return &cmdInjectConnect.Command{UI: ui}, nil
},

"consul-sidecar": func() (cli.Command, error) {
return &cmdConsulSidecar.Command{UI: ui}, nil
},

"consul-logout": func() (cli.Command, error) {
return &cmdConsulLogout.Command{UI: ui}, nil
},
Expand Down
19 changes: 5 additions & 14 deletions control-plane/connect-inject/annotations.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,6 @@ const (
annotationSidecarProxyMemoryLimit = "consul.hashicorp.com/sidecar-proxy-memory-limit"
annotationSidecarProxyMemoryRequest = "consul.hashicorp.com/sidecar-proxy-memory-request"

// annotations for consul sidecar resource limits.
annotationConsulSidecarCPULimit = "consul.hashicorp.com/consul-sidecar-cpu-limit"
annotationConsulSidecarCPURequest = "consul.hashicorp.com/consul-sidecar-cpu-request"
annotationConsulSidecarMemoryLimit = "consul.hashicorp.com/consul-sidecar-memory-limit"
annotationConsulSidecarMemoryRequest = "consul.hashicorp.com/consul-sidecar-memory-request"

// annotations for sidecar volumes.
annotationConsulSidecarUserVolume = "consul.hashicorp.com/consul-sidecar-user-volume"
annotationConsulSidecarUserVolumeMount = "consul.hashicorp.com/consul-sidecar-user-volume-mount"
Expand All @@ -139,14 +133,11 @@ const (
annotationServiceMetricsPort = "consul.hashicorp.com/service-metrics-port"
annotationServiceMetricsPath = "consul.hashicorp.com/service-metrics-path"

// todo (agentless): uncomment once consul-dataplane supports metrics
/*
annotations for configuring TLS for Prometheus.
annotationPrometheusCAFile = "consul.hashicorp.com/prometheus-ca-file"
annotationPrometheusCAPath = "consul.hashicorp.com/prometheus-ca-path"
annotationPrometheusCertFile = "consul.hashicorp.com/prometheus-cert-file"
annotationPrometheusKeyFile = "consul.hashicorp.com/prometheus-key-file"
*/
// annotations for configuring TLS for Prometheus.
annotationPrometheusCAFile = "consul.hashicorp.com/prometheus-ca-file"
annotationPrometheusCAPath = "consul.hashicorp.com/prometheus-ca-path"
annotationPrometheusCertFile = "consul.hashicorp.com/prometheus-cert-file"
annotationPrometheusKeyFile = "consul.hashicorp.com/prometheus-key-file"

// annotationEnvoyExtraArgs is a space-separated list of arguments to be passed to the
// envoy binary. See list of args here: https://www.envoyproxy.io/docs/envoy/latest/operations/cli
Expand Down
Loading

0 comments on commit a5d7115

Please sign in to comment.