Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API gateway metrics #3811

Merged
merged 11 commits into from
Mar 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/3811.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:enhancement
api-gateway: Expose prometheus scrape metrics on api-gateway pods.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
api-gateway: Expose prometheus scrape metrics on api-gateway pods.
api-gateway: Expose Prometheus scrape metrics on api-gateway pods.

```
17 changes: 17 additions & 0 deletions charts/consul/templates/crd-gatewayclassconfigs-v1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,23 @@ spec:
for gateway containers
format: int32
type: integer
metrics:
description: Metrics defines how to configure the metrics for a gateway.
properties:
enabled:
description: Enable metrics for this class of gateways. If unspecified,
will inherit behavior from the global Helm configuration.
type: boolean
path:
description: The path used for metrics.
type: string
port:
description: The port used for metrics.
format: int32
maximum: 65535
minimum: 1024
type: integer
type: object
nodeSelector:
additionalProperties:
type: string
Expand Down
9 changes: 9 additions & 0 deletions charts/consul/templates/gateway-resources-job.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,15 @@ spec:
- -openshift-scc-name={{ .Values.connectInject.apiGateway.managedGatewayClass.openshiftSCCName }}
{{- end }}
- -map-privileged-container-ports={{ .Values.connectInject.apiGateway.managedGatewayClass.mapPrivilegedContainerPorts }}
{{- if (ne (.Values.connectInject.apiGateway.managedGatewayClass.metrics.enabled | toString) "-") }}
- -enable-metrics={{ .Values.connectInject.apiGateway.managedGatewayClass.metrics.enabled | toString }}
{{- end }}
{{- if .Values.connectInject.apiGateway.managedGatewayClass.metrics.path }}
- -metrics-path={{ .Values.connectInject.apiGateway.managedGatewayClass.metrics.path }}
{{- end }}
{{- if .Values.connectInject.apiGateway.managedGatewayClass.metrics.port }}
- -metrics-port={{ .Values.connectInject.apiGateway.managedGatewayClass.metrics.port }}
{{- end }}
resources:
requests:
memory: "50Mi"
Expand Down
13 changes: 13 additions & 0 deletions charts/consul/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2417,6 +2417,19 @@ connectInject:
# @type: string
service: null

# Metrics settings for gateways created with this gateway class configuration.
metrics:
# This value enables or disables metrics collection on a gateway, overriding the global gateway metrics collection settings.
# @type: boolean
enabled: "-"
# This value sets the port to use for scraping gateway metrics via prometheus, defaults to 20200 if not set. Must be in the port
# range of 1024-65535.
# @type: int
port: null
# This value sets the path to use for scraping gateway metrics via prometheus, defaults to /metrics if not set.
# @type: string
path: null

# The resource settings for Pods handling traffic for Gateway API.
# @recurse: false
# @type: map
Expand Down
6 changes: 5 additions & 1 deletion control-plane/api-gateway/binding/binder.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ type BinderConfig struct {

// Policies is a list containing all GatewayPolicies that are part of the Gateway Deployment
Policies []v1alpha1.GatewayPolicy

// Configuration from helm.
HelmConfig common.HelmConfig
}

// Binder is used for generating a Snapshot of all operations that should occur both
Expand Down Expand Up @@ -199,7 +202,8 @@ func (b *Binder) Snapshot() *Snapshot {
OnUpdate: b.handleGatewaySyncStatus(snapshot, &b.config.Gateway, consulStatus),
})

registrations := registrationsForPods(entry.Namespace, b.config.Gateway, registrationPods)
metricsConfig := common.GatewayMetricsConfig(b.config.Gateway, *gatewayClassConfig, b.config.HelmConfig)
registrations := registrationsForPods(metricsConfig, entry.Namespace, b.config.Gateway, registrationPods)
snapshot.Consul.Registrations = registrations

// deregister any not explicitly registered service
Expand Down
20 changes: 17 additions & 3 deletions control-plane/api-gateway/binding/registration.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package binding
import (
"fmt"

gatewaycommon "github.com/hashicorp/consul-k8s/control-plane/api-gateway/common"
"github.com/hashicorp/consul-k8s/control-plane/connect-inject/common"
"github.com/hashicorp/consul-k8s/control-plane/connect-inject/constants"
"github.com/hashicorp/consul/api"
Expand All @@ -22,22 +23,34 @@ const (

// consulKubernetesCheckName is the name of health check in Consul for Kubernetes readiness status.
consulKubernetesCheckName = "Kubernetes Readiness Check"

// metricsConfiguration is the configuration key for binding a prometheus port to the envoy instance.
metricsConfiguration = "envoy_prometheus_bind_addr"
)

func registrationsForPods(namespace string, gateway gwv1beta1.Gateway, pods []corev1.Pod) []api.CatalogRegistration {
func registrationsForPods(metrics gatewaycommon.MetricsConfig, namespace string, gateway gwv1beta1.Gateway, pods []corev1.Pod) []api.CatalogRegistration {
registrations := []api.CatalogRegistration{}
for _, pod := range pods {
registrations = append(registrations, registrationForPod(namespace, gateway, pod))
registrations = append(registrations, registrationForPod(metrics, namespace, gateway, pod))
}
return registrations
}

func registrationForPod(namespace string, gateway gwv1beta1.Gateway, pod corev1.Pod) api.CatalogRegistration {
func registrationForPod(metrics gatewaycommon.MetricsConfig, namespace string, gateway gwv1beta1.Gateway, pod corev1.Pod) api.CatalogRegistration {
healthStatus := api.HealthCritical
if isPodReady(pod) {
healthStatus = api.HealthPassing
}

var proxyConfigOverrides *api.AgentServiceConnectProxyConfig
if metrics.Enabled {
proxyConfigOverrides = &api.AgentServiceConnectProxyConfig{
Config: map[string]interface{}{
metricsConfiguration: fmt.Sprintf("%s:%d", pod.Status.PodIP, metrics.Port),
},
}
}

return api.CatalogRegistration{
Node: common.ConsulNodeNameFromK8sNode(pod.Spec.NodeName),
Address: pod.Status.HostIP,
Expand All @@ -50,6 +63,7 @@ func registrationForPod(namespace string, gateway gwv1beta1.Gateway, pod corev1.
Service: gateway.Name,
Address: pod.Status.PodIP,
Namespace: namespace,
Proxy: proxyConfigOverrides,
Meta: map[string]string{
constants.MetaKeyPodName: pod.Name,
constants.MetaKeyKubeNS: pod.Namespace,
Expand Down
3 changes: 2 additions & 1 deletion control-plane/api-gateway/binding/registration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package binding
import (
"testing"

"github.com/hashicorp/consul-k8s/control-plane/api-gateway/common"
"github.com/hashicorp/consul/api"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -68,7 +69,7 @@ func TestRegistrationsForPods_Health(t *testing.T) {
},
} {
t.Run(name, func(t *testing.T) {
registrations := registrationsForPods(tt.consulNamespace, tt.gateway, tt.pods)
registrations := registrationsForPods(common.MetricsConfig{}, tt.consulNamespace, tt.gateway, tt.pods)
require.Len(t, registrations, len(tt.expected))

for i := range registrations {
Expand Down
10 changes: 10 additions & 0 deletions control-plane/api-gateway/common/helm_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,16 @@ type HelmConfig struct {
// defined on a Gateway.
MapPrivilegedServicePorts int

// EnableGatewayMetrics indicates whether or not gateway metrics should be enabled
// by default on a deployed gateway, passed from the helm chart via command-line flags to our controller.
EnableGatewayMetrics bool

// The default path to use for scraping prometheus metrics, passed from the helm chart via command-line flags to our controller.
DefaultPrometheusScrapePath string

// The default port to use for scraping prometheus metrics, passed from the helm chart via command-line flags to our controller.
DefaultPrometheusScrapePort string

InitContainerResources *v1.ResourceRequirements
}

Expand Down
103 changes: 103 additions & 0 deletions control-plane/api-gateway/common/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package common

import (
"strconv"

"github.com/hashicorp/consul-k8s/control-plane/api/v1alpha1"
"github.com/hashicorp/consul-k8s/control-plane/connect-inject/constants"
gwv1beta1 "sigs.k8s.io/gateway-api/apis/v1beta1"
)

const (
defaultScrapePort = 20200
defaultScrapePath = "/metrics"
)

type MetricsConfig struct {
Enabled bool
Path string
Port int
}

func gatewayMetricsEnabled(gateway gwv1beta1.Gateway, gcc v1alpha1.GatewayClassConfig, config HelmConfig) bool {
// first check our annotations, if something is there, then it means we've explicitly
// annotated metrics collection
if scrape, isSet := gateway.Annotations[constants.AnnotationEnableMetrics]; isSet {
enabled, err := strconv.ParseBool(scrape)
if err == nil {
return enabled
}
// TODO: log an error
// we fall through to the other metrics enabled checks
}

// if it's not set on the annotation, then we check to see if it's set on the GatewayClassConfig
if gcc.Spec.Metrics.Enabled != nil {
return *gcc.Spec.Metrics.Enabled
}

// otherwise, fallback to the global helm setting
return config.EnableGatewayMetrics
}

func fetchPortString(gateway gwv1beta1.Gateway, gcc v1alpha1.GatewayClassConfig, config HelmConfig) string {
// first check our annotations, if something is there, then it means we've explicitly
// annotated metrics collection
if portString, isSet := gateway.Annotations[constants.AnnotationPrometheusScrapePort]; isSet {
return portString
}

// if it's not set on the annotation, then we check to see if it's set on the GatewayClassConfig
if gcc.Spec.Metrics.Port != nil {
return strconv.Itoa(int(*gcc.Spec.Metrics.Port))
}

// otherwise, fallback to the global helm setting
return config.DefaultPrometheusScrapePort
}

func gatewayMetricsPort(gateway gwv1beta1.Gateway, gcc v1alpha1.GatewayClassConfig, config HelmConfig) int {
portString := fetchPortString(gateway, gcc, config)

port, err := strconv.Atoi(portString)
if err != nil {
// if we can't parse the port string, just use the default
// TODO: log an error
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you intending to merge with this TODO in place?

return defaultScrapePort
}

if port < 1024 || port > 65535 {
// if we requested a privileged port, use the default
// TODO: log an error
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same question here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, for now just to get this out the door, we can go back later and pass a logger instance into this if we want to log on an invalid port.

return defaultScrapePort
}

return port
}

func gatewayMetricsPath(gateway gwv1beta1.Gateway, gcc v1alpha1.GatewayClassConfig, config HelmConfig) string {
// first check our annotations, if something is there, then it means we've explicitly
// annotated metrics collection
if path, isSet := gateway.Annotations[constants.AnnotationPrometheusScrapePath]; isSet {
return path
}

// if it's not set on the annotation, then we check to see if it's set on the GatewayClassConfig
if gcc.Spec.Metrics.Path != nil {
return *gcc.Spec.Metrics.Path
}

// otherwise, fallback to the global helm setting
return config.DefaultPrometheusScrapePath
Comment on lines +82 to +94
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we be doing any validation on the path here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally, yes, but from what I can tell we don't really have solid path validation anywhere else in our implementation (unless you can point me to a prior art?). Was thinking we'd just keep things equivalent for now, but we could see if we could url.Parse or regex match the url string if we wanted to? I believe that if this is invalid you'll just get a dataplane process that fails to start up envoy.

}

func GatewayMetricsConfig(gateway gwv1beta1.Gateway, gcc v1alpha1.GatewayClassConfig, config HelmConfig) MetricsConfig {
return MetricsConfig{
Enabled: gatewayMetricsEnabled(gateway, gcc, config),
Path: gatewayMetricsPath(gateway, gcc, config),
Port: gatewayMetricsPort(gateway, gcc, config),
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ func (r *GatewayController) Reconcile(ctx context.Context, req ctrl.Request) (ct
ConsulGateway: consulGateway,
ConsulGatewayServices: consulServices,
Policies: policies,
HelmConfig: r.HelmConfig,
})

updates := binder.Snapshot()
Expand Down
23 changes: 16 additions & 7 deletions control-plane/api-gateway/gatekeeper/dataplane.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,11 @@ const (
netBindCapability = "NET_BIND_SERVICE"
consulDataplaneDNSBindHost = "127.0.0.1"
consulDataplaneDNSBindPort = 8600
defaultPrometheusScrapePath = "/metrics"
defaultEnvoyProxyConcurrency = 1
volumeName = "consul-connect-inject-data"
)

func consulDataplaneContainer(config common.HelmConfig, gcc v1alpha1.GatewayClassConfig, name, namespace string) (corev1.Container, error) {
func consulDataplaneContainer(metrics common.MetricsConfig, config common.HelmConfig, gcc v1alpha1.GatewayClassConfig, name, namespace string) (corev1.Container, error) {
// Extract the service account token's volume mount.
var (
err error
Expand All @@ -38,7 +37,7 @@ func consulDataplaneContainer(config common.HelmConfig, gcc v1alpha1.GatewayClas
bearerTokenFile = "/var/run/secrets/kubernetes.io/serviceaccount/token"
}

args, err := getDataplaneArgs(namespace, config, bearerTokenFile, name)
args, err := getDataplaneArgs(metrics, namespace, config, bearerTokenFile, name)
if err != nil {
return corev1.Container{}, err
}
Expand Down Expand Up @@ -100,6 +99,15 @@ func consulDataplaneContainer(config common.HelmConfig, gcc v1alpha1.GatewayClas
Name: "proxy-health",
ContainerPort: int32(constants.ProxyDefaultHealthPort),
})

if metrics.Enabled {
container.Ports = append(container.Ports, corev1.ContainerPort{
Name: "prometheus",
ContainerPort: int32(metrics.Port),
Protocol: corev1.ProtocolTCP,
})
}

// Configure the resource requests and limits for the proxy if they are set.
if gcc.Spec.DeploymentSpec.Resources != nil {
container.Resources = *gcc.Spec.DeploymentSpec.Resources
Expand All @@ -122,7 +130,7 @@ func consulDataplaneContainer(config common.HelmConfig, gcc v1alpha1.GatewayClas
return container, nil
}

func getDataplaneArgs(namespace string, config common.HelmConfig, bearerTokenFile string, name string) ([]string, error) {
func getDataplaneArgs(metrics common.MetricsConfig, namespace string, config common.HelmConfig, bearerTokenFile string, name string) ([]string, error) {
proxyIDFileName := "/consul/connect-inject/proxyid"
envoyConcurrency := defaultEnvoyProxyConcurrency

Expand Down Expand Up @@ -170,9 +178,10 @@ func getDataplaneArgs(namespace string, config common.HelmConfig, bearerTokenFil

args = append(args, fmt.Sprintf("-envoy-admin-bind-port=%d", 19000))

// Set a default scrape path that can be overwritten by the annotation.
prometheusScrapePath := defaultPrometheusScrapePath
args = append(args, "-telemetry-prom-scrape-path="+prometheusScrapePath)
if metrics.Enabled {
// Set up metrics collection.
args = append(args, "-telemetry-prom-scrape-path="+metrics.Path)
}

return args, nil
}
25 changes: 18 additions & 7 deletions control-plane/api-gateway/gatekeeper/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package gatekeeper

import (
"context"
"strconv"

"github.com/google/go-cmp/cmp"
appsv1 "k8s.io/api/apps/v1"
Expand Down Expand Up @@ -92,7 +93,21 @@ func (g *Gatekeeper) deployment(gateway gwv1beta1.Gateway, gcc v1alpha1.GatewayC
return nil, err
}

container, err := consulDataplaneContainer(config, gcc, gateway.Name, gateway.Namespace)
annotations := map[string]string{
"consul.hashicorp.com/connect-inject": "false",
constants.AnnotationGatewayConsulServiceName: gateway.Name,
constants.AnnotationGatewayKind: "api-gateway",
}

metrics := common.GatewayMetricsConfig(gateway, gcc, config)

if metrics.Enabled {
annotations[constants.AnnotationPrometheusScrape] = "true"
annotations[constants.AnnotationPrometheusPath] = metrics.Path
annotations[constants.AnnotationPrometheusPort] = strconv.Itoa(metrics.Port)
}

container, err := consulDataplaneContainer(metrics, config, gcc, gateway.Name, gateway.Namespace)
if err != nil {
return nil, err
}
Expand All @@ -110,12 +125,8 @@ func (g *Gatekeeper) deployment(gateway gwv1beta1.Gateway, gcc v1alpha1.GatewayC
},
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: common.LabelsForGateway(&gateway),
Annotations: map[string]string{
constants.AnnotationInject: "false",
constants.AnnotationGatewayConsulServiceName: gateway.Name,
constants.AnnotationGatewayKind: "api-gateway",
},
Labels: common.LabelsForGateway(&gateway),
Annotations: annotations,
},
Spec: corev1.PodSpec{
Volumes: []corev1.Volume{
Expand Down
Loading
Loading