From 88d81246900a75b533152ef93ef1e987b4c48400 Mon Sep 17 00:00:00 2001 From: Lukasz Dziedziak Date: Mon, 28 Aug 2023 07:37:02 -0500 Subject: [PATCH] feat(kuma-cp): added docs for tls prom secured (#1438) Signed-off-by: Lukasz Dziedziak --- .github/styles/Vocab/Base/accept.txt | 2 + app/_src/explore/observability.md | 5 +- app/_src/policies/traffic-metrics.md | 295 ++++++++++++++++++++++++++- app/_src/quickstart/universal.md | 23 +++ 4 files changed, 321 insertions(+), 4 deletions(-) diff --git a/.github/styles/Vocab/Base/accept.txt b/.github/styles/Vocab/Base/accept.txt index d7985dc91..3fb3d7bbb 100644 --- a/.github/styles/Vocab/Base/accept.txt +++ b/.github/styles/Vocab/Base/accept.txt @@ -33,6 +33,7 @@ deduplicat(:?e|ion) degraphql denylist dev +disabled DNS eBPF EKS|eks @@ -130,6 +131,7 @@ targetRef tbl tcpdump TLS +tls transcoder ttl txt diff --git a/app/_src/explore/observability.md b/app/_src/explore/observability.md index 55270762e..9171f76fb 100644 --- a/app/_src/explore/observability.md +++ b/app/_src/explore/observability.md @@ -45,7 +45,10 @@ This service discovery will connect to the control plane and retrieve all data p There are 2 ways you can run prometheus: 1. Inside the mesh (default for [`kumactl install observability`](#demo-setup)). In this case you can use mTLS to retrieve the metrics. This provides high security but will require one prometheus per mesh and might not be accessible if your mesh becomes unavailable. It will also require one Prometheus deployment per {{site.mesh_product_name}} mesh. -2. Outside the mesh. In this case you'll need to specify `skipMTLS: true` in the [traffic metrics configuration](/docs/{{ page.version }}/policies/traffic-metrics). This is less secured but will ensure Prometheus is as available as possible. It is also easier to add to an existing setup with services in and outside the mesh. +2. Outside the mesh. In this case you'll need to specify {% if_version lte:2.3.x %}`skipMTLS: true`{% endif_version %}{% if_version gte:2.4.x %}`tls.mode: disabled`{% endif_version %} in the [traffic metrics configuration](/docs/{{ page.version }}/policies/traffic-metrics). This is less secure but ensures Prometheus is as available as possible. It's also easier to add to an existing setup with services in and outside the mesh. +{% if_version gte:2.4.x %} +3. Outside the mesh with TLS enabled. In this case you'll need to provide certificates for each dataplane and specify configuration in the [traffic metrics configuration](/docs/{{ page.version }}/policies/traffic-metrics#secure-metrics-with-tls). This is more secure than second option but requires more configuration. +{% endif_version %} In production, we recommend the second option as it provides better visibility when things go wrong, and it's usually acceptable for metrics to be less secure. {% endtip %} diff --git a/app/_src/policies/traffic-metrics.md b/app/_src/policies/traffic-metrics.md index 28e730252..724acfe34 100644 --- a/app/_src/policies/traffic-metrics.md +++ b/app/_src/policies/traffic-metrics.md @@ -41,6 +41,7 @@ spec: which is a shortcut for: +{% if_version lte:2.3.x %} ```yaml apiVersion: kuma.io/v1alpha1 kind: Mesh @@ -59,9 +60,33 @@ spec: tags: # tags that can be referred in Traffic Permission when metrics are secured by mTLS kuma.io/service: dataplane-metrics ``` +{% endif_version %} +{% if_version gte:2.4.x %} +```yaml +apiVersion: kuma.io/v1alpha1 +kind: Mesh +metadata: + name: default +spec: + metrics: + enabledBackend: prometheus-1 + backends: + - name: prometheus-1 + type: prometheus + conf: + tls: + mode: activeMTLSBackend + port: 5670 + path: /metrics + tags: # tags that can be referred in Traffic Permission when metrics are secured by mTLS + kuma.io/service: dataplane-metrics +``` +{% endif_version %} + {% endtab %} {% tab expose-metrics-data-plane-proxies Universal %} +{% if_version lte:2.3.x %} ```yaml type: Mesh name: default @@ -73,9 +98,24 @@ metrics: conf: skipMTLS: true # by default mTLS metrics are also protected by mTLS. Scraping metrics with mTLS without transparent proxy is not supported at the moment. ``` - +{% endif_version %} +{% if_version gte:2.4.x %} +```yaml +type: Mesh +name: default +metrics: + enabledBackend: prometheus-1 + backends: + - name: prometheus-1 + type: prometheus + conf: + tls: + mode: disabled +``` +{% endif_version %} which is a shortcut for: +{% if_version lte:2.3.x %} ```yaml type: Mesh name: default @@ -91,6 +131,25 @@ metrics: tags: # tags that can be referred in Traffic Permission when metrics are secured by mTLS kuma.io/service: dataplane-metrics ``` +{% endif_version %} +{% if_version gte:2.4.x %} +```yaml +type: Mesh +name: default +metrics: + enabledBackend: prometheus-1 + backends: + - name: prometheus-1 + type: prometheus + conf: + port: 5670 + path: /metrics + tags: # tags that can be referred in Traffic Permission when metrics are secured by mTLS + kuma.io/service: dataplane-metrics + tls: + mode: disabled +``` +{% endif_version %} {% endtab %} {% endtabs %} @@ -100,6 +159,88 @@ The metrics endpoint is forwarded to the standard Envoy [Prometheus metrics endp You can pass the `filter` query parameter to limit the results to metrics whose names match a given regular expression. By default all available metrics are returned. +{% if_version gte:2.4.x %} +### Secure metrics with TLS + +{{site.mesh_product_name}} allows configuring metrics endpoint with TLS. You can use it when the `Prometheus` deployment is outside of the mesh and requires secure communication. + +{% tabs expose-metrics-data-plane-proxies-tls useUrlFragment=false %} +{% tab expose-metrics-data-plane-proxies-tls Kubernetes %} + +```yaml +apiVersion: kuma.io/v1alpha1 +kind: Mesh +metadata: + name: default +spec: + metrics: + enabledBackend: prometheus-1 + backends: + - name: prometheus-1 + type: prometheus + conf: + port: 5670 + path: /metrics + tls: + mode: providedTLS +``` +In addition to the `Mesh` configuration, `kuma-sidecar` requires a provided certificate and key for its operation. When the certificate and key are available within the container, `kuma-sidecar` needs the paths to provided files as the following environment variables: + +* KUMA_DATAPLANE_RUNTIME_METRICS_CERT_PATH +* KUMA_DATAPLANE_RUNTIME_METRICS_KEY_PATH + +It's possible to use a [`ContainerPatch`](/docs/{{ page.version }}/production/dp-config/dpp-on-kubernetes/#custom-container-configuration) to add variables to `kuma-sidecar`: + +```yaml +apiVersion: kuma.io/v1alpha1 +kind: ContainerPatch +metadata: + name: container-patch-1 + namespace: kuma-system +spec: + sidecarPatch: + - op: add + path: /env/- + value: '{ + "name": "KUMA_DATAPLANE_RUNTIME_METRICS_CERT_PATH", + "value": "/kuma/server.crt" + }' + - op: add + path: /env/- + value: '{ + "name": "KUMA_DATAPLANE_RUNTIME_METRICS_KEY_PATH", + "value": "/kuma/server.key" + }' +``` + +{% endtab %} +{% tab expose-metrics-data-plane-proxies-tls Universal %} + +```yaml +type: Mesh +name: default +metrics: + enabledBackend: prometheus-1 + backends: + - name: prometheus-1 + type: prometheus + conf: + port: 5670 + path: /metrics + tls: + mode: providedTLS +``` + +In addition to the `Mesh` configuration, `kuma-dp` requires a provided certificate and key for its operation. Please upload the certificate and the key to the machine, and then define the following environment variables with the correct paths: + + * KUMA_DATAPLANE_RUNTIME_METRICS_CERT_PATH + * KUMA_DATAPLANE_RUNTIME_METRICS_KEY_PATH + +{% endtab %} +{% endtabs %} + +{% endif_version %} + ## Expose metrics from applications In addition to exposing metrics from the data plane proxies, you might want to expose metrics from applications running next to the proxies. {{site.mesh_product_name}} allows scraping Prometheus metrics from the applications endpoint running in the same `Pod` or `VM`. @@ -110,7 +251,7 @@ Here are reasons where you'd want to use this feature: - Application metrics are labelled with your mesh parameters (tags, mesh, data plane name...), this means that in mixed Universal and Kubernetes mode metrics are reported with the same types of labels. - Both application and sidecar metrics are scraped at the same time. This makes sure they are coherent (with 2 different scrapers they can end up scraping at different intervals and make metrics harder to correlate). -- If you disable [passthrough](/docs/{{ page.version }}/networking/non-mesh-traffic#outgoing) and your mesh uses mTLS but Prometheus is outside the mesh (`skipMTLS: true`) this will be the only way to retrieve these metrics as the application is completely hidden behind the sidecar. +- If you disable [passthrough](/docs/{{ page.version }}/networking/non-mesh-traffic#outgoing) and your mesh uses mTLS but Prometheus is outside the mesh {% if_version lte:2.3.x %}(`skipMTLS: true`){% endif_version %}{% if_version gte:2.4.x %}(`tls.mode: disabled`){% endif_version %} this is the only way to retrieve these metrics as the app is completely hidden behind the sidecar. {% warning %} Any configuration change requires redeployment of the data plane. @@ -118,6 +259,7 @@ Any configuration change requires redeployment of the data plane. {% tabs expose-metrics-apps useUrlFragment=false %} {% tab expose-metrics-apps Kubernetes %} +{% if_version lte:2.3.x %} ```yaml apiVersion: kuma.io/v1alpha1 kind: Mesh @@ -143,8 +285,38 @@ spec: # default path is going to be used, default: /metrics port: 8000 ``` +{% endif_version %} +{% if_version gte:2.4.x %} +```yaml +apiVersion: kuma.io/v1alpha1 +kind: Mesh +metadata: + name: default +spec: + metrics: + enabledBackend: prometheus-1 + backends: + - name: prometheus-1 + type: prometheus + conf: + port: 5670 + path: /metrics + tags: # tags that can be referred in Traffic Permission when metrics are secured by mTLS + kuma.io/service: dataplane-metrics + tls: + mode: activeMTLSBackend + aggregate: + - name: my-service # name of the metric, required to later disable/override with pod annotations + path: "/metrics/prometheus" + port: 8888 + - name: other-sidecar + # default path is going to be used, default: /metrics + port: 8000 +``` +{% endif_version %} {% endtab %} {% tab expose-metrics-apps Universal %} +{% if_version lte:2.3.x %} ```yaml type: Mesh name: default @@ -165,6 +337,30 @@ metrics: # default path is going to be used, default: /metrics port: 8000 ``` +{% endif_version %} +{% if_version gte:2.4.x %} +```yaml +type: Mesh +name: default +metrics: + enabledBackend: prometheus-1 + backends: + - name: prometheus-1 + type: prometheus + conf: + port: 5670 + path: /metrics + tls: + mode: disabled + aggregate: + - name: my-service # name of the metric, required to later disable/override in the Dataplane resource + path: "/metrics/prometheus" + port: 8888 + - name: other-sidecar + # default path is going to be used, default: /metrics + port: 8000 +``` +{% endif_version %} {% endtab %} {% endtabs %} @@ -256,6 +452,7 @@ Proxies for this Pod expose an HTTP endpoint with Prometheus metrics on port `12 To override mesh-wide defaults on a particular machine, configure the `Dataplane` resource: +{% if_version lte:2.3.x %} ```yaml type: Dataplane mesh: default @@ -267,7 +464,21 @@ metrics: port: 1234 path: /non-standard-path ``` - +{% endif_version %} +{% if_version gte:2.4.x %} +```yaml +type: Dataplane +mesh: default +name: example +metrics: + type: prometheus + conf: + tls: + mode: activeMTLSBackend + port: 1234 + path: /non-standard-path +``` +{% endif_version %} This proxy exposes an HTTP endpoint with Prometheus metrics on port `1234` and URI path `/non-standard-path`. {% endtab %} {% endtabs %} @@ -278,6 +489,7 @@ In case you don't want to retrieve all Envoy's metrics, it's possible to filter {% tabs envoy useUrlFragment=false %} {% tab envoy Kubernetes %} +{% if_version lte:2.3.x %} ```yaml apiVersion: kuma.io/v1alpha1 kind: Mesh @@ -297,6 +509,29 @@ spec: filterRegex: http2_act.* usedOnly: true ``` +{% endif_version %} +{% if_version gte:2.4.x %} +```yaml +apiVersion: kuma.io/v1alpha1 +kind: Mesh +metadata: + name: default +spec: + metrics: + enabledBackend: prometheus-1 + backends: + - name: prometheus-1 + type: prometheus + conf: + tls: + mode: activeMTLSBackend + port: 5670 + path: /metrics + envoy: + filterRegex: http2_act.* + usedOnly: true +``` +{% endif_version %} {% endtab %} {% tab envoy Universal %} ```yaml @@ -324,6 +559,7 @@ metrics: {% tabs secure-data-plane-proxy-metrics useUrlFragment=false %} {% tab secure-data-plane-proxy-metrics Kubernetes %} Make sure that mTLS is enabled in the mesh. +{% if_version lte:2.3.x %} ```yaml apiVersion: kuma.io/v1alpha1 kind: Mesh @@ -347,6 +583,33 @@ spec: tags: # tags that can be referred in a TrafficPermission resource kuma.io/service: dataplane-metrics ``` +{% endif_version %} +{% if_version gte:2.4.x %} +```yaml +apiVersion: kuma.io/v1alpha1 +kind: Mesh +metadata: + name: default +spec: + mtls: + enabledBackend: ca-1 + backends: + - name: ca-1 + type: builtin + metrics: + enabledBackend: prometheus-1 + backends: + - name: prometheus-1 + type: prometheus + conf: + port: 5670 + path: /metrics + tls: + mode: activeMTLSBackend + tags: # tags that can be referred in a TrafficPermission resource + kuma.io/service: dataplane-metrics +``` +{% endif_version %} If you have strict [traffic permissions](/docs/{{ page.version }}/policies/traffic-permissions) you will want to allow the traffic from Grafana to Prometheus and from Prometheus to data plane proxy metrics: @@ -380,6 +643,7 @@ spec: {% endtab %} {% tab secure-data-plane-proxy-metrics Universal %} Make sure that mTLS is enabled in the mesh. +{% if_version lte:2.3.x %} ```yaml type: Mesh name: default @@ -401,6 +665,31 @@ spec: tags: # tags that can be referred in a TrafficPermission resource kuma.io/service: dataplane-metrics ``` +{% endif_version %} +{% if_version gte:2.4.x %} +```yaml +type: Mesh +name: default +spec: + mtls: + enabledBackend: ca-1 + backends: + - name: ca-1 + type: builtin + metrics: + enabledBackend: prometheus-1 + backends: + - name: prometheus-1 + type: prometheus + conf: + port: 5670 + path: /metrics + tls: + mode: activeMTLSBackend + tags: # tags that can be referred in a TrafficPermission resource + kuma.io/service: dataplane-metrics +``` +{% endif_version %} If you have strict [traffic permissions](/docs/{{ page.version }}/policies/traffic-permissions) you will want to allow the traffic from Grafana to Prometheus and from Prometheus to data plane proxy metrics: diff --git a/app/_src/quickstart/universal.md b/app/_src/quickstart/universal.md index 42f332421..5c2f1b4a5 100644 --- a/app/_src/quickstart/universal.md +++ b/app/_src/quickstart/universal.md @@ -211,6 +211,7 @@ One of the most important [policies](/policies) that {{site.mesh_product_name}} With Traffic Metrics we can leverage Prometheus and Grafana to provide powerful dashboards that visualize the overall traffic activity of our application and the status of the service mesh. +{% if_version lte:2.3.x %} ```sh cat <