diff --git a/content/en/docs/Configuration/p8s-jaeger-grafana/tracing/tempo.md b/content/en/docs/Configuration/p8s-jaeger-grafana/tracing/tempo.md index 2ee66c076..fbf0c0dba 100644 --- a/content/en/docs/Configuration/p8s-jaeger-grafana/tracing/tempo.md +++ b/content/en/docs/Configuration/p8s-jaeger-grafana/tracing/tempo.md @@ -5,6 +5,28 @@ description: > weight: 2 --- +- [Grafana Tempo Configuration](#grafana-tempo-configuration) + - [Using the Grafana Tempo API](#using-the-grafana-tempo-api) + - [Set up the Kiali CR](#set-up-the-kiali-cr) + - [Set up a Tempo Datasource in Grafana](#set-up-a-tempo-datasource-in-grafana) + - [Additional Configuration](#additional-configuration) + - [Service check URL](#service-check-url) + - [Configuration for the Grafana Tempo Datasource](#configuration-for-the-grafana-tempo-datasource) + - [Using the Jaeger frontend with Grafana Tempo tracing backend](#using-the-jaeger-frontend-with-grafana-tempo-tracing-backend) + - [Tanka](#tanka) + - [Tempo Operator](#tempo-operator) +- [Configuration table](#configuration-table) + - [Supported Versions](#supported-versions) + - [Minimal configuration for Kiali <= 1.79](#minimal-configuration-for-kiali--179) + - [Minimal configuration for Kiali > 1.79](#minimal-configuration-for-kiali--179-1) +- [Tempo tuning](#tempo-tuning) + - [Resources consumption](#resources-consumption) + - [Caching](#caching) + - [Tune search pipeline](#tune-search-pipeline) + - [Dedicated attribute columns](#dedicated-attribute-columns) +- [Tempo authentication configuration](#tempo-authentication-configuration) + + ## Grafana Tempo Configuration There are two possibilities to integrate Kiali with Grafana Tempo: @@ -237,6 +259,62 @@ In `external_services.tracing` | Jaeger | `.internal_url = 'http://jaeger_service_url:16686/jaeger'`
`.use_grpc = false`
| `.internal_url = 'http://jaeger_service_url:16685/jaeger'`
`.use_grpc = true (Not required: by default)`

| | Tempo |
`internal_url = 'http://query_frontend_url:3200'`
`.use_grpc = false`
`.provider = 'tempo'`

| `.internal_url = 'http://query_frontend_url:3200'`
`.grpc_port: 9095`
`.provider: 'tempo'`
`.use_grpc = true (Not required: by default)`
| +### Tempo tuning + +#### Resources consumption + +Grafana Tempo is a powerful tool, but it can lead to performance issues when not configured correctly. +For example, the following configuration is not recommended and may lead to out-of-memory (OOM) issues, even for simple queries, in the query-frontend component: + +```yaml +spec: + resources: + total: + limits: + memory: 2Gi + cpu: 2000m +``` + +These resources are shared between all the Tempo components. +When needed, apply resources to each specific component, instead of applying the resources globally: + +```yaml +spec: + template: + queryFrontend: + component: + resources: + limits: + cpu: "2" + memory: 2Gi +``` + +[This Grafana dashboard](/files/tempo-dashboard.json) is available to measure the resources used in the **tempo** namespace. + +#### Caching + +Tempo offers multi-level [caching](https://grafana.com/docs/tempo/latest/operations/caching/) that is used by default with the Tanka and Helm deployment examples. It uses an external cache and supports Memcached and Redis. +The lower-level cache has a higher hit rate and stores bloom filters and Parquet data. +The higher-level cache stores frontend-search data. + +Optimizing the cache depends on the application usage, and can be done by modifying different parameters: + +- Connection limit for Memcached: Should be increased in large deployments, as Memcached sets it to 1024 by default. +- Cache size control: Should be increased when the working set is larger than the size of the cache. + +#### Tune search pipeline + +There are many parameters to [tune the search pipeline](https://grafana.com/docs/tempo/latest/operations/backend_search/), including: + +- `max_concurrent_queries`: If it is too high, it can cause OOM issues. +- `concurrent_jobs`: How many jobs are run concurrently. +- `max_retries`: If it is too high, it can result in a lot of load.
+ +#### Dedicated attribute columns + +When using the vParquet3 storage format, defining [dedicated attribute columns](https://grafana.com/docs/tempo/latest/operations/dedicated_columns/) can improve query performance. +To best choose those columns (up to 10), a good criterion is to pick the attributes that contribute the most to the block size (not necessarily those most commonly used). + ### Tempo authentication configuration The Kiali CR provides authentication configuration that will be used also for querying the version check to provide information in the Mesh graph. diff --git a/static/files/tempo-dashboard.json b/static/files/tempo-dashboard.json new file mode 100644 index 000000000..f76812fa1 --- /dev/null +++ b/static/files/tempo-dashboard.json @@ -0,0 +1,276 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 9, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": 
"green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 22, + "x": 0, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "rate(container_cpu_system_seconds_total{namespace=\"tempo\"}[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{pod}}", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(sum) (rate(container_cpu_system_seconds_total{namespace=\"tempo\"}[$__rate_interval]))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Container CPU system seconds", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + 
}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 22, + "x": 0, + "y": 11 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "container_memory_usage_bytes{namespace=\"tempo\"} / 1048576", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "interval": "", + "legendFormat": "{{pod}}", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum(container_memory_usage_bytes{namespace=\"tempo\"} / 1048576)", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Container memory usage MB", + "type": "timeseries" + } + ], + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timeRangeUpdatedDuringEditOrView": false, + "timepicker": {}, + "timezone": "browser", + "title": "Tempo resources consumption by pod", + "uid": "bdyyqh50uxwcgf", + "version": 3, + "weekStart": "" +} \ No newline at end of file