diff --git a/jsonnetfile.lock.json b/jsonnetfile.lock.json index 109d5056..9fbf94a7 100644 --- a/jsonnetfile.lock.json +++ b/jsonnetfile.lock.json @@ -29,7 +29,7 @@ "subdir": "contrib/mixin" } }, - "version": "f7bae0da4ee0b62a0874b8d0cf3640580ee45597", + "version": "a7136933ddb07652f5136cf35d454b805a2a615b", "sum": "IkDHlaE0gvvcPjSNurFT+jQ2aCOAbqHF1WVmXbAgkds=" }, { @@ -39,8 +39,8 @@ "subdir": "operations/observability/mixins" } }, - "version": "cf39aba54afdfca8dd6f0f4c2aa262ffbbfb3fc9", - "sum": "7ZMog6DcquMx8FRSmkNSKL6e7DyTMez2RV0DwqMwQ9k=", + "version": "62a663912eb99f82e7d7f7fbece3a971197c2f25", + "sum": "yOyZudPcE0EVTBtW0gTf1/vRt60NCf3U2+DUiSDNVEw=", "name": "gitpod" }, { @@ -172,7 +172,7 @@ "subdir": "documentation/prometheus-mixin" } }, - "version": "d2701be53ae7de46b77779121a52451cd822cc77", + "version": "35d6813963b3f82c5d07f3602cc5e6decd12f53e", "sum": "Dq+wurABxuqRAHj4DGp2sCmjJWzNjrhP2XEScsS0kmY=", "name": "prometheus" }, diff --git a/monitoring-satellite/manifests/grafana/dashboardDefinitions.yaml b/monitoring-satellite/manifests/grafana/dashboardDefinitions.yaml index 75176605..f2e091ef 100644 --- a/monitoring-satellite/manifests/grafana/dashboardDefinitions.yaml +++ b/monitoring-satellite/manifests/grafana/dashboardDefinitions.yaml @@ -71597,8 +71597,203 @@ items: ], "title": "Scheduled job duration", "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 16, + "panels": [ + + ], + "title": "Ledger reconciler", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 900 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 18, + "options": { + 
"orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.1.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "editorMode": "code", + "expr": "time() - max(max_over_time(gitpod_usage_ledger_last_completed_time{outcome=\"success\"}[15m]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Time since last successful run", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [ + + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 900 + } + ] + }, + "unit": "s" + }, + "overrides": [ + + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 19 + }, + "id": 20, + "options": { + "legend": { + "calcs": [ + + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "editorMode": "code", + "expr": "time() - max(max_over_time(gitpod_usage_ledger_last_completed_time{outcome=\"success\"}[15m]))", + 
"legendFormat": "success", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "editorMode": "code", + "expr": "time() - max(max_over_time(gitpod_usage_ledger_last_completed_time{outcome=\"error\"}[15m]))", + "hide": false, + "legendFormat": "error", + "range": true, + "refId": "B" + } + ], + "title": "Time since last ledger completion, by outcome", + "type": "timeseries" } ], + "refresh": false, "schemaVersion": 37, "style": "dark", "tags": [ @@ -71716,7 +71911,7 @@ items: "timezone": "utc", "title": "Component: Usage", "uid": "8W7P-jg4z", - "version": 1, + "version": 2, "weekStart": "monday" } kind: ConfigMap diff --git a/vendor/github.com/gitpod-io/gitpod/operations/observability/mixins/meta/dashboards/components/usage.json b/vendor/github.com/gitpod-io/gitpod/operations/observability/mixins/meta/dashboards/components/usage.json index 2b773424..0ccf03b3 100644 --- a/vendor/github.com/gitpod-io/gitpod/operations/observability/mixins/meta/dashboards/components/usage.json +++ b/vendor/github.com/gitpod-io/gitpod/operations/observability/mixins/meta/dashboards/components/usage.json @@ -462,8 +462,191 @@ ], "title": "Scheduled job duration", "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 16, + "panels": [], + "title": "Ledger reconciler", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 900 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 18, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + 
"showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.1.4", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "editorMode": "code", + "expr": "time() - max(max_over_time(gitpod_usage_ledger_last_completed_time{outcome=\"success\"}[15m]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Time since last successful run", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 900 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 19 + }, + "id": 20, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "editorMode": "code", + "expr": "time() - max(max_over_time(gitpod_usage_ledger_last_completed_time{outcome=\"success\"}[15m]))", + "legendFormat": "success", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + 
}, + "editorMode": "code", + "expr": "time() - max(max_over_time(gitpod_usage_ledger_last_completed_time{outcome=\"error\"}[15m]))", + "hide": false, + "legendFormat": "error", + "range": true, + "refId": "B" + } + ], + "title": "Time since last ledger completion, by outcome", + "type": "timeseries" } ], + "refresh": false, "schemaVersion": 37, "style": "dark", "tags": [], @@ -571,6 +754,6 @@ "timezone": "utc", "title": "Component: Usage", "uid": "8W7P-jg4z", - "version": 1, + "version": 2, "weekStart": "monday" } diff --git a/vendor/github.com/gitpod-io/gitpod/operations/observability/mixins/meta/rules/usage.yaml b/vendor/github.com/gitpod-io/gitpod/operations/observability/mixins/meta/rules/usage.yaml index 89a70760..0473acb8 100644 --- a/vendor/github.com/gitpod-io/gitpod/operations/observability/mixins/meta/rules/usage.yaml +++ b/vendor/github.com/gitpod-io/gitpod/operations/observability/mixins/meta/rules/usage.yaml @@ -35,3 +35,14 @@ spec: runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodUsageReconcileInvoicesFailures.md summary: There are failed Stripe invoice reconciliations. description: We have accumulated {{ printf "%.2f" $value }} failures. This affects how much customers will be billed. + + - alert: GitpodUsageTooLongSinceLastSuccessfulLedgerReconciliation + expr: (time() - gitpod_usage_ledger_last_completed_time{outcome="success"}) > 60 * 60 + for: 30m + labels: + severity: warning + team: webapp + annotations: + runbook_url: https://github.com/gitpod-io/runbooks/blob/main/runbooks/GitpodUsageTooLongSinceLastSuccessfulLedgerReconciliation.md + summary: Usage reconciliation has not run successfully for {{ printf "%.2f" $value }} seconds. Usage data is stale. + description: We have not executed scheduled usage reconciliation for {{ printf "%.2f" $value }} seconds. We expect the data to update every 15 minutes to avoid stale usage records and stale invoices.