From 92f08ce20d678870e4c0a272cccb5068f7ebc134 Mon Sep 17 00:00:00 2001 From: YangKeao Date: Tue, 7 Feb 2023 16:43:19 +0800 Subject: [PATCH] add metrics for TTL Signed-off-by: YangKeao --- metrics/grafana/tidb.json | 600 ++++++++++++++---- metrics/metrics.go | 1 + metrics/ttl.go | 8 + session/BUILD.bazel | 3 + session/session.go | 3 + session/session_test.go | 49 ++ sessionctx/variable/session.go | 5 + table/tables/tables.go | 7 + ttl/metrics/metrics.go | 3 +- ttl/ttlworker/task_manager.go | 12 +- .../task_manager_integration_test.go | 2 +- 11 files changed, 579 insertions(+), 114 deletions(-) diff --git a/metrics/grafana/tidb.json b/metrics/grafana/tidb.json index f8b125746a804..59daac4b0720e 100644 --- a/metrics/grafana/tidb.json +++ b/metrics/grafana/tidb.json @@ -3706,7 +3706,7 @@ "avg": false, "current": false, "max": false, - "min": false, + "min": false, "show": true, "total": false, "values": false @@ -16957,11 +16957,11 @@ "panels": [ { "aliasColors": {}, - "bars": true, + "bars": false, "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "kv request count by instance and command source", + "description": "TiDB cpu usage calculated with process cpu running seconds", "editable": true, "error": false, "fieldConfig": { @@ -16972,28 +16972,26 @@ "fillGradient": 0, "grid": {}, "gridPos": { - "h": 7, + "h": 8, "w": 12, "x": 0, - "y": 15 + "y": 20 }, "hiddenSeries": false, - "id": 259, + "id": 297, "legend": { "alignAsTable": true, - "avg": true, - "current": false, - "max": true, + "avg": false, + "current": true, + "max": false, "min": false, "rightSide": true, "show": true, - "sort": "max", - "sortDesc": true, "total": false, "values": true }, - "lines": false, - "linewidth": 2, + "lines": true, + "linewidth": 1, "links": [], "nullPointMode": "null as zero", "options": { @@ -17006,46 +17004,43 @@ "renderer": "flot", "seriesOverrides": [ { - "alias": "KV Requst Count", - "bars": false, - "color": "#FADE2A", - "lines": true, - 
"linewidth": 1, - "stack": false + "alias": "total", + "fill": 0, + "lines": false + }, + { + "alias": "/limit/", + "color": "#C4162A", + "fill": 0, + "nullPointMode": "null" } ], "spaceLength": 10, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { - "exemplar": true, - "expr": "sum(rate(tidb_tikvclient_request_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, type, source)", + "expr": "irate(process_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", job=\"tidb\"}[30s])", "format": "time_series", - "interval": "", + "hide": false, "intervalFactor": 1, - "legendFormat": "{{instance}}-{{type}}-{{source}}", + "legendFormat": "{{instance}}", "refId": "A", "step": 40 }, { - "exemplar": true, - "expr": "sum(rate(tidb_tikvclient_request_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "KV Requst Count", - "refId": "B", - "step": 40 + "expr": "tidb_server_maxprocs{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", job=\"tidb\"}", + "legendFormat": "limit-{{instance}}", + "refId": "B" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "KV Request OPS by source", + "title": "TiDB CPU Usage", "tooltip": { - "msResolution": true, + "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -17060,8 +17055,7 @@ }, "yaxes": [ { - "$$hashKey": "object:62", - "format": "short", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -17069,7 +17063,6 @@ "show": true }, { - "$$hashKey": "object:63", "format": "short", "label": null, "logBase": 1, @@ -17085,11 +17078,12 @@ }, { "aliasColors": {}, - "bars": true, + "bars": false, "dashLength": 10, "dashes": false, "datasource": 
"${DS_TEST-CLUSTER}", - "description": "kv request time by instance and command source", + "decimals": null, + "description": "IO MBps: The total bytes of read and write in all TiKV instances", "editable": true, "error": false, "fieldConfig": { @@ -17100,29 +17094,27 @@ "fillGradient": 0, "grid": {}, "gridPos": { - "h": 7, + "h": 8, "w": 12, "x": 12, - "y": 15 + "y": 20 }, "hiddenSeries": false, - "id": 260, + "id": 301, "legend": { "alignAsTable": true, "avg": true, "current": false, "hideEmpty": true, "hideZero": true, - "max": true, + "max": false, "min": false, "rightSide": true, "show": true, - "sort": "max", - "sortDesc": true, "total": false, "values": true }, - "lines": false, + "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null as zero", @@ -17134,46 +17126,155 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "KV Requst Time", - "bars": false, - "color": "#FADE2A", - "lines": true, - "linewidth": 1, - "stack": false - } - ], + "seriesOverrides": [], "spaceLength": 10, - "stack": true, + "stack": false, "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "sum(rate(tidb_tikvclient_request_time_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m])) by (instance, type, source)", - "format": "time_series", + "expr": "avg(sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\", db=\"kv\", type=~\"wal_file_bytes|bytes_read|iter_bytes_read\"}[1m])) by (instance))", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}-{{type}}-{{source}}", - "refId": "A", - "step": 40 + "legendFormat": "IO-Avg", + "refId": "D" }, { "exemplar": true, - "expr": "sum(rate(tidb_tikvclient_request_time_counter{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", - "format": "time_series", + "expr": 
"max(sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\", db=\"kv\", type=~\"wal_file_bytes|bytes_read|iter_bytes_read\"}[1m])) by (instance))", + "hide": false, "interval": "", - "legendFormat": "KV Requst Time", - "refId": "B", - "step": 40 + "legendFormat": "IO-Max", + "refId": "E" + }, + { + "exemplar": true, + "expr": "max(sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\", db=\"kv\", type=~\"wal_file_bytes|bytes_read|iter_bytes_read\"}[1m])) by (instance)) - min(sum(rate(tikv_engine_flow_bytes{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\", db=\"kv\", type=~\"wal_file_bytes|bytes_read|iter_bytes_read\"}[1m])) by (instance))", + "hide": false, + "interval": "", + "legendFormat": "IO-Delta", + "refId": "F" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "KV Request Time by source", + "title": "TiKV IO MBps", "tooltip": { - "msResolution": true, + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:208", + "decimals": null, + "format": "Bps", + "label": "MBps", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:209", + "decimals": null, + "format": "Bps", + "label": "MBps ", + "logBase": 1, + "max": null, + "min": "0", + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "decimals": 1, + "description": "The CPU usage of each TiKV instance", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 28 + }, 
+ "hiddenSeries": false, + "id": 299, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": null, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(process_cpu_seconds_total{k8s_cluster=\"$k8s_cluster\", tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", job=~\".*tikv\"}[1m])) by (instance)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TiKV CPU", + "tooltip": { + "msResolution": false, "shared": true, "sort": 0, "value_type": "individual" @@ -17188,8 +17289,7 @@ }, "yaxes": [ { - "$$hashKey": "object:62", - "format": "s", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -17197,7 +17297,6 @@ "show": true }, { - "$$hashKey": "object:63", "format": "short", "label": null, "logBase": 1, @@ -17210,22 +17309,7 @@ "align": false, "alignLevel": null } - } - ], - "title": "SourceSQL", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 75 - }, - "id": 274, - "panels": [ + }, { "aliasColors": {}, "bars": false, @@ -17242,8 +17326,8 @@ "gridPos": { "h": 8, "w": 12, - "x": 0, - "y": 76 + "x": 12, + "y": 28 }, "hiddenSeries": false, "id": 279, @@ -17264,7 +17348,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.10", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ 
-17346,7 +17430,6 @@ "dashLength": 10, "dashes": false, "datasource": "${DS_TEST-CLUSTER}", - "description": "The processed rows per second by TTL jobs", "fieldConfig": { "defaults": {}, "overrides": [] @@ -17356,12 +17439,13 @@ "gridPos": { "h": 8, "w": 12, - "x": 12, - "y": 76 + "x": 0, + "y": 36 }, "hiddenSeries": false, - "id": 287, + "id": 302, "legend": { + "alignAsTable": false, "avg": false, "current": false, "max": false, @@ -17378,16 +17462,110 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.10", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", - "seriesOverrides": [ + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ { - "alias": "delete error", - "color": "#F2495C" + "exemplar": true, + "expr": "sum(rate(tidb_server_ttl_insert_rows{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1m]))", + "interval": "", + "legendFormat": "insert rows per second", + "queryType": "randomWalk", + "refId": "A" } ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TTL Insert Rows Per Second", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:394", + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:395", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The processed rows per second by TTL jobs", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, 
+ "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 36 + }, + "hiddenSeries": false, + "id": 287, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, @@ -17421,6 +17599,7 @@ }, "yaxes": [ { + "$$hashKey": "object:1185", "format": "short", "label": null, "logBase": 1, @@ -17429,12 +17608,211 @@ "show": true }, { + "$$hashKey": "object:1186", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 44 + }, + "hiddenSeries": false, + "id": 296, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tidb_server_ttl_insert_rows{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\"}[1h]))", + "interval": "1h", + "legendFormat": "insert rows per hour", + 
"queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TTL Insert Rows Per Hour", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:394", + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:395", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_TEST-CLUSTER}", + "description": "The rows deleted per hour by TTL jobs", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 44 + }, + "hiddenSeries": false, + "id": 303, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "7.5.11", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(increase(tidb_server_ttl_processed_expired_rows{k8s_cluster=\"$k8s_cluster\",tidb_cluster=\"$tidb_cluster\", instance=~\"$instance\", sql_type=\"delete\", result=\"ok\"}[1h])) by (sql_type, result)", + "interval": "1h", + "legendFormat": "delete rows per hour", + "queryType": "randomWalk", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": 
null, + "title": "TTL Delete Rows Per Hour", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1185", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", "show": true + }, + { + "$$hashKey": "object:1186", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false } ], "yaxis": { @@ -17459,7 +17837,7 @@ "h": 8, "w": 12, "x": 0, - "y": 84 + "y": 52 }, "hiddenSeries": false, "id": 284, @@ -17480,7 +17858,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.10", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -17580,7 +17958,7 @@ "h": 8, "w": 12, "x": 12, - "y": 84 + "y": 52 }, "hiddenSeries": false, "id": 285, @@ -17601,7 +17979,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.10", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -17701,7 +18079,7 @@ "h": 8, "w": 12, "x": 0, - "y": 92 + "y": 60 }, "hiddenSeries": false, "id": 276, @@ -17722,7 +18100,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.10", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -17831,7 +18209,7 @@ "h": 8, "w": 12, "x": 12, - "y": 92 + "y": 60 }, "hiddenSeries": false, "id": 282, @@ -17852,7 +18230,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.10", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ -17961,7 +18339,7 @@ "h": 8, "w": 12, "x": 0, - "y": 100 + "y": 68 }, "hiddenSeries": false, "id": 281, @@ -17982,7 +18360,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.10", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", @@ 
-18067,7 +18445,7 @@ "h": 8, "w": 12, "x": 12, - "y": 100 + "y": 68 }, "hiddenSeries": false, "id": 294, @@ -18088,7 +18466,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.10", + "pluginVersion": "7.5.11", "pointradius": 2, "points": false, "renderer": "flot", diff --git a/metrics/metrics.go b/metrics/metrics.go index 6767d2dfc6adb..1e25f9438c794 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -218,6 +218,7 @@ func RegisterMetrics() { prometheus.MustRegister(TTLJobStatus) prometheus.MustRegister(TTLTaskStatus) prometheus.MustRegister(TTLPhaseTime) + prometheus.MustRegister(TTLInsertRowsCount) prometheus.MustRegister(EMACPUUsageGauge) prometheus.MustRegister(PoolConcurrencyCounter) diff --git a/metrics/ttl.go b/metrics/ttl.go index 754744e93d1d8..52782b08dba76 100644 --- a/metrics/ttl.go +++ b/metrics/ttl.go @@ -58,4 +58,12 @@ var ( Name: "ttl_phase_time", Help: "The time spent in each phase", }, []string{LblType, LblPhase}) + + TTLInsertRowsCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: "tidb", + Subsystem: "server", + Name: "ttl_insert_rows", + Help: "The count of TTL rows inserted", + }) ) diff --git a/session/BUILD.bazel b/session/BUILD.bazel index f5ef849b3a036..2af8f62b74fa9 100644 --- a/session/BUILD.bazel +++ b/session/BUILD.bazel @@ -136,6 +136,7 @@ go_test( "//expression", "//kv", "//meta", + "//metrics", "//parser/ast", "//parser/auth", "//parser/model", @@ -162,6 +163,8 @@ go_test( "//util/sqlexec", "@com_github_pingcap_failpoint//:failpoint", "@com_github_pingcap_log//:log", + "@com_github_prometheus_client_golang//prometheus", + "@com_github_prometheus_client_model//go", "@com_github_stretchr_testify//require", "@com_github_tikv_client_go_v2//testutils", "@com_github_tikv_client_go_v2//tikv", diff --git a/session/session.go b/session/session.go index 2d84f1c0fa858..12e6d734b991a 100644 --- a/session/session.go +++ b/session/session.go @@ -1090,6 +1090,9 @@ func (s *session) CommitTxn(ctx 
context.Context) error { s.sessionVars.StmtCtx.MergeExecDetails(nil, commitDetail) } + // record the TTLInsertRows in the metric + metrics.TTLInsertRowsCount.Add(float64(s.sessionVars.TxnCtx.InsertTTLRowsCount)) + failpoint.Inject("keepHistory", func(val failpoint.Value) { if val.(bool) { failpoint.Return(err) diff --git a/session/session_test.go b/session/session_test.go index 2d1c9669445c5..4edb8ddce4c86 100644 --- a/session/session_test.go +++ b/session/session_test.go @@ -21,10 +21,13 @@ import ( "testing" "github.com/pingcap/tidb/ddl" + "github.com/pingcap/tidb/metrics" "github.com/pingcap/tidb/session" "github.com/pingcap/tidb/tablecodec" "github.com/pingcap/tidb/testkit" "github.com/pingcap/tidb/testkit/external" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" "github.com/stretchr/testify/require" ) @@ -89,3 +92,49 @@ func TestMetaTableRegion(t *testing.T) { require.NotEqual(t, ddlBackfillTableRegionID, ddlBackfillHistoryTableRegionID) } + +func MustReadCounter(t *testing.T, m prometheus.Counter) float64 { + pb := &dto.Metric{} + require.NoError(t, m.Write(pb)) + return pb.GetCounter().GetValue() +} + +func TestRecordTTLRows(t *testing.T) { + store := testkit.CreateMockStore(t) + tk := testkit.NewTestKit(t, store) + + tk.MustExec("use test") + tk.MustExec("create table t(created_at datetime) TTL = created_at + INTERVAL 1 DAY") + // simple insert should be recorded + tk.MustExec("insert into t values (NOW())") + require.Equal(t, 1.0, MustReadCounter(t, metrics.TTLInsertRowsCount)) + + // an insert in an explicit transaction should be recorded + tk.MustExec("begin") + tk.MustExec("insert into t values (NOW())") + tk.MustExec("commit") + require.Equal(t, 2.0, MustReadCounter(t, metrics.TTLInsertRowsCount)) + + // every row inserted in a multi-statement transaction should be recorded + tk.MustExec("begin") + tk.MustExec("insert into t values (NOW())") + tk.MustExec("insert into t values (NOW())") + tk.MustExec("commit") + require.Equal(t, 4.0, 
MustReadCounter(t, metrics.TTLInsertRowsCount)) + + // rollback will remove all recorded TTL rows + tk.MustExec("begin") + tk.MustExec("insert into t values (NOW())") + tk.MustExec("insert into t values (NOW())") + tk.MustExec("rollback") + require.Equal(t, 6.0, MustReadCounter(t, metrics.TTLInsertRowsCount)) + + // rolling back to a savepoint restores the TTL row count saved at that savepoint + tk.MustExec("begin") + tk.MustExec("insert into t values (NOW())") + tk.MustExec("savepoint insert1") + tk.MustExec("insert into t values (NOW())") + tk.MustExec("rollback to insert1") + tk.MustExec("commit") + require.Equal(t, 7.0, MustReadCounter(t, metrics.TTLInsertRowsCount)) +} diff --git a/sessionctx/variable/session.go b/sessionctx/variable/session.go index 46cc6110af27e..0c4c2cb862983 100644 --- a/sessionctx/variable/session.go +++ b/sessionctx/variable/session.go @@ -187,6 +187,9 @@ type TxnCtxNeedToRestore struct { // CachedTables is not nil if the transaction write on cached table. CachedTables map[int64]interface{} + + // InsertTTLRowsCount counts how many rows have been inserted into TTL tables in this transaction + InsertTTLRowsCount int } // TxnCtxNoNeedToRestore stores transaction variables which do not need to restored when rolling back to a savepoint. @@ -377,6 +380,7 @@ func (tc *TransactionContext) GetCurrentSavepoint() TxnCtxNeedToRestore { TableDeltaMap: tableDeltaMap, pessimisticLockCache: pessimisticLockCache, CachedTables: cachedTables, + InsertTTLRowsCount: tc.InsertTTLRowsCount, } } @@ -385,6 +389,7 @@ func (tc *TransactionContext) RestoreBySavepoint(savepoint TxnCtxNeedToRestore) tc.TableDeltaMap = savepoint.TableDeltaMap tc.pessimisticLockCache = savepoint.pessimisticLockCache tc.CachedTables = savepoint.CachedTables + tc.InsertTTLRowsCount = savepoint.InsertTTLRowsCount } // AddSavepoint adds a new savepoint. 
diff --git a/table/tables/tables.go b/table/tables/tables.go index bbdbffdd893fc..3e4a673cc77fb 100644 --- a/table/tables/tables.go +++ b/table/tables/tables.go @@ -924,6 +924,9 @@ func (t *TableCommon) AddRecord(sctx sessionctx.Context, r []types.Datum, opts . return nil, err } } + if shouldIncreaseTTLMetricCount(t.meta) { + sctx.GetSessionVars().TxnCtx.InsertTTLRowsCount += 1 + } if sessVars.TxnCtx == nil { return recordID, nil } @@ -1592,6 +1595,10 @@ func shouldWriteBinlog(ctx sessionctx.Context, tblInfo *model.TableInfo) bool { return !ctx.GetSessionVars().InRestrictedSQL } +func shouldIncreaseTTLMetricCount(tblInfo *model.TableInfo) bool { + return tblInfo.TTLInfo != nil +} + func (t *TableCommon) getMutation(ctx sessionctx.Context) *binlog.TableMutation { return ctx.StmtGetMutation(t.tableID) } diff --git a/ttl/metrics/metrics.go b/ttl/metrics/metrics.go index 8bc01551bc2a0..da9549cdb023d 100644 --- a/ttl/metrics/metrics.go +++ b/ttl/metrics/metrics.go @@ -49,7 +49,8 @@ var ( RunningJobsCnt = metrics.TTLJobStatus.With(prometheus.Labels{metrics.LblType: "running"}) CancellingJobsCnt = metrics.TTLJobStatus.With(prometheus.Labels{metrics.LblType: "cancelling"}) - RunningTaskCnt = metrics.TTLTaskStatus.With(prometheus.Labels{metrics.LblType: "running"}) + ScanningTaskCnt = metrics.TTLTaskStatus.With(prometheus.Labels{metrics.LblType: "scanning"}) + DeletingTaskCnt = metrics.TTLTaskStatus.With(prometheus.Labels{metrics.LblType: "deleting"}) ) func initWorkerPhases(workerType string) map[string]prometheus.Counter { diff --git a/ttl/ttlworker/task_manager.go b/ttl/ttlworker/task_manager.go index ee245aa1c8c29..4abd2d0098924 100644 --- a/ttl/ttlworker/task_manager.go +++ b/ttl/ttlworker/task_manager.go @@ -497,7 +497,17 @@ func (m *taskManager) checkInvalidTask(se session.Session) { } func (m *taskManager) reportMetrics() { - metrics.RunningTaskCnt.Set(float64(len(m.runningTasks))) + scanningTaskCnt := 0 + deletingTaskCnt := 0 + for _, task := range m.runningTasks { 
+ if task.result != nil { + scanningTaskCnt += 1 + } else { + deletingTaskCnt += 1 + } + } + metrics.ScanningTaskCnt.Set(float64(scanningTaskCnt)) + metrics.DeletingTaskCnt.Set(float64(deletingTaskCnt)) } type runningScanTask struct { diff --git a/ttl/ttlworker/task_manager_integration_test.go b/ttl/ttlworker/task_manager_integration_test.go index 13278d66bf010..19bf25fd3b1d6 100644 --- a/ttl/ttlworker/task_manager_integration_test.go +++ b/ttl/ttlworker/task_manager_integration_test.go @@ -227,7 +227,7 @@ func TestTaskMetrics(t *testing.T) { m.ReportMetrics() out := &dto.Metric{} - require.NoError(t, metrics.RunningTaskCnt.Write(out)) + require.NoError(t, metrics.DeletingTaskCnt.Write(out)) require.Equal(t, float64(1), out.GetGauge().GetValue()) }