From c8cb3d999943ccebe908c08768ed64de99e6e982 Mon Sep 17 00:00:00 2001 From: taylan isikdemir Date: Mon, 16 Dec 2024 16:16:13 -0800 Subject: [PATCH 1/5] fixes --- common/dynamicconfig/constants.go | 7 + ...cker-compose-local-matching-simulation.yml | 2 +- .../dashboards/cadence-matching.json | 2745 +++++++++-------- host/matching_simulation_test.go | 125 +- host/onebox.go | 30 +- .../matching_simulation_burst_adaptive.yaml | 1 + .../matching_simulation_fluctuating.yaml | 40 + ...ching_simulation_fluctuating_adaptive.yaml | 48 + service/matching/config/config.go | 3 + service/matching/tasklist/adaptive_scaler.go | 7 +- .../matching/tasklist/task_list_manager.go | 20 +- tools/matchingsimulationcomparison/README.md | 2 +- 12 files changed, 1706 insertions(+), 1324 deletions(-) create mode 100644 host/testdata/matching_simulation_fluctuating.yaml create mode 100644 host/testdata/matching_simulation_fluctuating_adaptive.yaml diff --git a/common/dynamicconfig/constants.go b/common/dynamicconfig/constants.go index a9233c54a10..70ddbf132bc 100644 --- a/common/dynamicconfig/constants.go +++ b/common/dynamicconfig/constants.go @@ -2436,6 +2436,7 @@ const ( MatchingPartitionUpscaleSustainedDuration MatchingPartitionDownscaleSustainedDuration MatchingAdaptiveScalerUpdateInterval + MatchingQPSTrackerInterval // HistoryLongPollExpirationInterval is the long poll expiration interval in the history service // KeyName: history.longPollExpirationInterval @@ -4778,6 +4779,12 @@ var DurationKeys = map[DurationKey]DynamicDuration{ Description: "MatchingAdaptiveScalerUpdateInterval is the internal for adaptive scaler to update", DefaultValue: time.Second * 15, }, + MatchingQPSTrackerInterval: { + KeyName: "matching.qpsTrackerInterval", + Filters: []Filter{DomainName, TaskListName, TaskType}, + Description: "MatchingQPSTrackerInterval is the interval for qps tracker's loop. Changes are not reflected until service restart", + DefaultValue: time.Second * 10, + }, HistoryLongPollExpirationInterval: { KeyName: "history.longPollExpirationInterval", Filters: []Filter{DomainName}, diff --git a/docker/buildkite/docker-compose-local-matching-simulation.yml b/docker/buildkite/docker-compose-local-matching-simulation.yml index 41861fda40c..012ebee155c 100644 --- a/docker/buildkite/docker-compose-local-matching-simulation.yml +++ b/docker/buildkite/docker-compose-local-matching-simulation.yml @@ -53,7 +53,7 @@ services: - -c - > go test -timeout 180s - -run ^TestMatchingSimulationSuite$ + -run ^TestMatchingSimulation.*$ -count 1 -v -tags matchingsim diff --git a/docker/buildkite/grafana/provisioning/dashboards/cadence-matching.json b/docker/buildkite/grafana/provisioning/dashboards/cadence-matching.json index bc76ae1c601..9fa633aadf2 100644 --- a/docker/buildkite/grafana/provisioning/dashboards/cadence-matching.json +++ b/docker/buildkite/grafana/provisioning/dashboards/cadence-matching.json @@ -1,1424 +1,1563 @@ { - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 6, - "links": [], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 9, - "panels": [], - "title": "Add & Poll QPS and Latencies", - "type": "row" - }, + "annotations": { + "list": [ { + "builtIn": 1, "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "type": "grafana", + "uid": "-- Grafana --" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 3, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 9, + "panels": [], + "title": "Add & Poll QPS and Latencies", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 1 - }, - "id": 1, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "pluginVersion": "11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum by (tasklist) (\n rate(cadence_requests_per_tl{\n operation=\"AddDecisionTask\"\n }[1m])\n)", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "AddDecisionTask QPS per tasklist", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum by (tasklist) (\n rate(cadence_requests_per_tl{\n operation=\"AddDecisionTask\"\n }[10s])\n)", + "interval": "10s", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "AddDecisionTask QPS per tasklist", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 1 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "pluginVersion": "11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum by (tasklist) (\n rate(cadence_requests_per_tl{\n operation=\"PollForDecisionTask\"\n }[1m])\n)", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "PollForDecisionTask QPS per tasklist", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (tasklist) (\n rate(cadence_requests_per_tl{\n operation=\"PollForDecisionTask\"\n }[10s])\n)", + "interval": "10s", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "PollForDecisionTask QPS per tasklist", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 9 - }, - "id": 3, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "pluginVersion": "11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, rate(cadence_latency_per_tl_bucket{operation=\"AddDecisionTask\"}[1m]))", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "AddDecisionTask p99 latency per tasklist", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, rate(cadence_latency_per_tl_bucket{operation=\"AddDecisionTask\"}[10s]))", + "interval": "10s", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "AddDecisionTask p99 latency per tasklist", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 9 - }, - "id": 4, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "pluginVersion": "11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, rate(cadence_latency_per_tl_bucket{operation=\"PollForDecisionTask\"}[1m]))", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "PollForDecisionTask p99 latency per tasklist", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, rate(cadence_latency_per_tl_bucket{operation=\"PollForDecisionTask\"}[10s]))", + "interval": "10s", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "PollForDecisionTask p99 latency per tasklist", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 17 - }, - "id": 13, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "pluginVersion": "11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.50, rate(cadence_latency_per_tl_bucket{operation=\"AddDecisionTask\"}[1m]))", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "AddDecisionTask p50 latency per tasklist", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 17 + }, + "id": 13, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, rate(cadence_latency_per_tl_bucket{operation=\"AddDecisionTask\"}[10s]))", + "interval": "10s", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "AddDecisionTask p50 latency per tasklist", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 17 - }, - "id": 14, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "pluginVersion": "11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.50, rate(cadence_latency_per_tl_bucket{operation=\"PollForDecisionTask\"}[1m]))", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "PollForDecisionTask p50 latency per tasklist", - "type": "timeseries" + "overrides": [] }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 25 - }, - "id": 10, - "panels": [], - "title": "Sync vs Async Matches", - "type": "row" + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 17 }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "id": 14, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, rate(cadence_latency_per_tl_bucket{operation=\"PollForDecisionTask\"}[10s]))", + "interval": "10s", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "PollForDecisionTask p50 latency per tasklist", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 25 + }, + "id": 10, + "panels": [], + "title": "Sync vs Async Matches", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 26 - }, - "id": 17, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "pluginVersion": "11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum by(tasklist)(rate(syncmatch_latency_per_tl_count{operation=\"AddDecisionTask\"}[1m])) / sum by (tasklist) (rate(cadence_requests_per_tl{operation=\"AddDecisionTask\"}[1m]))", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Sync Match Rate", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 17, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum by(tasklist)(rate(syncmatch_latency_per_tl_count{operation=\"AddDecisionTask\"}[10s])) / sum by (tasklist) (rate(cadence_requests_per_tl{operation=\"AddDecisionTask\"}[10s]))", + "interval": "10s", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Sync Match Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 34 - }, - "id": 5, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "pluginVersion": "11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum by(tasklist) (\n rate(syncmatch_latency_per_tl_sum{operation=\"AddDecisionTask\"}[1m])\n )", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "DecisionTask Sync Matches", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 34 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum by(tasklist) (\n rate(syncmatch_latency_per_tl_sum{operation=\"AddDecisionTask\"}[10s])\n )", + "interval": "10s", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "DecisionTask Sync Matches", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 34 - }, - "id": 6, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "pluginVersion": "11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum by(tasklist) (\n rate(asyncmatch_latency_per_tl_sum{tasklistType=\"decision\"}[1m])\n )", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "DecisionTask Async Matches", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 34 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum by(tasklist) (\n rate(asyncmatch_latency_per_tl_sum{tasklistType=\"decision\"}[10s])\n )", + "interval": "10s", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "DecisionTask Async Matches", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 42 - }, - "id": 7, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "pluginVersion": "11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, rate(syncmatch_latency_per_tl_bucket{operation=\"AddDecisionTask\"}[1m]))\n", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "DecisionTask Sync Match p99 latency per tasklist", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 42 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, rate(syncmatch_latency_per_tl_bucket{operation=\"AddDecisionTask\"}[10s]))\n", + "interval": "10s", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "DecisionTask Sync Match p99 latency per tasklist", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 42 - }, - "id": 8, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "pluginVersion": "11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, rate(asyncmatch_latency_per_tl_bucket{tasklistType=\"decision\"}[1m]))\n", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "DecisionTask Async Match p99 latency per tasklist", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 42 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, rate(asyncmatch_latency_per_tl_bucket{tasklistType=\"decision\"}[10s]))\n", + "interval": "10s", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "DecisionTask Async Match p99 latency per tasklist", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 50 - }, - "id": 11, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } }, - "pluginVersion": "11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.50, rate(syncmatch_latency_per_tl_bucket{operation=\"AddDecisionTask\"}[1m]))\n", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "DecisionTask Sync Match p50 latency per tasklist", - "type": "timeseries" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 50 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.50, rate(syncmatch_latency_per_tl_bucket{operation=\"AddDecisionTask\"}[10s]))\n", + "interval": "10s", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "DecisionTask Sync Match p50 latency per tasklist", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 50 + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 50 + }, + "id": 12, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "id": 12, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "editorMode": "code", + "expr": "histogram_quantile(0.50, rate(asyncmatch_latency_per_tl_bucket{tasklistType=\"decision\"}[10s]))\n", + "interval": "10s", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "DecisionTask Async Match p50 latency per tasklist", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 58 + }, + "id": 15, + "panels": [], + "title": "Backlog", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "editorMode": "code", - "expr": "histogram_quantile(0.50, rate(asyncmatch_latency_per_tl_bucket{tasklistType=\"decision\"}[1m]))\n", - "legendFormat": "__auto", - "range": true, - "refId": "A" + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } - ], - "title": "DecisionTask Async Match p50 latency per tasklist", - "type": "timeseries" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 58 }, - "id": 15, - "panels": [], - "title": "Backlog", - "type": "row" + "overrides": [] }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 59 + }, + "id": 16, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum by(tasklist) (rate(task_backlog_per_tl{tasklistType=\"decision\"}[10s]))\n", + "interval": "10s", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "DecisionTask Backlog per tasklist", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 67 + }, + "id": 18, + "panels": [], + "title": "Adaptive Partition Scaler", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "barWidthFactor": 0.6, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "overrides": [] + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 59 + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 68 + }, + "id": 19, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "id": 16, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "11.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "editorMode": "code", + "expr": "max(task_list_partition_config_num_read{tasklist=\"my_tasklist\"})", + "hide": false, + "interval": "10s", + "legendFormat": "read_partitions", + "range": true, + "refId": "A" }, - "pluginVersion": "11.4.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum by(tasklist) (rate(task_backlog_per_tl{tasklistType=\"decision\"}[1m]))\n", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "DecisionTask Backlog per tasklist", - "type": "timeseries" - } - ], - "preload": false, - "schemaVersion": 40, - "tags": [], - "templating": { - "list": [] - }, - "time": { - "from": "2024-12-10T17:20:49.632Z", - "to": "2024-12-10T17:26:01.274Z" - }, - "timepicker": {}, - "timezone": "browser", - "title": "Cadence Matching", - "uid": "de6iyddpu6nswd", - "version": 2, - "weekStart": "" - } + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "max(task_list_partition_config_num_write{tasklist=\"my_tasklist\"})", + "hide": false, + "instant": false, + "legendFormat": "write_partitions", + "range": true, + "refId": "B" + } + ], + "title": "Partition Counts", + "type": "timeseries" + } + ], + "preload": false, + "schemaVersion": 40, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "2024-12-10T17:20:49.632Z", + "to": "2024-12-10T17:26:01.274Z" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Cadence Matching", + "uid": "de6iyddpu6nswd", + "version": 1, + "weekStart": "" +} diff --git a/host/matching_simulation_test.go b/host/matching_simulation_test.go index 37a080caeae..16330f84787 100644 --- a/host/matching_simulation_test.go +++ b/host/matching_simulation_test.go @@ -44,6 +44,7 @@ import ( "math/rand" "os" "reflect" + "sort" "strings" "sync" "sync/atomic" @@ -55,6 +56,7 @@ import ( "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" "go.uber.org/yarpc" + "golang.org/x/exp/slices" "golang.org/x/time/rate" "github.com/uber/cadence/client/history" @@ -92,7 +94,7 @@ type operationAggStats struct { lastUpdated time.Time } -func TestMatchingSimulationSuite(t *testing.T) { +func TestMatchingSimulation(t *testing.T) { flag.Parse() confPath := os.Getenv("MATCHING_SIMULATION_CONFIG") @@ -125,6 +127,7 @@ func TestMatchingSimulationSuite(t *testing.T) { dynamicconfig.MatchingPartitionUpscaleSustainedDuration: clusterConfig.MatchingConfig.SimulationConfig.PartitionUpscaleSustainedDuration, dynamicconfig.MatchingPartitionDownscaleSustainedDuration: clusterConfig.MatchingConfig.SimulationConfig.PartitionDownscaleSustainedDuration, dynamicconfig.MatchingAdaptiveScalerUpdateInterval: clusterConfig.MatchingConfig.SimulationConfig.AdaptiveScalerUpdateInterval, + dynamicconfig.MatchingQPSTrackerInterval: clusterConfig.MatchingConfig.SimulationConfig.QPSTrackerInterval, dynamicconfig.TaskIsolationDuration: clusterConfig.MatchingConfig.SimulationConfig.TaskIsolationDuration, } @@ -267,12 +270,23 @@ func (s *MatchingSimulationSuite) TestMatchingSimulation() { lastTaskScheduleID := int32(0) for _, taskConfig := range s.testClusterConfig.MatchingConfig.SimulationConfig.Tasks { tasksGenerated := int32(0) - rateLimiter := rate.NewLimiter(rate.Limit(taskConfig.getTasksPerSecond()), taskConfig.getTasksBurst()) + rateLimiter := newSimulationRateLimiter(taskConfig, startTime, s.log) for i := 0; i < taskConfig.getNumTaskGenerators(); i++ { numGenerators++ generatorWG.Add(1) config := taskConfig - go s.generate(ctx, matchingClients[i%len(matchingClients)], domainID, tasklist, rateLimiter, &tasksGenerated, &lastTaskScheduleID, &generatorWG, statsCh, &config) + go s.generate( + ctx, + matchingClients[i%len(matchingClients)], + domainID, + tasklist, + &tasksGenerated, + &lastTaskScheduleID, + &generatorWG, + statsCh, + &config, + rateLimiter, + ) } } @@ -281,8 +295,8 @@ func (s *MatchingSimulationSuite) TestMatchingSimulation() { // can change if your test case needs more time s.log("Waiting until all tasks are received") tasksToReceive.Wait() - executionTime := time.Now().Sub(startTime) - s.log("Completed benchmark in %v", time.Now().Sub(startTime)) + executionTime := time.Since(startTime) + s.log("Completed benchmark in %v", executionTime) s.log("Canceling context to stop pollers and task generators") cancel() pollerWG.Wait() @@ -333,12 +347,13 @@ func (s *MatchingSimulationSuite) generate( ctx context.Context, matchingClient MatchingClient, domainID, tasklist string, - rateLimiter *rate.Limiter, tasksGenerated *int32, lastTaskScheduleID *int32, wg *sync.WaitGroup, statsCh chan *operationStats, - taskConfig *SimulationTaskConfiguration) { + taskConfig *SimulationTaskConfiguration, + rateLimiter *simulationRateLimiter, +) { defer wg.Done() for { @@ -349,7 +364,7 @@ func (s *MatchingSimulationSuite) generate( default: if err := rateLimiter.Wait(ctx); err != nil { if !errors.Is(err, context.Canceled) { - s.T().Fatal("Rate limiter failed: ", err) + s.T().Error("Rate limiter failed: ", err) } return } @@ -677,3 +692,97 @@ func randomlyPickKey(weights map[string]int) string { // Return an empty string as a fallback (should not happen if weights are positive) return "" } + +type rateLimiterForTimeRange struct { + limiter *rate.Limiter + start, end int +} + +func (r rateLimiterForTimeRange) String() string { + return fmt.Sprintf("{start: %d, end: %d}", r.start, r.end) +} + +type simulationRateLimiter struct { + startTime time.Time + rateLimiters []rateLimiterForTimeRange + logFn func(msg string, args ...interface{}) +} + +func newSimulationRateLimiter(taskConfig SimulationTaskConfiguration, startTime time.Time, logFn func(msg string, args ...interface{})) *simulationRateLimiter { + var rateLimiters []rateLimiterForTimeRange + if len(taskConfig.TasksProduceSpecOverTime) == 0 { + l := rate.NewLimiter(rate.Limit(taskConfig.getTasksPerSecond()), taskConfig.getTasksBurst()) + rateLimiters = append(rateLimiters, rateLimiterForTimeRange{limiter: l, start: 0, end: -1}) + } else { + for _, spec := range taskConfig.TasksProduceSpecOverTime { + l := rate.NewLimiter(rate.Limit(spec.TasksPerSecond), spec.TasksBurst) + rateLimiters = append(rateLimiters, rateLimiterForTimeRange{limiter: l, start: spec.Start, end: spec.End}) + } + } + + sort.Slice(rateLimiters, func(i, j int) bool { + return rateLimiters[i].start < rateLimiters[j].start + }) + + logFn("Rate limiters: %v", rateLimiters) + + return &simulationRateLimiter{ + startTime: startTime, + rateLimiters: rateLimiters, + logFn: logFn, + } +} + +// TODO: test this function. lookup is not working +func (r *simulationRateLimiter) Wait(ctx context.Context) error { + elapsed := int(time.Since(r.startTime).Seconds()) + idx, ok := slices.BinarySearchFunc(r.rateLimiters, elapsed, func(r rateLimiterForTimeRange, t int) int { + if t >= r.start && (r.end == -1 || t < r.end) { + return 0 + } + + if r.start > t { + return 1 + } + + return -1 + }) + + if !ok { + return fmt.Errorf("rate limiter not found, elapsed: %ds", elapsed) + } + + r.logFn("Elapsed %vs so using rate limiter at index %d", elapsed, idx) + return r.rateLimiters[idx].limiter.Wait(ctx) +} + +func TestMatchingSimulation_RateLimiterBST(t *testing.T) { + srl := &simulationRateLimiter{ + startTime: time.Now(), + rateLimiters: []rateLimiterForTimeRange{ + {limiter: rate.NewLimiter(rate.Limit(10), 1), start: 0, end: 5}, + {limiter: rate.NewLimiter(rate.Limit(10), 1), start: 5, end: -1}, + }, + logFn: t.Logf, + } + + if err := srl.Wait(context.Background()); err != nil { + t.Error(err) + } + time.Sleep(1 * time.Second) + if err := srl.Wait(context.Background()); err != nil { + t.Error(err) + } + time.Sleep(5 * time.Second) + if err := srl.Wait(context.Background()); err != nil { + t.Error(err) + } + time.Sleep(1 * time.Second) + if err := srl.Wait(context.Background()); err != nil { + t.Error(err) + } + time.Sleep(5 * time.Second) + if err := srl.Wait(context.Background()); err != nil { + t.Error(err) + } +} diff --git a/host/onebox.go b/host/onebox.go index 112a3343699..3e229a940b8 100644 --- a/host/onebox.go +++ b/host/onebox.go @@ -162,6 +162,7 @@ type ( // Number of task list read partitions defaults to 1 TaskListReadPartitions int + // At most N polls will be forwarded at a time. defaults to 20 ForwarderMaxOutstandingPolls int @@ -200,10 +201,11 @@ type ( // Adaptive scaler configurations EnableAdaptiveScaler bool PartitionDownscaleFactor float64 - PartitionUpscaleRPS float64 + PartitionUpscaleRPS int PartitionUpscaleSustainedDuration time.Duration PartitionDownscaleSustainedDuration time.Duration AdaptiveScalerUpdateInterval time.Duration + QPSTrackerInterval time.Duration TaskIsolationDuration time.Duration } @@ -225,7 +227,11 @@ type ( // Number of task generators defaults to 1 NumTaskGenerators int - // The total QPS to generate tasks. Defaults to 40. + // Upper limit of tasks to generate. Task generators will stop if total number of tasks generated reaches MaxTaskToGenerate during simulation + // Defaults to 2k + MaxTaskToGenerate int + + // Task generation QPS. Defaults to 40. TasksPerSecond int // The burst value for the rate limiter for task generation. Controls the maximum number of AddTask requests @@ -234,9 +240,23 @@ type ( // TasksBurst to 1 then you'd get a steady stream of tasks, with one task every 100ms. TasksBurst int - // Upper limit of tasks to generate. Task generators will stop if total number of tasks generated reaches MaxTaskToGenerate during simulation - // Defaults to 2k - MaxTaskToGenerate int + // TasksProduceSpecOverTime is a list of TasksProduceSpec that will be used to change the qps over time. + // If this is set, TasksPerSecond and TasksBurst will be ignored. + TasksProduceSpecOverTime []TasksProduceSpec + } + + TasksProduceSpec struct { + // Task generation qps + TasksPerSecond int + + // The burst value for the rate limiter for task generation. + TasksBurst int + + // The time range in seconds that the above settings will be applied. + // The time range is [Start, End) + // For example, if the time range is [10, 20), the settings will be applied from 10s to 19s. + // Simulation start time is considered as second 0. + Start, End int } SimulationBacklogConfiguration struct { diff --git a/host/testdata/matching_simulation_burst_adaptive.yaml b/host/testdata/matching_simulation_burst_adaptive.yaml index 0aec9f60766..c4b6815f97e 100644 --- a/host/testdata/matching_simulation_burst_adaptive.yaml +++ b/host/testdata/matching_simulation_burst_adaptive.yaml @@ -31,5 +31,6 @@ matchingconfig: partitionupscalesustainedduration: 5s partitiondownscalesustainedduration: 5s adaptivescalerupdateinterval: 1s + qpstrackerinterval: 2s workerconfig: enableasyncwfconsumer: false diff --git a/host/testdata/matching_simulation_fluctuating.yaml b/host/testdata/matching_simulation_fluctuating.yaml new file mode 100644 index 00000000000..93b42d1416e --- /dev/null +++ b/host/testdata/matching_simulation_fluctuating.yaml @@ -0,0 +1,40 @@ +enablearchival: false +clusterno: 1 +messagingclientconfig: + usemock: true +historyconfig: + numhistoryshards: 4 + numhistoryhosts: 1 +matchingconfig: + nummatchinghosts: 4 + simulationconfig: + tasklistwritepartitions: 1 + tasklistreadpartitions: 1 + forwardermaxoutstandingpolls: 1 + forwardermaxoutstandingtasks: 1 + forwardermaxratepersecond: 10 + forwardermaxchildrenpernode: 20 + localpollwaittime: 10ms + localtaskwaittime: 10ms + tasks: + - numtaskgenerators: 4 + maxtasktogenerate: 5000 + tasksproducespecovertime: + - taskspersecond: 10 + tasksburst: 10 + start: 0 + end: 30 + - taskspersecond: 250 + tasksburst: 250 + start: 30 + end: 45 + - taskspersecond: 10 + tasksburst: 10 + start: 45 + end: -1 # -1 means forever + pollers: + - taskprocesstime: 1ms + numpollers: 8 + polltimeout: 60s +workerconfig: + enableasyncwfconsumer: false diff --git a/host/testdata/matching_simulation_fluctuating_adaptive.yaml b/host/testdata/matching_simulation_fluctuating_adaptive.yaml new file mode 100644 index 00000000000..5f9ae01264d --- /dev/null +++ b/host/testdata/matching_simulation_fluctuating_adaptive.yaml @@ -0,0 +1,48 @@ +enablearchival: false +clusterno: 1 +messagingclientconfig: + usemock: true +historyconfig: + numhistoryshards: 4 + numhistoryhosts: 1 +matchingconfig: + nummatchinghosts: 4 + simulationconfig: + tasklistwritepartitions: 0 # this doesn't matter. adaptive scaler will start from 1 + tasklistreadpartitions: 0 # this doesn't matter. adaptive scaler will start from 1 + forwardermaxoutstandingpolls: 1 + forwardermaxoutstandingtasks: 1 + forwardermaxratepersecond: 10 + forwardermaxchildrenpernode: 20 + localpollwaittime: 10ms + localtaskwaittime: 10ms + tasks: + - numtaskgenerators: 3 + maxtasktogenerate: 8000 + tasksproducespecovertime: + - taskspersecond: 10 + tasksburst: 10 + start: 0 + end: 15 + - taskspersecond: 250 + tasksburst: 250 + start: 15 + end: 45 + - taskspersecond: 10 + tasksburst: 10 + start: 45 + end: -1 # -1 means forever + pollers: + - taskprocesstime: 1ms + numpollers: 8 + polltimeout: 60s + getpartitionconfigfromdb: true + enableadaptivescaler: true + partitiondownscalefactor: 0.7 + partitionupscalerps: 120 + partitionupscalesustainedduration: 3s + partitiondownscalesustainedduration: 3s + adaptivescalerupdateinterval: 1s + qpstrackerinterval: 2s +workerconfig: + enableasyncwfconsumer: false diff --git a/service/matching/config/config.go b/service/matching/config/config.go index 80c61b6853d..a5587cbc644 100644 --- a/service/matching/config/config.go +++ b/service/matching/config/config.go @@ -64,6 +64,7 @@ type ( EnableAdaptiveScaler dynamicconfig.BoolPropertyFnWithTaskListInfoFilters EnableStandbyTaskCompletion dynamicconfig.BoolPropertyFnWithTaskListInfoFilters EnableClientAutoConfig dynamicconfig.BoolPropertyFnWithTaskListInfoFilters + QPSTrackerInterval dynamicconfig.DurationPropertyFnWithTaskListInfoFilters // Time to hold a poll request before returning an empty response if there are no tasks LongPollExpirationInterval dynamicconfig.DurationPropertyFnWithTaskListInfoFilters @@ -124,6 +125,7 @@ type ( PartitionUpscaleSustainedDuration func() time.Duration PartitionDownscaleSustainedDuration func() time.Duration AdaptiveScalerUpdateInterval func() time.Duration + QPSTrackerInterval func() time.Duration // taskWriter configuration OutstandingTaskAppendsThreshold func() int MaxTaskBatchSize func() int @@ -193,6 +195,7 @@ func NewConfig(dc *dynamicconfig.Collection, hostName string, getIsolationGroups PartitionDownscaleSustainedDuration: dc.GetDurationPropertyFilteredByTaskListInfo(dynamicconfig.MatchingPartitionDownscaleSustainedDuration), AdaptiveScalerUpdateInterval: dc.GetDurationPropertyFilteredByTaskListInfo(dynamicconfig.MatchingAdaptiveScalerUpdateInterval), EnableAdaptiveScaler: dc.GetBoolPropertyFilteredByTaskListInfo(dynamicconfig.MatchingEnableAdaptiveScaler), + QPSTrackerInterval: dc.GetDurationPropertyFilteredByTaskListInfo(dynamicconfig.MatchingQPSTrackerInterval), TaskIsolationDuration: dc.GetDurationPropertyFilteredByTaskListInfo(dynamicconfig.TaskIsolationDuration), TaskIsolationPollerWindow: dc.GetDurationPropertyFilteredByTaskListInfo(dynamicconfig.TaskIsolationPollerWindow), HostName: hostName, diff --git a/service/matching/tasklist/adaptive_scaler.go b/service/matching/tasklist/adaptive_scaler.go index 2589c09db74..fa323f1b2d9 100644 --- a/service/matching/tasklist/adaptive_scaler.go +++ b/service/matching/tasklist/adaptive_scaler.go @@ -156,7 +156,12 @@ func (a *adaptiveScalerImpl) run() { if numReadPartitions == partitionConfig.NumReadPartitions && numWritePartitions == partitionConfig.NumWritePartitions { return } - a.logger.Info("adaptive scaler is updating number of partitions", tag.CurrentQPS(qps), tag.NumReadPartitions(numReadPartitions), tag.NumWritePartitions(numWritePartitions), tag.Dynamic("task-list-partition-config", partitionConfig)) + a.logger.Info("adaptive scaler is updating number of partitions", + tag.CurrentQPS(qps), + tag.NumReadPartitions(numReadPartitions), + tag.NumWritePartitions(numWritePartitions), + tag.Dynamic("task-list-partition-config", partitionConfig), + ) a.scope.IncCounter(metrics.CadenceRequests) err := a.tlMgr.UpdateTaskListPartitionConfig(a.ctx, &types.TaskListPartitionConfig{ NumReadPartitions: numReadPartitions, diff --git a/service/matching/tasklist/task_list_manager.go b/service/matching/tasklist/task_list_manager.go index 79138ec29a6..bc3adb8e869 100644 --- a/service/matching/tasklist/task_list_manager.go +++ b/service/matching/tasklist/task_list_manager.go @@ -219,7 +219,8 @@ func NewManager( TaskListKind: taskListKind, TaskListType: taskList.GetType(), } - tlMgr.qpsTracker = stats.NewEmaFixedWindowQPSTracker(timeSource, 0.5, 10*time.Second, baseEvent) + + tlMgr.qpsTracker = stats.NewEmaFixedWindowQPSTracker(timeSource, 0.5, taskListConfig.QPSTrackerInterval(), baseEvent) if taskList.IsRoot() && *taskListKind == types.TaskListKindNormal { adaptiveScalerScope := common.NewPerTaskListScope(domainName, taskList.GetName(), *taskListKind, metricsClient, metrics.MatchingAdaptiveScalerScope). Tagged(getTaskListTypeTag(taskList.GetType())) @@ -341,14 +342,20 @@ func (c *taskListManagerImpl) handleErr(err error) error { func (c *taskListManagerImpl) TaskListPartitionConfig() *types.TaskListPartitionConfig { c.partitionConfigLock.RLock() defer c.partitionConfigLock.RUnlock() + + scope := c.scope.Tagged(metrics.TaskListRootPartitionTag(c.taskListID.GetRoot())) if c.partitionConfig == nil { + // if partition config is nil, read/write partition count is considered 1. Emit those metrics for continuity + scope.UpdateGauge(metrics.TaskListPartitionConfigNumReadGauge, 1) + scope.UpdateGauge(metrics.TaskListPartitionConfigNumWriteGauge, 1) return nil } + config := *c.partitionConfig - c.logger.Debug("get task list partition config from db", tag.Dynamic("root-partition", c.taskListID.GetRoot()), tag.Dynamic("config", config)) - c.scope.Tagged(metrics.TaskListRootPartitionTag(c.taskListID.GetRoot())).UpdateGauge(metrics.TaskListPartitionConfigNumReadGauge, float64(config.NumReadPartitions)) - c.scope.Tagged(metrics.TaskListRootPartitionTag(c.taskListID.GetRoot())).UpdateGauge(metrics.TaskListPartitionConfigNumWriteGauge, float64(config.NumWritePartitions)) - c.scope.Tagged(metrics.TaskListRootPartitionTag(c.taskListID.GetRoot())).UpdateGauge(metrics.TaskListPartitionConfigVersionGauge, float64(config.Version)) + c.logger.Debug("current partition config", tag.Dynamic("root-partition", c.taskListID.GetRoot()), tag.Dynamic("config", config)) + scope.UpdateGauge(metrics.TaskListPartitionConfigNumReadGauge, float64(config.NumReadPartitions)) + scope.UpdateGauge(metrics.TaskListPartitionConfigNumWriteGauge, float64(config.NumWritePartitions)) + scope.UpdateGauge(metrics.TaskListPartitionConfigVersionGauge, float64(config.Version)) return &config } @@ -1033,6 +1040,9 @@ func newTaskListConfig(id *Identifier, cfg *config.Config, domainName string) *c AdaptiveScalerUpdateInterval: func() time.Duration { return cfg.AdaptiveScalerUpdateInterval(domainName, taskListName, taskType) }, + QPSTrackerInterval: func() time.Duration { + return cfg.QPSTrackerInterval(domainName, taskListName, taskType) + }, EnableAdaptiveScaler: func() bool { return cfg.EnableAdaptiveScaler(domainName, taskListName, taskType) }, diff --git a/tools/matchingsimulationcomparison/README.md b/tools/matchingsimulationcomparison/README.md index e92c815c6ff..fc87647decf 100644 --- a/tools/matchingsimulationcomparison/README.md +++ b/tools/matchingsimulationcomparison/README.md @@ -12,7 +12,7 @@ go run tools/matchingsimulationcomparison/*.go Run subset of scenarios and compare: ``` go run tools/matchingsimulationcomparison/*.go \ - --scenarios "burst" + --scenarios "fluctuating" ``` If you have already run some scenarios before and made changes in the csv output then run in Compare mode From 4d1d011a1c8fafdc38e2446760917ae3ae4872ca Mon Sep 17 00:00:00 2001 From: taylan isikdemir Date: Mon, 16 Dec 2024 16:42:06 -0800 Subject: [PATCH 2/5] fix test --- host/matching_simulation_test.go | 86 ++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 32 deletions(-) diff --git a/host/matching_simulation_test.go b/host/matching_simulation_test.go index 16330f84787..5c265a6b451 100644 --- a/host/matching_simulation_test.go +++ b/host/matching_simulation_test.go @@ -61,6 +61,7 @@ import ( "github.com/uber/cadence/client/history" "github.com/uber/cadence/common" + "github.com/uber/cadence/common/clock" "github.com/uber/cadence/common/dynamicconfig" "github.com/uber/cadence/common/partition" "github.com/uber/cadence/common/persistence" @@ -270,7 +271,7 @@ func (s *MatchingSimulationSuite) TestMatchingSimulation() { lastTaskScheduleID := int32(0) for _, taskConfig := range s.testClusterConfig.MatchingConfig.SimulationConfig.Tasks { tasksGenerated := int32(0) - rateLimiter := newSimulationRateLimiter(taskConfig, startTime, s.log) + rateLimiter := newSimulationRateLimiter(taskConfig, startTime, clock.NewRealTimeSource(), s.log) for i := 0; i < taskConfig.getNumTaskGenerators(); i++ { numGenerators++ generatorWG.Add(1) @@ -698,25 +699,31 @@ type rateLimiterForTimeRange struct { start, end int } -func (r rateLimiterForTimeRange) String() string { +func (r *rateLimiterForTimeRange) String() string { return fmt.Sprintf("{start: %d, end: %d}", r.start, r.end) } type simulationRateLimiter struct { startTime time.Time - rateLimiters []rateLimiterForTimeRange + timeSrc clock.TimeSource + rateLimiters []*rateLimiterForTimeRange logFn func(msg string, args ...interface{}) } -func newSimulationRateLimiter(taskConfig SimulationTaskConfiguration, startTime time.Time, logFn func(msg string, args ...interface{})) *simulationRateLimiter { - var rateLimiters []rateLimiterForTimeRange +func newSimulationRateLimiter( + taskConfig SimulationTaskConfiguration, + startTime time.Time, + timeSrc clock.TimeSource, + logFn func(msg string, args ...interface{}), +) *simulationRateLimiter { + var rateLimiters []*rateLimiterForTimeRange if len(taskConfig.TasksProduceSpecOverTime) == 0 { l := rate.NewLimiter(rate.Limit(taskConfig.getTasksPerSecond()), taskConfig.getTasksBurst()) - rateLimiters = append(rateLimiters, rateLimiterForTimeRange{limiter: l, start: 0, end: -1}) + rateLimiters = append(rateLimiters, &rateLimiterForTimeRange{limiter: l, start: 0, end: -1}) } else { for _, spec := range taskConfig.TasksProduceSpecOverTime { l := rate.NewLimiter(rate.Limit(spec.TasksPerSecond), spec.TasksBurst) - rateLimiters = append(rateLimiters, rateLimiterForTimeRange{limiter: l, start: spec.Start, end: spec.End}) + rateLimiters = append(rateLimiters, &rateLimiterForTimeRange{limiter: l, start: spec.Start, end: spec.End}) } } @@ -728,6 +735,7 @@ func newSimulationRateLimiter(taskConfig SimulationTaskConfiguration, startTime return &simulationRateLimiter{ startTime: startTime, + timeSrc: timeSrc, rateLimiters: rateLimiters, logFn: logFn, } @@ -735,8 +743,17 @@ func newSimulationRateLimiter(taskConfig SimulationTaskConfiguration, startTime // TODO: test this function. lookup is not working func (r *simulationRateLimiter) Wait(ctx context.Context) error { - elapsed := int(time.Since(r.startTime).Seconds()) - idx, ok := slices.BinarySearchFunc(r.rateLimiters, elapsed, func(r rateLimiterForTimeRange, t int) int { + limiter, err := r.getLimiter() + if err != nil { + return err + } + + return limiter.limiter.Wait(ctx) +} + +func (r *simulationRateLimiter) getLimiter() (*rateLimiterForTimeRange, error) { + elapsed := int(r.timeSrc.Since(r.startTime).Seconds()) + idx, ok := slices.BinarySearchFunc(r.rateLimiters, elapsed, func(r *rateLimiterForTimeRange, t int) int { if t >= r.start && (r.end == -1 || t < r.end) { return 0 } @@ -749,40 +766,45 @@ func (r *simulationRateLimiter) Wait(ctx context.Context) error { }) if !ok { - return fmt.Errorf("rate limiter not found, elapsed: %ds", elapsed) + return nil, fmt.Errorf("rate limiter not found, elapsed: %ds", elapsed) } r.logFn("Elapsed %vs so using rate limiter at index %d", elapsed, idx) - return r.rateLimiters[idx].limiter.Wait(ctx) + return r.rateLimiters[idx], nil } func TestMatchingSimulation_RateLimiterBST(t *testing.T) { + mockTimeSrc := clock.NewMockedTimeSource() srl := &simulationRateLimiter{ - startTime: time.Now(), - rateLimiters: []rateLimiterForTimeRange{ + startTime: mockTimeSrc.Now(), + timeSrc: mockTimeSrc, + rateLimiters: []*rateLimiterForTimeRange{ {limiter: rate.NewLimiter(rate.Limit(10), 1), start: 0, end: 5}, {limiter: rate.NewLimiter(rate.Limit(10), 1), start: 5, end: -1}, }, logFn: t.Logf, } - if err := srl.Wait(context.Background()); err != nil { - t.Error(err) - } - time.Sleep(1 * time.Second) - if err := srl.Wait(context.Background()); err != nil { - t.Error(err) - } - time.Sleep(5 * time.Second) - if err := srl.Wait(context.Background()); err != nil { - t.Error(err) - } - time.Sleep(1 * time.Second) - if err := srl.Wait(context.Background()); err != nil { - t.Error(err) - } - time.Sleep(5 * time.Second) - if err := srl.Wait(context.Background()); err != nil { - t.Error(err) - } + // t = 0 + l, err := srl.getLimiter() + require.NoError(t, err) + require.Equal(t, 0, l.start) // limiter at index 0 should be used + + // t = 3 + mockTimeSrc.Advance(time.Second * 3) + l, err = srl.getLimiter() + require.NoError(t, err) + require.Equal(t, 0, l.start) // limiter at index 0 should be used + + // t = 5 + mockTimeSrc.Advance(time.Second * 2) + l, err = srl.getLimiter() + require.NoError(t, err) + require.Equal(t, 5, l.start) // limiter at index 1 should be used + + // t = 10 + mockTimeSrc.Advance(time.Second * 5) + l, err = srl.getLimiter() + require.NoError(t, err) + require.Equal(t, 5, l.start) // limiter at index 1 should be used } From 74705ab75145a37ae568cc6615671bbb1e0f8c18 Mon Sep 17 00:00:00 2001 From: taylan isikdemir Date: Mon, 16 Dec 2024 17:01:34 -0800 Subject: [PATCH 3/5] ut fix --- service/matching/config/config_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/service/matching/config/config_test.go b/service/matching/config/config_test.go index 05f97a86b72..1233b7349b1 100644 --- a/service/matching/config/config_test.go +++ b/service/matching/config/config_test.go @@ -87,6 +87,7 @@ func TestNewConfig(t *testing.T) { "PartitionDownscaleSustainedDuration": {dynamicconfig.MatchingPartitionDownscaleSustainedDuration, time.Duration(33)}, "AdaptiveScalerUpdateInterval": {dynamicconfig.MatchingAdaptiveScalerUpdateInterval, time.Duration(34)}, "EnableAdaptiveScaler": {dynamicconfig.MatchingEnableAdaptiveScaler, true}, + "QPSTrackerInterval": {dynamicconfig.MatchingQPSTrackerInterval, 5 * time.Second}, "EnableStandbyTaskCompletion": {dynamicconfig.MatchingEnableStandbyTaskCompletion, false}, "EnableClientAutoConfig": {dynamicconfig.MatchingEnableClientAutoConfig, false}, "TaskIsolationDuration": {dynamicconfig.TaskIsolationDuration, time.Duration(35)}, From 3967db77792a2af5b93a0e2943ffc33698c6721d Mon Sep 17 00:00:00 2001 From: taylan isikdemir Date: Tue, 17 Dec 2024 12:33:43 -0800 Subject: [PATCH 4/5] address feedback --- host/matching_simulation_test.go | 12 +++++++++--- host/onebox.go | 13 ++++++------- host/testdata/matching_simulation_fluctuating.yaml | 10 +++------- .../matching_simulation_fluctuating_adaptive.yaml | 10 +++------- 4 files changed, 21 insertions(+), 24 deletions(-) diff --git a/host/matching_simulation_test.go b/host/matching_simulation_test.go index 5c265a6b451..c8b6bd55daf 100644 --- a/host/matching_simulation_test.go +++ b/host/matching_simulation_test.go @@ -717,13 +717,19 @@ func newSimulationRateLimiter( logFn func(msg string, args ...interface{}), ) *simulationRateLimiter { var rateLimiters []*rateLimiterForTimeRange - if len(taskConfig.TasksProduceSpecOverTime) == 0 { + if len(taskConfig.OverTime) == 0 { l := rate.NewLimiter(rate.Limit(taskConfig.getTasksPerSecond()), taskConfig.getTasksBurst()) rateLimiters = append(rateLimiters, &rateLimiterForTimeRange{limiter: l, start: 0, end: -1}) } else { - for _, spec := range taskConfig.TasksProduceSpecOverTime { + start := 0 + for _, spec := range taskConfig.OverTime { l := rate.NewLimiter(rate.Limit(spec.TasksPerSecond), spec.TasksBurst) - rateLimiters = append(rateLimiters, &rateLimiterForTimeRange{limiter: l, start: spec.Start, end: spec.End}) + end := -1 + if spec.Duration != nil { + end = start + int(spec.Duration.Seconds()) + } + rateLimiters = append(rateLimiters, &rateLimiterForTimeRange{limiter: l, start: start, end: end}) + start = end } } diff --git a/host/onebox.go b/host/onebox.go index 3e229a940b8..4c8de66b165 100644 --- a/host/onebox.go +++ b/host/onebox.go @@ -240,9 +240,10 @@ type ( // TasksBurst to 1 then you'd get a steady stream of tasks, with one task every 100ms. TasksBurst int - // TasksProduceSpecOverTime is a list of TasksProduceSpec that will be used to change the qps over time. + // OverTime is a list of TasksProduceSpec that will be used to change the qps over time. + // Each item has a duration and they will be applied in the given order. // If this is set, TasksPerSecond and TasksBurst will be ignored. - TasksProduceSpecOverTime []TasksProduceSpec + OverTime []TasksProduceSpec } TasksProduceSpec struct { @@ -252,11 +253,9 @@ type ( // The burst value for the rate limiter for task generation. TasksBurst int - // The time range in seconds that the above settings will be applied. - // The time range is [Start, End) - // For example, if the time range is [10, 20), the settings will be applied from 10s to 19s. - // Simulation start time is considered as second 0. - Start, End int + // The duration for which the settings will be applied. + // If the duration is unset, the settings will be applied indefinitely. + Duration *time.Duration } SimulationBacklogConfiguration struct { diff --git a/host/testdata/matching_simulation_fluctuating.yaml b/host/testdata/matching_simulation_fluctuating.yaml index 93b42d1416e..89e64bdc4c5 100644 --- a/host/testdata/matching_simulation_fluctuating.yaml +++ b/host/testdata/matching_simulation_fluctuating.yaml @@ -19,19 +19,15 @@ matchingconfig: tasks: - numtaskgenerators: 4 maxtasktogenerate: 5000 - tasksproducespecovertime: + overtime: - taskspersecond: 10 tasksburst: 10 - start: 0 - end: 30 + duration: 30s - taskspersecond: 250 tasksburst: 250 - start: 30 - end: 45 + duration: 15s - taskspersecond: 10 tasksburst: 10 - start: 45 - end: -1 # -1 means forever pollers: - taskprocesstime: 1ms numpollers: 8 diff --git a/host/testdata/matching_simulation_fluctuating_adaptive.yaml b/host/testdata/matching_simulation_fluctuating_adaptive.yaml index 5f9ae01264d..6c6e00cca3f 100644 --- a/host/testdata/matching_simulation_fluctuating_adaptive.yaml +++ b/host/testdata/matching_simulation_fluctuating_adaptive.yaml @@ -19,19 +19,15 @@ matchingconfig: tasks: - numtaskgenerators: 3 maxtasktogenerate: 8000 - tasksproducespecovertime: + overtime: - taskspersecond: 10 tasksburst: 10 - start: 0 - end: 15 + duration: 15s - taskspersecond: 250 tasksburst: 250 - start: 15 - end: 45 + duration: 30s - taskspersecond: 10 tasksburst: 10 - start: 45 - end: -1 # -1 means forever pollers: - taskprocesstime: 1ms numpollers: 8 From 2117ddd7147abf565b5e5e3c9c8bd2c6405c2cd9 Mon Sep 17 00:00:00 2001 From: taylan isikdemir Date: Tue, 17 Dec 2024 12:34:38 -0800 Subject: [PATCH 5/5] remove comment --- host/matching_simulation_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/host/matching_simulation_test.go b/host/matching_simulation_test.go index c8b6bd55daf..0a7493dbdba 100644 --- a/host/matching_simulation_test.go +++ b/host/matching_simulation_test.go @@ -747,7 +747,6 @@ func newSimulationRateLimiter( } } -// TODO: test this function. lookup is not working func (r *simulationRateLimiter) Wait(ctx context.Context) error { limiter, err := r.getLimiter() if err != nil {