From 798f40b5e7f7bffb5c9713ad72bc70ce2af7e411 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Krupa?= Date: Sun, 16 Aug 2020 08:56:14 +0200 Subject: [PATCH 1/6] mixin: Adhere to monitoring mixins annotation guidelines (#3035) * mixin: Adhere to monitoring mixins annotation guidelines * replaced `message` annotation field with `description` * added simple `summary` field Signed-off-by: paulfantom * examples/alerts: regenerate & adjust tests Signed-off-by: paulfantom --- examples/alerts/alerts.md | 174 +++++++++++++++--------- examples/alerts/alerts.yaml | 172 ++++++++++++++--------- examples/alerts/tests.yaml | 18 ++- mixin/alerts/absent.libsonnet | 3 +- mixin/alerts/bucket_replicate.libsonnet | 9 +- mixin/alerts/compact.libsonnet | 15 +- mixin/alerts/query.libsonnet | 21 ++- mixin/alerts/receive.libsonnet | 21 ++- mixin/alerts/rule.libsonnet | 33 +++-- mixin/alerts/sidecar.libsonnet | 6 +- mixin/alerts/store.libsonnet | 12 +- 11 files changed, 313 insertions(+), 171 deletions(-) diff --git a/examples/alerts/alerts.md b/examples/alerts/alerts.md index 89e567a07dd..dc83d3d35d6 100644 --- a/examples/alerts/alerts.md +++ b/examples/alerts/alerts.md @@ -10,23 +10,26 @@ name: thanos-compact.rules rules: - alert: ThanosCompactMultipleRunning annotations: - message: No more than one Thanos Compact instance should be running at once. There - are {{ $value }} + description: No more than one Thanos Compact instance should be running at once. + There are {{ $value }} + summary: Thanos Compact has multiple instances running. expr: sum(up{job=~"thanos-compact.*"}) > 1 for: 5m labels: severity: warning - alert: ThanosCompactHalted annotations: - message: Thanos Compact {{$labels.job}} has failed to run and now is halted. + description: Thanos Compact {{$labels.job}} has failed to run and now is halted. + summary: Thanos Compact has failed to run ans is now halted. expr: thanos_compactor_halted{job=~"thanos-compact.*"} == 1 for: 5m labels: severity: warning - alert: ThanosCompactHighCompactionFailures annotations: - message: Thanos Compact {{$labels.job}} is failing to execute {{ $value | humanize - }}% of compactions. + description: Thanos Compact {{$labels.job}} is failing to execute {{ $value | + humanize }}% of compactions. + summary: Thanos Compact is failing to execute compactions. expr: | ( sum by (job) (rate(thanos_compact_group_compactions_failures_total{job=~"thanos-compact.*"}[5m])) @@ -39,8 +42,9 @@ rules: severity: warning - alert: ThanosCompactBucketHighOperationFailures annotations: - message: Thanos Compact {{$labels.job}} Bucket is failing to execute {{ $value + description: Thanos Compact {{$labels.job}} Bucket is failing to execute {{ $value | humanize }}% of operations. + summary: Thanos Compact Bucket is having a high number of operation failures. expr: | ( sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-compact.*"}[5m])) @@ -53,7 +57,8 @@ rules: severity: warning - alert: ThanosCompactHasNotRun annotations: - message: Thanos Compact {{$labels.job}} has not uploaded anything for 24 hours. + description: Thanos Compact {{$labels.job}} has not uploaded anything for 24 hours. + summary: Thanos Compact has not uploaded anything for last 24 hours. 
expr: (time() - max(max_over_time(thanos_objstore_bucket_last_successful_upload_time{job=~"thanos-compact.*"}[24h]))) / 60 / 60 > 24 labels: @@ -70,7 +75,8 @@ name: thanos-rule.rules rules: - alert: ThanosRuleQueueIsDroppingAlerts annotations: - message: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to queue alerts. + description: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to queue alerts. + summary: Thanos Rule is failing to queue alerts. expr: | sum by (job) (rate(thanos_alert_queue_alerts_dropped_total{job=~"thanos-rule.*"}[5m])) > 0 for: 5m @@ -78,8 +84,9 @@ rules: severity: critical - alert: ThanosRuleSenderIsFailingAlerts annotations: - message: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to send alerts + description: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to send alerts to alertmanager. + summary: Thanos Rule is failing to send alerts to alertmanager. expr: | sum by (job) (rate(thanos_alert_sender_alerts_dropped_total{job=~"thanos-rule.*"}[5m])) > 0 for: 5m @@ -87,7 +94,9 @@ rules: severity: critical - alert: ThanosRuleHighRuleEvaluationFailures annotations: - message: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to evaluate rules. + description: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to evaluate + rules. + summary: Thanos Rule is failing to evaluate rules. expr: | ( sum by (job) (rate(prometheus_rule_evaluation_failures_total{job=~"thanos-rule.*"}[5m])) @@ -100,8 +109,9 @@ rules: severity: critical - alert: ThanosRuleHighRuleEvaluationWarnings annotations: - message: Thanos Rule {{$labels.job}} {{$labels.pod}} has high number of evaluation + description: Thanos Rule {{$labels.job}} {{$labels.pod}} has high number of evaluation warnings. + summary: Thanos Rule has high number of evaluation warnings. expr: | sum by (job) (rate(thanos_rule_evaluation_with_warnings_total{job=~"thanos-rule.*"}[5m])) > 0 for: 15m @@ -109,8 +119,9 @@ rules: severity: info - alert: ThanosRuleRuleEvaluationLatencyHigh annotations: - message: Thanos Rule {{$labels.job}}/{{$labels.pod}} has higher evaluation latency - than interval for {{$labels.rule_group}}. + description: Thanos Rule {{$labels.job}}/{{$labels.pod}} has higher evaluation + latency than interval for {{$labels.rule_group}}. + summary: Thanos Rule has high rule evaluation latency. expr: | ( sum by (job, pod, rule_group) (prometheus_rule_group_last_duration_seconds{job=~"thanos-rule.*"}) @@ -122,8 +133,9 @@ rules: severity: warning - alert: ThanosRuleGrpcErrorRate annotations: - message: Thanos Rule {{$labels.job}} is failing to handle {{ $value | humanize + description: Thanos Rule {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests. + summary: Thanos Rule is failing to handle grpc requests. expr: | ( sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-rule.*"}[5m])) @@ -136,7 +148,8 @@ rules: severity: warning - alert: ThanosRuleConfigReloadFailure annotations: - message: Thanos Rule {{$labels.job}} has not been able to reload its configuration. + description: Thanos Rule {{$labels.job}} has not been able to reload its configuration. + summary: Thanos Rule has not been able to reload configuration. 
expr: avg(thanos_rule_config_last_reload_successful{job=~"thanos-rule.*"}) by (job) != 1 for: 5m @@ -144,8 +157,9 @@ rules: severity: info - alert: ThanosRuleQueryHighDNSFailures annotations: - message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing DNS - queries for query endpoints. + description: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing + DNS queries for query endpoints. + summary: Thanos Rule is having high number of DNS failures. expr: | ( sum by (job) (rate(thanos_ruler_query_apis_dns_failures_total{job=~"thanos-rule.*"}[5m])) @@ -158,8 +172,9 @@ rules: severity: warning - alert: ThanosRuleAlertmanagerHighDNSFailures annotations: - message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing DNS - queries for Alertmanager endpoints. + description: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing + DNS queries for Alertmanager endpoints. + summary: Thanos Rule is having high number of DNS failures. expr: | ( sum by (job) (rate(thanos_ruler_alertmanagers_dns_failures_total{job=~"thanos-rule.*"}[5m])) @@ -172,8 +187,9 @@ rules: severity: warning - alert: ThanosRuleNoEvaluationFor10Intervals annotations: - message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% rule groups + description: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% rule groups that did not evaluate for at least 10x of their expected interval. + summary: Thanos Rule has rule groups that did not evaluate for 10 intervals. expr: | time() - max by (job, group) (prometheus_rule_group_last_evaluation_timestamp_seconds{job=~"thanos-rule.*"}) > @@ -183,8 +199,9 @@ rules: severity: info - alert: ThanosNoRuleEvaluations annotations: - message: Thanos Rule {{$labels.job}} did not perform any rule evaluations in the - past 2 minutes. + description: Thanos Rule {{$labels.job}} did not perform any rule evaluations + in the past 2 minutes. + summary: Thanos Rule did not perform any rule evaluations. expr: | sum(rate(prometheus_rule_evaluations_total{job=~"thanos-rule.*"}[2m])) <= 0 and @@ -202,8 +219,9 @@ name: thanos-store.rules rules: - alert: ThanosStoreGrpcErrorRate annotations: - message: Thanos Store {{$labels.job}} is failing to handle {{ $value | humanize + description: Thanos Store {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests. + summary: Thanos Store is failing to handle qrpcd requests. expr: | ( sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-store.*"}[5m])) @@ -216,8 +234,9 @@ rules: severity: warning - alert: ThanosStoreSeriesGateLatencyHigh annotations: - message: Thanos Store {{$labels.job}} has a 99th percentile latency of {{ $value - }} seconds for store series gate requests. + description: Thanos Store {{$labels.job}} has a 99th percentile latency of {{ + $value }} seconds for store series gate requests. + summary: Thanos Store has high latency for store series gate requests. expr: | ( histogram_quantile(0.9, sum by (job, le) (rate(thanos_bucket_store_series_gate_duration_seconds_bucket{job=~"thanos-store.*"}[5m]))) > 2 @@ -229,8 +248,9 @@ rules: severity: warning - alert: ThanosStoreBucketHighOperationFailures annotations: - message: Thanos Store {{$labels.job}} Bucket is failing to execute {{ $value | - humanize }}% of operations. + description: Thanos Store {{$labels.job}} Bucket is failing to execute {{ $value + | humanize }}% of operations. 
+ summary: Thanos Store Bucket is failing to execute operations. expr: | ( sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-store.*"}[5m])) @@ -243,8 +263,9 @@ rules: severity: warning - alert: ThanosStoreObjstoreOperationLatencyHigh annotations: - message: Thanos Store {{$labels.job}} Bucket has a 99th percentile latency of - {{ $value }} seconds for the bucket operations. + description: Thanos Store {{$labels.job}} Bucket has a 99th percentile latency + of {{ $value }} seconds for the bucket operations. + summary: Thanos Store is having high latency for bucket operations. expr: | ( histogram_quantile(0.9, sum by (job, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~"thanos-store.*"}[5m]))) > 2 @@ -264,7 +285,9 @@ name: thanos-sidecar.rules rules: - alert: ThanosSidecarPrometheusDown annotations: - message: Thanos Sidecar {{$labels.job}} {{$labels.pod}} cannot connect to Prometheus. + description: Thanos Sidecar {{$labels.job}} {{$labels.pod}} cannot connect to + Prometheus. + summary: Thanos Sidecar cannot connect to Prometheus expr: | sum by (job, pod) (thanos_sidecar_prometheus_up{job=~"thanos-sidecar.*"} == 0) for: 5m @@ -272,8 +295,9 @@ rules: severity: critical - alert: ThanosSidecarUnhealthy annotations: - message: Thanos Sidecar {{$labels.job}} {{$labels.pod}} is unhealthy for {{ $value - }} seconds. + description: Thanos Sidecar {{$labels.job}} {{$labels.pod}} is unhealthy for {{ + $value }} seconds. + summary: Thanos Sidecar is unhealthy. expr: | time() - max(thanos_sidecar_last_heartbeat_success_time_seconds{job=~"thanos-sidecar.*"}) by (job, pod) >= 600 labels: @@ -288,8 +312,9 @@ name: thanos-query.rules rules: - alert: ThanosQueryHttpRequestQueryErrorRateHigh annotations: - message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize + description: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of "query" requests. + summary: Thanos Query is failing to handle requests. expr: | ( sum(rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query"}[5m])) @@ -301,8 +326,9 @@ rules: severity: critical - alert: ThanosQueryHttpRequestQueryRangeErrorRateHigh annotations: - message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize + description: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of "query_range" requests. + summary: Thanos Query is failing to handle requests. expr: | ( sum(rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query_range"}[5m])) @@ -314,8 +340,9 @@ rules: severity: critical - alert: ThanosQueryGrpcServerErrorRate annotations: - message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize + description: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests. + summary: Thanos Query is failing to handle requests. expr: | ( sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-query.*"}[5m])) @@ -328,8 +355,9 @@ rules: severity: warning - alert: ThanosQueryGrpcClientErrorRate annotations: - message: Thanos Query {{$labels.job}} is failing to send {{ $value | humanize + description: Thanos Query {{$labels.job}} is failing to send {{ $value | humanize }}% of requests. + summary: Thanos Query is failing to send requests. 
expr: | ( sum by (job) (rate(grpc_client_handled_total{grpc_code!="OK", job=~"thanos-query.*"}[5m])) @@ -341,8 +369,9 @@ rules: severity: warning - alert: ThanosQueryHighDNSFailures annotations: - message: Thanos Query {{$labels.job}} have {{ $value | humanize }}% of failing + description: Thanos Query {{$labels.job}} have {{ $value | humanize }}% of failing DNS queries for store endpoints. + summary: Thanos Query is having high number of DNS failures. expr: | ( sum by (job) (rate(thanos_querier_store_apis_dns_failures_total{job=~"thanos-query.*"}[5m])) @@ -354,8 +383,9 @@ rules: severity: warning - alert: ThanosQueryInstantLatencyHigh annotations: - message: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value - }} seconds for instant queries. + description: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ + $value }} seconds for instant queries. + summary: Thanos Query has high latency for queries. expr: | ( histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-query.*", handler="query"}[5m]))) > 40 @@ -367,8 +397,9 @@ rules: severity: critical - alert: ThanosQueryRangeLatencyHigh annotations: - message: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value - }} seconds for range queries. + description: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ + $value }} seconds for range queries. + summary: Thanos Query has high latency for queries. expr: | ( histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-query.*", handler="query_range"}[5m]))) > 90 @@ -388,8 +419,9 @@ name: thanos-receive.rules rules: - alert: ThanosReceiveHttpRequestErrorRateHigh annotations: - message: Thanos Receive {{$labels.job}} is failing to handle {{ $value | humanize + description: Thanos Receive {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests. + summary: Thanos Receive is failing to handle requests. expr: | ( sum(rate(http_requests_total{code=~"5..", job=~"thanos-receive.*", handler="receive"}[5m])) @@ -401,8 +433,9 @@ rules: severity: critical - alert: ThanosReceiveHttpRequestLatencyHigh annotations: - message: Thanos Receive {{$labels.job}} has a 99th percentile latency of {{ $value - }} seconds for requests. + description: Thanos Receive {{$labels.job}} has a 99th percentile latency of {{ + $value }} seconds for requests. + summary: Thanos Receive has high HTTP requests latency. expr: | ( histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-receive.*", handler="receive"}[5m]))) > 10 @@ -414,8 +447,9 @@ rules: severity: critical - alert: ThanosReceiveHighReplicationFailures annotations: - message: Thanos Receive {{$labels.job}} is failing to replicate {{ $value | humanize - }}% of requests. + description: Thanos Receive {{$labels.job}} is failing to replicate {{ $value + | humanize }}% of requests. + summary: Thanos Receive is having high number of replication failures. expr: | thanos_receive_replication_factor > 1 and @@ -437,8 +471,9 @@ rules: severity: warning - alert: ThanosReceiveHighForwardRequestFailures annotations: - message: Thanos Receive {{$labels.job}} is failing to forward {{ $value | humanize - }}% of requests. + description: Thanos Receive {{$labels.job}} is failing to forward {{ $value | + humanize }}% of requests. + summary: Thanos Receive is failing to forward requests. 
expr: | ( sum by (job) (rate(thanos_receive_forward_requests_total{result="error", job=~"thanos-receive.*"}[5m])) @@ -450,8 +485,9 @@ rules: severity: warning - alert: ThanosReceiveHighHashringFileRefreshFailures annotations: - message: Thanos Receive {{$labels.job}} is failing to refresh hashring file, {{ - $value | humanize }} of attempts failed. + description: Thanos Receive {{$labels.job}} is failing to refresh hashring file, + {{ $value | humanize }} of attempts failed. + summary: Thanos Receive is failing to refresh hasring file. expr: | ( sum by (job) (rate(thanos_receive_hashrings_file_errors_total{job=~"thanos-receive.*"}[5m])) @@ -464,7 +500,9 @@ rules: severity: warning - alert: ThanosReceiveConfigReloadFailure annotations: - message: Thanos Receive {{$labels.job}} has not been able to reload hashring configurations. + description: Thanos Receive {{$labels.job}} has not been able to reload hashring + configurations. + summary: Thanos Receive has not been able to reload configuration. expr: avg(thanos_receive_config_last_reload_successful{job=~"thanos-receive.*"}) by (job) != 1 for: 5m @@ -472,8 +510,9 @@ rules: severity: warning - alert: ThanosReceiveNoUpload annotations: - message: Thanos Receive {{ $labels.instance }} of {{$labels.job}} has not uploaded - latest data to object storage. + description: Thanos Receive {{ $labels.instance }} of {{$labels.job}} has not + uploaded latest data to object storage. + summary: Thanos Receive has not uploaded latest data to object storage. expr: | (up{job=~"thanos-receive.*"} - 1) + on (instance) # filters to only alert on current instance last 3h @@ -491,7 +530,8 @@ name: thanos-bucket-replicate.rules rules: - alert: ThanosBucketReplicateIsDown annotations: - message: Thanos Replicate has disappeared from Prometheus target discovery. + description: Thanos Replicate has disappeared from Prometheus target discovery. + summary: Thanos Replicate has disappeared from Prometheus target discovery. expr: | absent(up{job=~"thanos-bucket-replicate.*"}) for: 5m @@ -499,8 +539,9 @@ rules: severity: critical - alert: ThanosBucketReplicateErrorRate annotations: - message: Thanos Replicate failing to run, {{ $value | humanize }}% of attempts + description: Thanos Replicate failing to run, {{ $value | humanize }}% of attempts failed. + summary: Thanose Replicate is failing to run. expr: | ( sum(rate(thanos_replicate_replication_runs_total{result="error", job=~"thanos-bucket-replicate.*"}[5m])) @@ -512,8 +553,9 @@ rules: severity: critical - alert: ThanosBucketReplicateRunLatency annotations: - message: Thanos Replicate {{$labels.job}} has a 99th percentile latency of {{ - $value }} seconds for the replicate operations. + description: Thanos Replicate {{$labels.job}} has a 99th percentile latency of + {{ $value }} seconds for the replicate operations. + summary: Thanos Replicate has a high latency for replicate operations. expr: | ( histogram_quantile(0.9, sum by (job, le) (rate(thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-bucket-replicate.*"}[5m]))) > 20 @@ -535,7 +577,8 @@ name: thanos-component-absent.rules rules: - alert: ThanosCompactIsDown annotations: - message: ThanosCompact has disappeared from Prometheus target discovery. + description: ThanosCompact has disappeared from Prometheus target discovery. + summary: thanos component has disappeared from Prometheus target discovery. 
expr: | absent(up{job=~"thanos-compact.*"} == 1) for: 5m @@ -543,7 +586,8 @@ rules: severity: critical - alert: ThanosQueryIsDown annotations: - message: ThanosQuery has disappeared from Prometheus target discovery. + description: ThanosQuery has disappeared from Prometheus target discovery. + summary: thanos component has disappeared from Prometheus target discovery. expr: | absent(up{job=~"thanos-query.*"} == 1) for: 5m @@ -551,7 +595,8 @@ rules: severity: critical - alert: ThanosReceiveIsDown annotations: - message: ThanosReceive has disappeared from Prometheus target discovery. + description: ThanosReceive has disappeared from Prometheus target discovery. + summary: thanos component has disappeared from Prometheus target discovery. expr: | absent(up{job=~"thanos-receive.*"} == 1) for: 5m @@ -559,7 +604,8 @@ rules: severity: critical - alert: ThanosRuleIsDown annotations: - message: ThanosRule has disappeared from Prometheus target discovery. + description: ThanosRule has disappeared from Prometheus target discovery. + summary: thanos component has disappeared from Prometheus target discovery. expr: | absent(up{job=~"thanos-rule.*"} == 1) for: 5m @@ -567,7 +613,8 @@ rules: severity: critical - alert: ThanosSidecarIsDown annotations: - message: ThanosSidecar has disappeared from Prometheus target discovery. + description: ThanosSidecar has disappeared from Prometheus target discovery. + summary: thanos component has disappeared from Prometheus target discovery. expr: | absent(up{job=~"thanos-sidecar.*"} == 1) for: 5m @@ -575,7 +622,8 @@ rules: severity: critical - alert: ThanosStoreIsDown annotations: - message: ThanosStore has disappeared from Prometheus target discovery. + description: ThanosStore has disappeared from Prometheus target discovery. + summary: thanos component has disappeared from Prometheus target discovery. expr: | absent(up{job=~"thanos-store.*"} == 1) for: 5m diff --git a/examples/alerts/alerts.yaml b/examples/alerts/alerts.yaml index ad5f75301bc..98886a3fb67 100644 --- a/examples/alerts/alerts.yaml +++ b/examples/alerts/alerts.yaml @@ -3,23 +3,26 @@ groups: rules: - alert: ThanosCompactMultipleRunning annotations: - message: No more than one Thanos Compact instance should be running at once. + description: No more than one Thanos Compact instance should be running at once. There are {{ $value }} + summary: Thanos Compact has multiple instances running. expr: sum(up{job=~"thanos-compact.*"}) > 1 for: 5m labels: severity: warning - alert: ThanosCompactHalted annotations: - message: Thanos Compact {{$labels.job}} has failed to run and now is halted. + description: Thanos Compact {{$labels.job}} has failed to run and now is halted. + summary: Thanos Compact has failed to run ans is now halted. expr: thanos_compactor_halted{job=~"thanos-compact.*"} == 1 for: 5m labels: severity: warning - alert: ThanosCompactHighCompactionFailures annotations: - message: Thanos Compact {{$labels.job}} is failing to execute {{ $value | humanize - }}% of compactions. + description: Thanos Compact {{$labels.job}} is failing to execute {{ $value + | humanize }}% of compactions. + summary: Thanos Compact is failing to execute compactions. expr: | ( sum by (job) (rate(thanos_compact_group_compactions_failures_total{job=~"thanos-compact.*"}[5m])) @@ -32,8 +35,9 @@ groups: severity: warning - alert: ThanosCompactBucketHighOperationFailures annotations: - message: Thanos Compact {{$labels.job}} Bucket is failing to execute {{ $value - | humanize }}% of operations. 
+ description: Thanos Compact {{$labels.job}} Bucket is failing to execute {{ + $value | humanize }}% of operations. + summary: Thanos Compact Bucket is having a high number of operation failures. expr: | ( sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-compact.*"}[5m])) @@ -46,7 +50,9 @@ groups: severity: warning - alert: ThanosCompactHasNotRun annotations: - message: Thanos Compact {{$labels.job}} has not uploaded anything for 24 hours. + description: Thanos Compact {{$labels.job}} has not uploaded anything for 24 + hours. + summary: Thanos Compact has not uploaded anything for last 24 hours. expr: (time() - max(max_over_time(thanos_objstore_bucket_last_successful_upload_time{job=~"thanos-compact.*"}[24h]))) / 60 / 60 > 24 labels: @@ -55,8 +61,9 @@ groups: rules: - alert: ThanosQueryHttpRequestQueryErrorRateHigh annotations: - message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize + description: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of "query" requests. + summary: Thanos Query is failing to handle requests. expr: | ( sum(rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query"}[5m])) @@ -68,8 +75,9 @@ groups: severity: critical - alert: ThanosQueryHttpRequestQueryRangeErrorRateHigh annotations: - message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize + description: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of "query_range" requests. + summary: Thanos Query is failing to handle requests. expr: | ( sum(rate(http_requests_total{code=~"5..", job=~"thanos-query.*", handler="query_range"}[5m])) @@ -81,8 +89,9 @@ groups: severity: critical - alert: ThanosQueryGrpcServerErrorRate annotations: - message: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize + description: Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests. + summary: Thanos Query is failing to handle requests. expr: | ( sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-query.*"}[5m])) @@ -95,8 +104,9 @@ groups: severity: warning - alert: ThanosQueryGrpcClientErrorRate annotations: - message: Thanos Query {{$labels.job}} is failing to send {{ $value | humanize + description: Thanos Query {{$labels.job}} is failing to send {{ $value | humanize }}% of requests. + summary: Thanos Query is failing to send requests. expr: | ( sum by (job) (rate(grpc_client_handled_total{grpc_code!="OK", job=~"thanos-query.*"}[5m])) @@ -108,8 +118,9 @@ groups: severity: warning - alert: ThanosQueryHighDNSFailures annotations: - message: Thanos Query {{$labels.job}} have {{ $value | humanize }}% of failing + description: Thanos Query {{$labels.job}} have {{ $value | humanize }}% of failing DNS queries for store endpoints. + summary: Thanos Query is having high number of DNS failures. expr: | ( sum by (job) (rate(thanos_querier_store_apis_dns_failures_total{job=~"thanos-query.*"}[5m])) @@ -121,8 +132,9 @@ groups: severity: warning - alert: ThanosQueryInstantLatencyHigh annotations: - message: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value - }} seconds for instant queries. + description: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ + $value }} seconds for instant queries. + summary: Thanos Query has high latency for queries. 
expr: | ( histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-query.*", handler="query"}[5m]))) > 40 @@ -134,8 +146,9 @@ groups: severity: critical - alert: ThanosQueryRangeLatencyHigh annotations: - message: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value - }} seconds for range queries. + description: Thanos Query {{$labels.job}} has a 99th percentile latency of {{ + $value }} seconds for range queries. + summary: Thanos Query has high latency for queries. expr: | ( histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-query.*", handler="query_range"}[5m]))) > 90 @@ -149,8 +162,9 @@ groups: rules: - alert: ThanosReceiveHttpRequestErrorRateHigh annotations: - message: Thanos Receive {{$labels.job}} is failing to handle {{ $value | humanize - }}% of requests. + description: Thanos Receive {{$labels.job}} is failing to handle {{ $value | + humanize }}% of requests. + summary: Thanos Receive is failing to handle requests. expr: | ( sum(rate(http_requests_total{code=~"5..", job=~"thanos-receive.*", handler="receive"}[5m])) @@ -162,8 +176,9 @@ groups: severity: critical - alert: ThanosReceiveHttpRequestLatencyHigh annotations: - message: Thanos Receive {{$labels.job}} has a 99th percentile latency of {{ - $value }} seconds for requests. + description: Thanos Receive {{$labels.job}} has a 99th percentile latency of + {{ $value }} seconds for requests. + summary: Thanos Receive has high HTTP requests latency. expr: | ( histogram_quantile(0.99, sum by (job, le) (rate(http_request_duration_seconds_bucket{job=~"thanos-receive.*", handler="receive"}[5m]))) > 10 @@ -175,8 +190,9 @@ groups: severity: critical - alert: ThanosReceiveHighReplicationFailures annotations: - message: Thanos Receive {{$labels.job}} is failing to replicate {{ $value | - humanize }}% of requests. + description: Thanos Receive {{$labels.job}} is failing to replicate {{ $value + | humanize }}% of requests. + summary: Thanos Receive is having high number of replication failures. expr: | thanos_receive_replication_factor > 1 and @@ -198,8 +214,9 @@ groups: severity: warning - alert: ThanosReceiveHighForwardRequestFailures annotations: - message: Thanos Receive {{$labels.job}} is failing to forward {{ $value | humanize - }}% of requests. + description: Thanos Receive {{$labels.job}} is failing to forward {{ $value + | humanize }}% of requests. + summary: Thanos Receive is failing to forward requests. expr: | ( sum by (job) (rate(thanos_receive_forward_requests_total{result="error", job=~"thanos-receive.*"}[5m])) @@ -211,8 +228,9 @@ groups: severity: warning - alert: ThanosReceiveHighHashringFileRefreshFailures annotations: - message: Thanos Receive {{$labels.job}} is failing to refresh hashring file, + description: Thanos Receive {{$labels.job}} is failing to refresh hashring file, {{ $value | humanize }} of attempts failed. + summary: Thanos Receive is failing to refresh hasring file. expr: | ( sum by (job) (rate(thanos_receive_hashrings_file_errors_total{job=~"thanos-receive.*"}[5m])) @@ -225,8 +243,9 @@ groups: severity: warning - alert: ThanosReceiveConfigReloadFailure annotations: - message: Thanos Receive {{$labels.job}} has not been able to reload hashring + description: Thanos Receive {{$labels.job}} has not been able to reload hashring configurations. + summary: Thanos Receive has not been able to reload configuration. 
expr: avg(thanos_receive_config_last_reload_successful{job=~"thanos-receive.*"}) by (job) != 1 for: 5m @@ -234,8 +253,9 @@ groups: severity: warning - alert: ThanosReceiveNoUpload annotations: - message: Thanos Receive {{ $labels.instance }} of {{$labels.job}} has not uploaded - latest data to object storage. + description: Thanos Receive {{ $labels.instance }} of {{$labels.job}} has not + uploaded latest data to object storage. + summary: Thanos Receive has not uploaded latest data to object storage. expr: | (up{job=~"thanos-receive.*"} - 1) + on (instance) # filters to only alert on current instance last 3h @@ -247,7 +267,9 @@ groups: rules: - alert: ThanosSidecarPrometheusDown annotations: - message: Thanos Sidecar {{$labels.job}} {{$labels.pod}} cannot connect to Prometheus. + description: Thanos Sidecar {{$labels.job}} {{$labels.pod}} cannot connect to + Prometheus. + summary: Thanos Sidecar cannot connect to Prometheus expr: | sum by (job, pod) (thanos_sidecar_prometheus_up{job=~"thanos-sidecar.*"} == 0) for: 5m @@ -255,8 +277,9 @@ groups: severity: critical - alert: ThanosSidecarUnhealthy annotations: - message: Thanos Sidecar {{$labels.job}} {{$labels.pod}} is unhealthy for {{ - $value }} seconds. + description: Thanos Sidecar {{$labels.job}} {{$labels.pod}} is unhealthy for + {{ $value }} seconds. + summary: Thanos Sidecar is unhealthy. expr: | time() - max(thanos_sidecar_last_heartbeat_success_time_seconds{job=~"thanos-sidecar.*"}) by (job, pod) >= 600 labels: @@ -265,8 +288,9 @@ groups: rules: - alert: ThanosStoreGrpcErrorRate annotations: - message: Thanos Store {{$labels.job}} is failing to handle {{ $value | humanize + description: Thanos Store {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests. + summary: Thanos Store is failing to handle qrpcd requests. expr: | ( sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-store.*"}[5m])) @@ -279,8 +303,9 @@ groups: severity: warning - alert: ThanosStoreSeriesGateLatencyHigh annotations: - message: Thanos Store {{$labels.job}} has a 99th percentile latency of {{ $value - }} seconds for store series gate requests. + description: Thanos Store {{$labels.job}} has a 99th percentile latency of {{ + $value }} seconds for store series gate requests. + summary: Thanos Store has high latency for store series gate requests. expr: | ( histogram_quantile(0.9, sum by (job, le) (rate(thanos_bucket_store_series_gate_duration_seconds_bucket{job=~"thanos-store.*"}[5m]))) > 2 @@ -292,8 +317,9 @@ groups: severity: warning - alert: ThanosStoreBucketHighOperationFailures annotations: - message: Thanos Store {{$labels.job}} Bucket is failing to execute {{ $value + description: Thanos Store {{$labels.job}} Bucket is failing to execute {{ $value | humanize }}% of operations. + summary: Thanos Store Bucket is failing to execute operations. expr: | ( sum by (job) (rate(thanos_objstore_bucket_operation_failures_total{job=~"thanos-store.*"}[5m])) @@ -306,8 +332,9 @@ groups: severity: warning - alert: ThanosStoreObjstoreOperationLatencyHigh annotations: - message: Thanos Store {{$labels.job}} Bucket has a 99th percentile latency of - {{ $value }} seconds for the bucket operations. + description: Thanos Store {{$labels.job}} Bucket has a 99th percentile latency + of {{ $value }} seconds for the bucket operations. + summary: Thanos Store is having high latency for bucket operations. 
expr: | ( histogram_quantile(0.9, sum by (job, le) (rate(thanos_objstore_bucket_operation_duration_seconds_bucket{job=~"thanos-store.*"}[5m]))) > 2 @@ -321,7 +348,9 @@ groups: rules: - alert: ThanosRuleQueueIsDroppingAlerts annotations: - message: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to queue alerts. + description: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to queue + alerts. + summary: Thanos Rule is failing to queue alerts. expr: | sum by (job) (rate(thanos_alert_queue_alerts_dropped_total{job=~"thanos-rule.*"}[5m])) > 0 for: 5m @@ -329,8 +358,9 @@ groups: severity: critical - alert: ThanosRuleSenderIsFailingAlerts annotations: - message: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to send alerts - to alertmanager. + description: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to send + alerts to alertmanager. + summary: Thanos Rule is failing to send alerts to alertmanager. expr: | sum by (job) (rate(thanos_alert_sender_alerts_dropped_total{job=~"thanos-rule.*"}[5m])) > 0 for: 5m @@ -338,8 +368,9 @@ groups: severity: critical - alert: ThanosRuleHighRuleEvaluationFailures annotations: - message: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to evaluate + description: Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to evaluate rules. + summary: Thanos Rule is failing to evaluate rules. expr: | ( sum by (job) (rate(prometheus_rule_evaluation_failures_total{job=~"thanos-rule.*"}[5m])) @@ -352,8 +383,9 @@ groups: severity: critical - alert: ThanosRuleHighRuleEvaluationWarnings annotations: - message: Thanos Rule {{$labels.job}} {{$labels.pod}} has high number of evaluation - warnings. + description: Thanos Rule {{$labels.job}} {{$labels.pod}} has high number of + evaluation warnings. + summary: Thanos Rule has high number of evaluation warnings. expr: | sum by (job) (rate(thanos_rule_evaluation_with_warnings_total{job=~"thanos-rule.*"}[5m])) > 0 for: 15m @@ -361,8 +393,9 @@ groups: severity: info - alert: ThanosRuleRuleEvaluationLatencyHigh annotations: - message: Thanos Rule {{$labels.job}}/{{$labels.pod}} has higher evaluation latency - than interval for {{$labels.rule_group}}. + description: Thanos Rule {{$labels.job}}/{{$labels.pod}} has higher evaluation + latency than interval for {{$labels.rule_group}}. + summary: Thanos Rule has high rule evaluation latency. expr: | ( sum by (job, pod, rule_group) (prometheus_rule_group_last_duration_seconds{job=~"thanos-rule.*"}) @@ -374,8 +407,9 @@ groups: severity: warning - alert: ThanosRuleGrpcErrorRate annotations: - message: Thanos Rule {{$labels.job}} is failing to handle {{ $value | humanize + description: Thanos Rule {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests. + summary: Thanos Rule is failing to handle grpc requests. expr: | ( sum by (job) (rate(grpc_server_handled_total{grpc_code=~"Unknown|ResourceExhausted|Internal|Unavailable|DataLoss|DeadlineExceeded", job=~"thanos-rule.*"}[5m])) @@ -388,7 +422,8 @@ groups: severity: warning - alert: ThanosRuleConfigReloadFailure annotations: - message: Thanos Rule {{$labels.job}} has not been able to reload its configuration. + description: Thanos Rule {{$labels.job}} has not been able to reload its configuration. + summary: Thanos Rule has not been able to reload configuration. 
expr: avg(thanos_rule_config_last_reload_successful{job=~"thanos-rule.*"}) by (job) != 1 for: 5m @@ -396,8 +431,9 @@ groups: severity: info - alert: ThanosRuleQueryHighDNSFailures annotations: - message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing + description: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing DNS queries for query endpoints. + summary: Thanos Rule is having high number of DNS failures. expr: | ( sum by (job) (rate(thanos_ruler_query_apis_dns_failures_total{job=~"thanos-rule.*"}[5m])) @@ -410,8 +446,9 @@ groups: severity: warning - alert: ThanosRuleAlertmanagerHighDNSFailures annotations: - message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing + description: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing DNS queries for Alertmanager endpoints. + summary: Thanos Rule is having high number of DNS failures. expr: | ( sum by (job) (rate(thanos_ruler_alertmanagers_dns_failures_total{job=~"thanos-rule.*"}[5m])) @@ -424,8 +461,9 @@ groups: severity: warning - alert: ThanosRuleNoEvaluationFor10Intervals annotations: - message: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% rule groups + description: Thanos Rule {{$labels.job}} has {{ $value | humanize }}% rule groups that did not evaluate for at least 10x of their expected interval. + summary: Thanos Rule has rule groups that did not evaluate for 10 intervals. expr: | time() - max by (job, group) (prometheus_rule_group_last_evaluation_timestamp_seconds{job=~"thanos-rule.*"}) > @@ -435,8 +473,9 @@ groups: severity: info - alert: ThanosNoRuleEvaluations annotations: - message: Thanos Rule {{$labels.job}} did not perform any rule evaluations in - the past 2 minutes. + description: Thanos Rule {{$labels.job}} did not perform any rule evaluations + in the past 2 minutes. + summary: Thanos Rule did not perform any rule evaluations. expr: | sum(rate(prometheus_rule_evaluations_total{job=~"thanos-rule.*"}[2m])) <= 0 and @@ -448,7 +487,8 @@ groups: rules: - alert: ThanosCompactIsDown annotations: - message: ThanosCompact has disappeared from Prometheus target discovery. + description: ThanosCompact has disappeared from Prometheus target discovery. + summary: thanos component has disappeared from Prometheus target discovery. expr: | absent(up{job=~"thanos-compact.*"} == 1) for: 5m @@ -456,7 +496,8 @@ groups: severity: critical - alert: ThanosQueryIsDown annotations: - message: ThanosQuery has disappeared from Prometheus target discovery. + description: ThanosQuery has disappeared from Prometheus target discovery. + summary: thanos component has disappeared from Prometheus target discovery. expr: | absent(up{job=~"thanos-query.*"} == 1) for: 5m @@ -464,7 +505,8 @@ groups: severity: critical - alert: ThanosReceiveIsDown annotations: - message: ThanosReceive has disappeared from Prometheus target discovery. + description: ThanosReceive has disappeared from Prometheus target discovery. + summary: thanos component has disappeared from Prometheus target discovery. expr: | absent(up{job=~"thanos-receive.*"} == 1) for: 5m @@ -472,7 +514,8 @@ groups: severity: critical - alert: ThanosRuleIsDown annotations: - message: ThanosRule has disappeared from Prometheus target discovery. + description: ThanosRule has disappeared from Prometheus target discovery. + summary: thanos component has disappeared from Prometheus target discovery. 
expr: | absent(up{job=~"thanos-rule.*"} == 1) for: 5m @@ -480,7 +523,8 @@ groups: severity: critical - alert: ThanosSidecarIsDown annotations: - message: ThanosSidecar has disappeared from Prometheus target discovery. + description: ThanosSidecar has disappeared from Prometheus target discovery. + summary: thanos component has disappeared from Prometheus target discovery. expr: | absent(up{job=~"thanos-sidecar.*"} == 1) for: 5m @@ -488,7 +532,8 @@ groups: severity: critical - alert: ThanosStoreIsDown annotations: - message: ThanosStore has disappeared from Prometheus target discovery. + description: ThanosStore has disappeared from Prometheus target discovery. + summary: thanos component has disappeared from Prometheus target discovery. expr: | absent(up{job=~"thanos-store.*"} == 1) for: 5m @@ -498,7 +543,8 @@ groups: rules: - alert: ThanosBucketReplicateIsDown annotations: - message: Thanos Replicate has disappeared from Prometheus target discovery. + description: Thanos Replicate has disappeared from Prometheus target discovery. + summary: Thanos Replicate has disappeared from Prometheus target discovery. expr: | absent(up{job=~"thanos-bucket-replicate.*"}) for: 5m @@ -506,8 +552,9 @@ groups: severity: critical - alert: ThanosBucketReplicateErrorRate annotations: - message: Thanos Replicate failing to run, {{ $value | humanize }}% of attempts + description: Thanos Replicate failing to run, {{ $value | humanize }}% of attempts failed. + summary: Thanose Replicate is failing to run. expr: | ( sum(rate(thanos_replicate_replication_runs_total{result="error", job=~"thanos-bucket-replicate.*"}[5m])) @@ -519,8 +566,9 @@ groups: severity: critical - alert: ThanosBucketReplicateRunLatency annotations: - message: Thanos Replicate {{$labels.job}} has a 99th percentile latency of {{ - $value }} seconds for the replicate operations. + description: Thanos Replicate {{$labels.job}} has a 99th percentile latency + of {{ $value }} seconds for the replicate operations. + summary: Thanos Replicate has a high latency for replicate operations. expr: | ( histogram_quantile(0.9, sum by (job, le) (rate(thanos_replicate_replication_run_duration_seconds_bucket{job=~"thanos-bucket-replicate.*"}[5m]))) > 20 diff --git a/examples/alerts/tests.yaml b/examples/alerts/tests.yaml index adac87b9a4a..3d75fd7353f 100644 --- a/examples/alerts/tests.yaml +++ b/examples/alerts/tests.yaml @@ -79,13 +79,15 @@ tests: job: thanos-sidecar pod: thanos-sidecar-pod-0 exp_annotations: - message: 'Thanos Sidecar thanos-sidecar thanos-sidecar-pod-0 is unhealthy for 600 seconds.' + description: 'Thanos Sidecar thanos-sidecar thanos-sidecar-pod-0 is unhealthy for 600 seconds.' + summary: 'Thanos Sidecar is unhealthy.' - exp_labels: severity: critical job: thanos-sidecar pod: thanos-sidecar-pod-1 exp_annotations: - message: 'Thanos Sidecar thanos-sidecar thanos-sidecar-pod-1 is unhealthy for 600 seconds.' + description: 'Thanos Sidecar thanos-sidecar thanos-sidecar-pod-1 is unhealthy for 600 seconds.' + summary: 'Thanos Sidecar is unhealthy.' - eval_time: 11m alertname: ThanosSidecarUnhealthy exp_alerts: @@ -94,13 +96,15 @@ tests: job: thanos-sidecar pod: thanos-sidecar-pod-0 exp_annotations: - message: 'Thanos Sidecar thanos-sidecar thanos-sidecar-pod-0 is unhealthy for 660 seconds.' + description: 'Thanos Sidecar thanos-sidecar thanos-sidecar-pod-0 is unhealthy for 660 seconds.' + summary: 'Thanos Sidecar is unhealthy.' 
- exp_labels: severity: critical job: thanos-sidecar pod: thanos-sidecar-pod-1 exp_annotations: - message: 'Thanos Sidecar thanos-sidecar thanos-sidecar-pod-1 is unhealthy for 660 seconds.' + description: 'Thanos Sidecar thanos-sidecar thanos-sidecar-pod-1 is unhealthy for 660 seconds.' + summary: 'Thanos Sidecar is unhealthy.' - eval_time: 12m alertname: ThanosSidecarUnhealthy exp_alerts: @@ -109,10 +113,12 @@ tests: job: thanos-sidecar pod: thanos-sidecar-pod-0 exp_annotations: - message: 'Thanos Sidecar thanos-sidecar thanos-sidecar-pod-0 is unhealthy for 720 seconds.' + description: 'Thanos Sidecar thanos-sidecar thanos-sidecar-pod-0 is unhealthy for 720 seconds.' + summary: 'Thanos Sidecar is unhealthy.' - exp_labels: severity: critical job: thanos-sidecar pod: thanos-sidecar-pod-1 exp_annotations: - message: 'Thanos Sidecar thanos-sidecar thanos-sidecar-pod-1 is unhealthy for 720 seconds.' + description: 'Thanos Sidecar thanos-sidecar thanos-sidecar-pod-1 is unhealthy for 720 seconds.' + summary: 'Thanos Sidecar is unhealthy.' diff --git a/mixin/alerts/absent.libsonnet b/mixin/alerts/absent.libsonnet index 4c7bf6bdff1..ba1e56c3ff4 100644 --- a/mixin/alerts/absent.libsonnet +++ b/mixin/alerts/absent.libsonnet @@ -26,7 +26,8 @@ severity: 'critical', }, annotations: { - message: '%s has disappeared from Prometheus target discovery.' % name, + description: '%s has disappeared from Prometheus target discovery.' % name, + summary: 'thanos component has disappeared from Prometheus target discovery.', }, } for name in std.objectFields(thanos.jobs) diff --git a/mixin/alerts/bucket_replicate.libsonnet b/mixin/alerts/bucket_replicate.libsonnet index 7517187c4e2..6235f133761 100644 --- a/mixin/alerts/bucket_replicate.libsonnet +++ b/mixin/alerts/bucket_replicate.libsonnet @@ -20,13 +20,15 @@ severity: 'critical', }, annotations: { - message: 'Thanos Replicate has disappeared from Prometheus target discovery.', + description: 'Thanos Replicate has disappeared from Prometheus target discovery.', + summary: 'Thanos Replicate has disappeared from Prometheus target discovery.', }, }, { alert: 'ThanosBucketReplicateErrorRate', annotations: { - message: 'Thanos Replicate failing to run, {{ $value | humanize }}% of attempts failed.', + description: 'Thanos Replicate failing to run, {{ $value | humanize }}% of attempts failed.', + summary: 'Thanose Replicate is failing to run.', }, expr: ||| ( @@ -43,7 +45,8 @@ { alert: 'ThanosBucketReplicateRunLatency', annotations: { - message: 'Thanos Replicate {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for the replicate operations.', + description: 'Thanos Replicate {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for the replicate operations.', + summary: 'Thanos Replicate has a high latency for replicate operations.', }, expr: ||| ( diff --git a/mixin/alerts/compact.libsonnet b/mixin/alerts/compact.libsonnet index 3fb8f474a5d..decc6d184b2 100644 --- a/mixin/alerts/compact.libsonnet +++ b/mixin/alerts/compact.libsonnet @@ -13,7 +13,8 @@ { alert: 'ThanosCompactMultipleRunning', annotations: { - message: 'No more than one Thanos Compact instance should be running at once. There are {{ $value }}', + description: 'No more than one Thanos Compact instance should be running at once. 
There are {{ $value }}', + summary: 'Thanos Compact has multiple instances running.', }, expr: 'sum(up{%(selector)s}) > 1' % thanos.compact, 'for': '5m', @@ -24,7 +25,8 @@ { alert: 'ThanosCompactHalted', annotations: { - message: 'Thanos Compact {{$labels.job}} has failed to run and now is halted.', + description: 'Thanos Compact {{$labels.job}} has failed to run and now is halted.', + summary: 'Thanos Compact has failed to run ans is now halted.', }, expr: 'thanos_compactor_halted{%(selector)s} == 1' % thanos.compact, 'for': '5m', @@ -35,7 +37,8 @@ { alert: 'ThanosCompactHighCompactionFailures', annotations: { - message: 'Thanos Compact {{$labels.job}} is failing to execute {{ $value | humanize }}% of compactions.', + description: 'Thanos Compact {{$labels.job}} is failing to execute {{ $value | humanize }}% of compactions.', + summary: 'Thanos Compact is failing to execute compactions.', }, expr: ||| ( @@ -53,7 +56,8 @@ { alert: 'ThanosCompactBucketHighOperationFailures', annotations: { - message: 'Thanos Compact {{$labels.job}} Bucket is failing to execute {{ $value | humanize }}% of operations.', + description: 'Thanos Compact {{$labels.job}} Bucket is failing to execute {{ $value | humanize }}% of operations.', + summary: 'Thanos Compact Bucket is having a high number of operation failures.', }, expr: ||| ( @@ -71,7 +75,8 @@ { alert: 'ThanosCompactHasNotRun', annotations: { - message: 'Thanos Compact {{$labels.job}} has not uploaded anything for 24 hours.', + description: 'Thanos Compact {{$labels.job}} has not uploaded anything for 24 hours.', + summary: 'Thanos Compact has not uploaded anything for last 24 hours.', }, expr: '(time() - max(max_over_time(thanos_objstore_bucket_last_successful_upload_time{%(selector)s}[24h]))) / 60 / 60 > 24' % thanos.compact, labels: { diff --git a/mixin/alerts/query.libsonnet b/mixin/alerts/query.libsonnet index 6326c846a7c..fdfd5f611a1 100644 --- a/mixin/alerts/query.libsonnet +++ b/mixin/alerts/query.libsonnet @@ -16,7 +16,8 @@ { alert: 'ThanosQueryHttpRequestQueryErrorRateHigh', annotations: { - message: 'Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of "query" requests.', + description: 'Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of "query" requests.', + summary: 'Thanos Query is failing to handle requests.', }, expr: ||| ( @@ -33,7 +34,8 @@ { alert: 'ThanosQueryHttpRequestQueryRangeErrorRateHigh', annotations: { - message: 'Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of "query_range" requests.', + description: 'Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of "query_range" requests.', + summary: 'Thanos Query is failing to handle requests.', }, expr: ||| ( @@ -50,7 +52,8 @@ { alert: 'ThanosQueryGrpcServerErrorRate', annotations: { - message: 'Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests.', + description: 'Thanos Query {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests.', + summary: 'Thanos Query is failing to handle requests.', }, expr: ||| ( @@ -68,7 +71,8 @@ { alert: 'ThanosQueryGrpcClientErrorRate', annotations: { - message: 'Thanos Query {{$labels.job}} is failing to send {{ $value | humanize }}% of requests.', + description: 'Thanos Query {{$labels.job}} is failing to send {{ $value | humanize }}% of requests.', + summary: 'Thanos Query is failing to send requests.', }, expr: ||| ( @@ -85,7 +89,8 @@ { alert: 'ThanosQueryHighDNSFailures', annotations: { - 
message: 'Thanos Query {{$labels.job}} have {{ $value | humanize }}% of failing DNS queries for store endpoints.', + description: 'Thanos Query {{$labels.job}} have {{ $value | humanize }}% of failing DNS queries for store endpoints.', + summary: 'Thanos Query is having high number of DNS failures.', }, expr: ||| ( @@ -102,7 +107,8 @@ { alert: 'ThanosQueryInstantLatencyHigh', annotations: { - message: 'Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for instant queries.', + description: 'Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for instant queries.', + summary: 'Thanos Query has high latency for queries.', }, expr: ||| ( @@ -119,7 +125,8 @@ { alert: 'ThanosQueryRangeLatencyHigh', annotations: { - message: 'Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for range queries.', + description: 'Thanos Query {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for range queries.', + summary: 'Thanos Query has high latency for queries.', }, expr: ||| ( diff --git a/mixin/alerts/receive.libsonnet b/mixin/alerts/receive.libsonnet index 7c3dbee5e0e..e07bf84dbbb 100644 --- a/mixin/alerts/receive.libsonnet +++ b/mixin/alerts/receive.libsonnet @@ -15,7 +15,8 @@ { alert: 'ThanosReceiveHttpRequestErrorRateHigh', annotations: { - message: 'Thanos Receive {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests.', + description: 'Thanos Receive {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests.', + summary: 'Thanos Receive is failing to handle requests.', }, expr: ||| ( @@ -32,7 +33,8 @@ { alert: 'ThanosReceiveHttpRequestLatencyHigh', annotations: { - message: 'Thanos Receive {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for requests.', + description: 'Thanos Receive {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for requests.', + summary: 'Thanos Receive has high HTTP requests latency.', }, expr: ||| ( @@ -49,7 +51,8 @@ { alert: 'ThanosReceiveHighReplicationFailures', annotations: { - message: 'Thanos Receive {{$labels.job}} is failing to replicate {{ $value | humanize }}% of requests.', + description: 'Thanos Receive {{$labels.job}} is failing to replicate {{ $value | humanize }}% of requests.', + summary: 'Thanos Receive is having high number of replication failures.', }, expr: ||| thanos_receive_replication_factor > 1 @@ -76,7 +79,8 @@ { alert: 'ThanosReceiveHighForwardRequestFailures', annotations: { - message: 'Thanos Receive {{$labels.job}} is failing to forward {{ $value | humanize }}% of requests.', + description: 'Thanos Receive {{$labels.job}} is failing to forward {{ $value | humanize }}% of requests.', + summary: 'Thanos Receive is failing to forward requests.', }, expr: ||| ( @@ -93,7 +97,8 @@ { alert: 'ThanosReceiveHighHashringFileRefreshFailures', annotations: { - message: 'Thanos Receive {{$labels.job}} is failing to refresh hashring file, {{ $value | humanize }} of attempts failed.', + description: 'Thanos Receive {{$labels.job}} is failing to refresh hashring file, {{ $value | humanize }} of attempts failed.', + summary: 'Thanos Receive is failing to refresh hasring file.', }, expr: ||| ( @@ -111,7 +116,8 @@ { alert: 'ThanosReceiveConfigReloadFailure', annotations: { - message: 'Thanos Receive {{$labels.job}} has not been able to reload hashring configurations.', + description: 'Thanos Receive {{$labels.job}} has not been able to reload hashring configurations.', + summary: 
'Thanos Receive has not been able to reload configuration.', }, expr: 'avg(thanos_receive_config_last_reload_successful{%(selector)s}) by (job) != 1' % thanos.receive, 'for': '5m', @@ -122,7 +128,8 @@ { alert: 'ThanosReceiveNoUpload', annotations: { - message: 'Thanos Receive {{ $labels.instance }} of {{$labels.job}} has not uploaded latest data to object storage.', + description: 'Thanos Receive {{ $labels.instance }} of {{$labels.job}} has not uploaded latest data to object storage.', + summary: 'Thanos Receive has not uploaded latest data to object storage.', }, expr: ||| (up{%(selector)s} - 1) diff --git a/mixin/alerts/rule.libsonnet b/mixin/alerts/rule.libsonnet index 80f6ebd58b7..552625bd201 100644 --- a/mixin/alerts/rule.libsonnet +++ b/mixin/alerts/rule.libsonnet @@ -15,7 +15,8 @@ { alert: 'ThanosRuleQueueIsDroppingAlerts', annotations: { - message: 'Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to queue alerts.', + description: 'Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to queue alerts.', + summary: 'Thanos Rule is failing to queue alerts.', }, expr: ||| sum by (job) (rate(thanos_alert_queue_alerts_dropped_total{%(selector)s}[5m])) > 0 @@ -28,7 +29,8 @@ { alert: 'ThanosRuleSenderIsFailingAlerts', annotations: { - message: 'Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to send alerts to alertmanager.', + description: 'Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to send alerts to alertmanager.', + summary: 'Thanos Rule is failing to send alerts to alertmanager.', }, expr: ||| sum by (job) (rate(thanos_alert_sender_alerts_dropped_total{%(selector)s}[5m])) > 0 @@ -41,7 +43,8 @@ { alert: 'ThanosRuleHighRuleEvaluationFailures', annotations: { - message: 'Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to evaluate rules.', + description: 'Thanos Rule {{$labels.job}} {{$labels.pod}} is failing to evaluate rules.', + summary: 'Thanos Rule is failing to evaluate rules.', }, expr: ||| ( @@ -60,7 +63,8 @@ { alert: 'ThanosRuleHighRuleEvaluationWarnings', annotations: { - message: 'Thanos Rule {{$labels.job}} {{$labels.pod}} has high number of evaluation warnings.', + description: 'Thanos Rule {{$labels.job}} {{$labels.pod}} has high number of evaluation warnings.', + summary: 'Thanos Rule has high number of evaluation warnings.', }, expr: ||| sum by (job) (rate(thanos_rule_evaluation_with_warnings_total{%(selector)s}[5m])) > 0 @@ -74,7 +78,8 @@ { alert: 'ThanosRuleRuleEvaluationLatencyHigh', annotations: { - message: 'Thanos Rule {{$labels.job}}/{{$labels.pod}} has higher evaluation latency than interval for {{$labels.rule_group}}.', + description: 'Thanos Rule {{$labels.job}}/{{$labels.pod}} has higher evaluation latency than interval for {{$labels.rule_group}}.', + summary: 'Thanos Rule has high rule evaluation latency.', }, expr: ||| ( @@ -91,7 +96,8 @@ { alert: 'ThanosRuleGrpcErrorRate', annotations: { - message: 'Thanos Rule {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests.', + description: 'Thanos Rule {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests.', + summary: 'Thanos Rule is failing to handle grpc requests.', }, expr: ||| ( @@ -109,7 +115,8 @@ { alert: 'ThanosRuleConfigReloadFailure', annotations: { - message: 'Thanos Rule {{$labels.job}} has not been able to reload its configuration.', + description: 'Thanos Rule {{$labels.job}} has not been able to reload its configuration.', + summary: 'Thanos Rule has not been able to reload configuration.', }, expr: 
'avg(thanos_rule_config_last_reload_successful{%(selector)s}) by (job) != 1' % thanos.rule, 'for': '5m', @@ -120,7 +127,8 @@ { alert: 'ThanosRuleQueryHighDNSFailures', annotations: { - message: 'Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing DNS queries for query endpoints.', + description: 'Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing DNS queries for query endpoints.', + summary: 'Thanos Rule is having a high number of DNS failures.', }, expr: ||| ( @@ -138,7 +146,8 @@ { alert: 'ThanosRuleAlertmanagerHighDNSFailures', annotations: { - message: 'Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing DNS queries for Alertmanager endpoints.', + description: 'Thanos Rule {{$labels.job}} has {{ $value | humanize }}% of failing DNS queries for Alertmanager endpoints.', + summary: 'Thanos Rule is having a high number of DNS failures.', }, expr: ||| ( @@ -157,7 +166,8 @@ // NOTE: This alert will give false positive if no rules are configured. alert: 'ThanosRuleNoEvaluationFor10Intervals', annotations: { - message: 'Thanos Rule {{$labels.job}} has {{ $value | humanize }}% rule groups that did not evaluate for at least 10x of their expected interval.', + description: 'Thanos Rule {{$labels.job}} has {{ $value | humanize }}% rule groups that did not evaluate for at least 10x of their expected interval.', + summary: 'Thanos Rule has rule groups that did not evaluate for 10 intervals.', }, expr: ||| time() - max by (job, group) (prometheus_rule_group_last_evaluation_timestamp_seconds{%(selector)s}) @@ -173,7 +183,8 @@ { alert: 'ThanosNoRuleEvaluations', annotations: { - message: 'Thanos Rule {{$labels.job}} did not perform any rule evaluations in the past 2 minutes.', + description: 'Thanos Rule {{$labels.job}} did not perform any rule evaluations in the past 2 minutes.', + summary: 'Thanos Rule did not perform any rule evaluations.', }, expr: ||| sum(rate(prometheus_rule_evaluations_total{%(selector)s}[2m])) <= 0 diff --git a/mixin/alerts/sidecar.libsonnet b/mixin/alerts/sidecar.libsonnet index e1790dbac63..20f217ce15e 100644 --- a/mixin/alerts/sidecar.libsonnet +++ b/mixin/alerts/sidecar.libsonnet @@ -11,7 +11,8 @@ { alert: 'ThanosSidecarPrometheusDown', annotations: { - message: 'Thanos Sidecar {{$labels.job}} {{$labels.pod}} cannot connect to Prometheus.', + description: 'Thanos Sidecar {{$labels.job}} {{$labels.pod}} cannot connect to Prometheus.', + summary: 'Thanos Sidecar cannot connect to Prometheus.', }, expr: ||| sum by (job, pod) (thanos_sidecar_prometheus_up{%(selector)s} == 0) @@ -24,7 +25,8 @@ { alert: 'ThanosSidecarUnhealthy', annotations: { - message: 'Thanos Sidecar {{$labels.job}} {{$labels.pod}} is unhealthy for {{ $value }} seconds.', + description: 'Thanos Sidecar {{$labels.job}} {{$labels.pod}} is unhealthy for {{ $value }} seconds.', + summary: 'Thanos Sidecar is unhealthy.', }, expr: ||| time() - max(thanos_sidecar_last_heartbeat_success_time_seconds{%(selector)s}) by (job, pod) >= 600 diff --git a/mixin/alerts/store.libsonnet b/mixin/alerts/store.libsonnet index 04d723ffaa5..f895b4602f5 100644 --- a/mixin/alerts/store.libsonnet +++ b/mixin/alerts/store.libsonnet @@ -16,7 +16,8 @@ { alert: 'ThanosStoreGrpcErrorRate', annotations: { - message: 'Thanos Store {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests.', + description: 'Thanos Store {{$labels.job}} is failing to handle {{ $value | humanize }}% of requests.', + summary: 'Thanos Store is failing to handle grpc requests.', }, expr: ||| ( @@ -34,7
+35,8 @@ { alert: 'ThanosStoreSeriesGateLatencyHigh', annotations: { - message: 'Thanos Store {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for store series gate requests.', + description: 'Thanos Store {{$labels.job}} has a 99th percentile latency of {{ $value }} seconds for store series gate requests.', + summary: 'Thanos Store has high latency for store series gate requests.', }, expr: ||| ( @@ -51,7 +53,8 @@ { alert: 'ThanosStoreBucketHighOperationFailures', annotations: { - message: 'Thanos Store {{$labels.job}} Bucket is failing to execute {{ $value | humanize }}% of operations.', + description: 'Thanos Store {{$labels.job}} Bucket is failing to execute {{ $value | humanize }}% of operations.', + summary: 'Thanos Store Bucket is failing to execute operations.', }, expr: ||| ( @@ -69,7 +72,8 @@ { alert: 'ThanosStoreObjstoreOperationLatencyHigh', annotations: { - message: 'Thanos Store {{$labels.job}} Bucket has a 99th percentile latency of {{ $value }} seconds for the bucket operations.', + description: 'Thanos Store {{$labels.job}} Bucket has a 99th percentile latency of {{ $value }} seconds for the bucket operations.', + summary: 'Thanos Store is having high latency for bucket operations.', }, expr: ||| ( From d17cd0e89c6ce696d86813026c8278a5759da507 Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Mon, 17 Aug 2020 09:19:13 -0400 Subject: [PATCH 2/6] add instrumentation middlewares to query frontend (#3030) Signed-off-by: Ben Ye --- cmd/thanos/query-frontend.go | 41 ++++++++++++++++++++++++++----- docs/components/query-frontend.md | 7 ++++++ 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/cmd/thanos/query-frontend.go b/cmd/thanos/query-frontend.go index f7e376d97cb..2e05cb62b4b 100644 --- a/cmd/thanos/query-frontend.go +++ b/cmd/thanos/query-frontend.go @@ -7,6 +7,7 @@ import ( "net/http" "time" + "github.com/NYTimes/gziphandler" "github.com/cortexproject/cortex/pkg/querier/frontend" "github.com/cortexproject/cortex/pkg/querier/queryrange" "github.com/go-kit/kit/log" @@ -22,10 +23,14 @@ import ( "github.com/thanos-io/thanos/pkg/component" "github.com/thanos-io/thanos/pkg/extflag" "github.com/thanos-io/thanos/pkg/extprom" + extpromhttp "github.com/thanos-io/thanos/pkg/extprom/http" + "github.com/thanos-io/thanos/pkg/logging" "github.com/thanos-io/thanos/pkg/prober" "github.com/thanos-io/thanos/pkg/queryfrontend" "github.com/thanos-io/thanos/pkg/queryfrontend/cache" httpserver "github.com/thanos-io/thanos/pkg/server/http" + "github.com/thanos-io/thanos/pkg/server/http/middleware" + "github.com/thanos-io/thanos/pkg/tracing" ) type queryFrontendConfig struct { @@ -35,6 +40,8 @@ type queryFrontendConfig struct { downstreamURL string compressResponses bool LogQueriesLongerThan time.Duration + + requestLoggingDecision string } type queryRangeConfig struct { @@ -77,6 +84,8 @@ func (c *queryFrontendConfig) registerFlag(cmd *kingpin.CmdClause) { cmd.Flag("query-frontend.log_queries_longer_than", "Log queries that are slower than the specified duration. "+ "Set to 0 to disable. Set to < 0 to enable on all queries.").Default("0").DurationVar(&c.LogQueriesLongerThan) + + cmd.Flag("log.request.decision", "Request Logging for logging the start and end of requests. LogFinishCall is enabled by default. LogFinishCall : Logs the finish call of the requests. LogStartAndFinishCall : Logs the start and finish call of the requests. 
NoLogCall : Disable request logging.").Default("LogFinishCall").EnumVar(&c.requestLoggingDecision, "NoLogCall", "LogFinishCall", "LogStartAndFinishCall") } func registerQueryFrontend(m map[string]setupFunc, app *kingpin.Application) { @@ -85,8 +94,8 @@ func registerQueryFrontend(m map[string]setupFunc, app *kingpin.Application) { conf := &queryFrontendConfig{} conf.registerFlag(cmd) - m[comp.String()] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, _ opentracing.Tracer, _ <-chan struct{}, _ bool) error { - return runQueryFrontend(g, logger, reg, conf, comp) + m[comp.String()] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ <-chan struct{}, _ bool) error { + return runQueryFrontend(g, logger, reg, tracer, conf, comp) } } @@ -94,6 +103,7 @@ func runQueryFrontend( g *run.Group, logger log.Logger, reg *prometheus.Registry, + tracer opentracing.Tracer, conf *queryFrontendConfig, comp component.Component, ) error { @@ -153,6 +163,13 @@ func runQueryFrontend( prober.NewInstrumentation(comp, logger, extprom.WrapRegistererWithPrefix("thanos_", reg)), ) + // Configure Request Logging for HTTP calls. + opts := []logging.Option{logging.WithDecider(func() logging.Decision { + return logging.LogDecision[conf.requestLoggingDecision] + })} + logMiddleware := logging.NewHTTPServerMiddleware(logger, opts...) + ins := extpromhttp.NewInstrumentationMiddleware(reg) + // Start metrics HTTP server. { srv := httpserver.New(logger, reg, comp, httpProbe, @@ -160,14 +177,26 @@ func runQueryFrontend( httpserver.WithGracePeriod(time.Duration(conf.http.gracePeriod)), ) - injectf := func(f http.HandlerFunc) http.HandlerFunc { + instr := func(f http.HandlerFunc) http.HandlerFunc { hf := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - // Cortex frontend middlewares require orgID. - f.ServeHTTP(w, r.WithContext(user.InjectOrgID(r.Context(), "fake"))) + name := "query-frontend" + ins.NewHandler( + name, + logMiddleware.HTTPMiddleware( + name, + tracing.HTTPMiddleware( + tracer, + name, + logger, + gziphandler.GzipHandler(middleware.RequestID(f)), + ), + ), + // Cortex frontend middlewares require orgID. + ).ServeHTTP(w, r.WithContext(user.InjectOrgID(r.Context(), "fake"))) }) return hf } - srv.Handle("/", injectf(fe.Handler().ServeHTTP)) + srv.Handle("/", instr(fe.Handler().ServeHTTP)) g.Add(func() error { statusProber.Healthy() diff --git a/docs/components/query-frontend.md b/docs/components/query-frontend.md index 23ffa9eb743..80211e71e63 100644 --- a/docs/components/query-frontend.md +++ b/docs/components/query-frontend.md @@ -125,5 +125,12 @@ Flags: Log queries that are slower than the specified duration. Set to 0 to disable. Set to < 0 to enable on all queries. + --log.request.decision=LogFinishCall + Request Logging for logging the start and end of + requests. LogFinishCall is enabled by default. + LogFinishCall : Logs the finish call of the + requests. LogStartAndFinishCall : Logs the start + and finish call of the requests. NoLogCall : + Disable request logging. 
``` From ef3bbaef313e4d2e77f30e7d76ad18466b48d1f1 Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Mon, 17 Aug 2020 12:27:41 -0400 Subject: [PATCH 3/6] Add more validations for memcached client config (#3034) * Add more rules when validting memcached config Signed-off-by: Ben Ye * update error message description Co-authored-by: Marco Pracucci Signed-off-by: Ben Ye Co-authored-by: Marco Pracucci --- pkg/cacheutil/memcached_client.go | 16 ++++++++++++++-- pkg/cacheutil/memcached_client_test.go | 23 +++++++++++++++++++++-- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/pkg/cacheutil/memcached_client.go b/pkg/cacheutil/memcached_client.go index 90431d1251d..1ac20d436b6 100644 --- a/pkg/cacheutil/memcached_client.go +++ b/pkg/cacheutil/memcached_client.go @@ -38,8 +38,10 @@ const ( ) var ( - errMemcachedAsyncBufferFull = errors.New("the async buffer is full") - errMemcachedConfigNoAddrs = errors.New("no memcached addresses provided") + errMemcachedAsyncBufferFull = errors.New("the async buffer is full") + errMemcachedConfigNoAddrs = errors.New("no memcached addresses provided") + errMemcachedDNSUpdateIntervalNotPositive = errors.New("DNS provider update interval must be positive") + errMemcachedMaxAsyncConcurrencyNotPositive = errors.New("max async concurrency must be positive") defaultMemcachedClientConfig = MemcachedClientConfig{ Timeout: 500 * time.Millisecond, @@ -120,6 +122,16 @@ func (c *MemcachedClientConfig) validate() error { return errMemcachedConfigNoAddrs } + // Avoid panic in time ticker. + if c.DNSProviderUpdateInterval <= 0 { + return errMemcachedDNSUpdateIntervalNotPositive + } + + // Set async only available when MaxAsyncConcurrency > 0. + if c.MaxAsyncConcurrency <= 0 { + return errMemcachedMaxAsyncConcurrencyNotPositive + } + return nil } diff --git a/pkg/cacheutil/memcached_client_test.go b/pkg/cacheutil/memcached_client_test.go index 02a96bf61f4..61729e7a575 100644 --- a/pkg/cacheutil/memcached_client_test.go +++ b/pkg/cacheutil/memcached_client_test.go @@ -27,16 +27,35 @@ func TestMemcachedClientConfig_validate(t *testing.T) { }{ "should pass on valid config": { config: MemcachedClientConfig{ - Addresses: []string{"127.0.0.1:11211"}, + Addresses: []string{"127.0.0.1:11211"}, + MaxAsyncConcurrency: 1, + DNSProviderUpdateInterval: time.Second, }, expected: nil, }, "should fail on no addresses": { config: MemcachedClientConfig{ - Addresses: []string{}, + Addresses: []string{}, + MaxAsyncConcurrency: 1, + DNSProviderUpdateInterval: time.Second, }, expected: errMemcachedConfigNoAddrs, }, + "should fail on max_async_concurrency <= 0": { + config: MemcachedClientConfig{ + Addresses: []string{"127.0.0.1:11211"}, + MaxAsyncConcurrency: 0, + DNSProviderUpdateInterval: time.Second, + }, + expected: errMemcachedMaxAsyncConcurrencyNotPositive, + }, + "should fail on dns_provider_update_interval <= 0": { + config: MemcachedClientConfig{ + Addresses: []string{"127.0.0.1:11211"}, + MaxAsyncConcurrency: 1, + }, + expected: errMemcachedDNSUpdateIntervalNotPositive, + }, } for testName, testData := range tests { From 032771b278722f7970f1e768f349f259bdeb56b5 Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Tue, 18 Aug 2020 04:26:30 -0400 Subject: [PATCH 4/6] update storeMatch[] usage (#3044) Signed-off-by: Ben Ye --- CHANGELOG.md | 2 +- docs/components/query.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bed66cb5e1..56aefcc1e8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,7 +47,7 @@ We use *breaking* word for marking 
changes that are not backward compatible (rel - [#2893](https://github.com/thanos-io/thanos/pull/2893) Store: Rename metric `thanos_bucket_store_cached_postings_compression_time_seconds` to `thanos_bucket_store_cached_postings_compression_time_seconds_total`. - [#2915](https://github.com/thanos-io/thanos/pull/2915) Receive,Ruler: Enable TSDB directory locking by default. Add a new flag (`--tsdb.no-lockfile`) to override behavior. - [#2902](https://github.com/thanos-io/thanos/pull/2902) ui: React: Separate dedupe and partial response checkboxes per panel. -- [#2931](https://github.com/thanos-io/thanos/pull/2931) Query: Allow passing a `storeMatcher[]` to select matching stores when debugging the querier. See [documentation](https://thanos.io/components/query.md/#store-filtering) +- [#2931](https://github.com/thanos-io/thanos/pull/2931) Query: Allow passing a `storeMatch[]` to select matching stores when debugging the querier. See [documentation](https://thanos.io/components/query.md/#store-filtering) - [#2991](https://github.com/thanos-io/thanos/pull/2991) store: `operation` label value `getrange` changed to `get_range` for `thanos_store_bucket_cache_operation_requests_total` and `thanos_store_bucket_cache_operation_hits_total` to be consistent with bucket operation metrics. - [#2876](https://github.com/thanos-io/thanos/pull/2876) Receive,Ruler: Updated TSDB and switched to ChunkIterators instead of sample one, which avoids unnecessary decoding / encoding. diff --git a/docs/components/query.md b/docs/components/query.md index 94d7fe2fa7b..bf7808ac1b3 100644 --- a/docs/components/query.md +++ b/docs/components/query.md @@ -226,7 +226,7 @@ Keep in mind that the maximum number of concurrent queries that are handled by q It's possible to provide a set of matchers to the Querier api to select specific stores to be used during the query using the `storeMatch[]` parameter. It is useful when debugging a slow/broken store. It uses the same format as the matcher of [Prometheus' federate api](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers). -Note that at the moment the querier only supports the `__address__` which contain the address of the store as it is shown on the `/stores` endoint of the UI. +Note that at the moment the querier only supports the `__address__` which contain the address of the store as it is shown on the `/stores` endpoint of the UI. Example: ``` @@ -236,7 +236,7 @@ Example: ``` ``` -http://localhost:10901/api/v1/query?query=up&dedup=true&partial_response=true&storeMatch={__address__=~"prometheus-foo.*"} +http://localhost:10901/api/v1/query?query=up&dedup=true&partial_response=true&storeMatch[]={__address__=~"prometheus-foo.*"} ``` Will only return metrics from `prometheus-foo.thanos-sidecar:10901` From cd704d07f07a7fed2420ee697beb66ce7ec4dc80 Mon Sep 17 00:00:00 2001 From: Prem Kumar Date: Tue, 18 Aug 2020 15:14:24 +0530 Subject: [PATCH 5/6] ui: Fix too many redirects when using query frontend (#3049) Signed-off-by: Prem Kumar --- pkg/ui/query.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/ui/query.go b/pkg/ui/query.go index a7440ced4d9..53f7bd13396 100644 --- a/pkg/ui/query.go +++ b/pkg/ui/query.go @@ -84,7 +84,7 @@ func (q *Query) Register(r *route.Router, ins extpromhttp.InstrumentationMiddlew // and which breaks users with a --web.route-prefix that deviates from the path derived // from the external URL. 
r.Get("/new", func(w http.ResponseWriter, r *http.Request) { - http.Redirect(w, r, path.Join(GetWebPrefix(q.logger, q.externalPrefix, q.prefixHeader, r), "new")+"/", http.StatusFound) + http.Redirect(w, r, path.Join(GetWebPrefix(q.logger, q.externalPrefix, q.prefixHeader, r), "new")+"/graph", http.StatusFound) }) r.Get("/new/*filepath", instrf("react-static", q.serveReactUI)) From 73a75e638596d314ab922c244d8dfc43a37fd3df Mon Sep 17 00:00:00 2001 From: Max Neverov <1296281+mneverov@users.noreply.github.com> Date: Tue, 18 Aug 2020 20:46:58 +0200 Subject: [PATCH 6/6] Replace leaktest with goleak (#3029) Signed-off-by: Max Neverov --- cmd/thanos/receive.go | 1 + go.mod | 2 +- go.sum | 3 +- pkg/api/query/v1_test.go | 7 +-- pkg/block/block_test.go | 12 ++--- pkg/block/metadata/deletionmark_test.go | 9 ++-- pkg/cacheutil/cacheutil_test.go | 14 ++++++ pkg/cacheutil/memcached_client_test.go | 9 ---- .../memcached_server_selector_test.go | 13 +----- pkg/compact/downsample/aggr_test.go | 5 +- pkg/compact/downsample/downsample_test.go | 13 +++--- pkg/objstore/objtesting/foreach.go | 6 +-- pkg/pool/pool_test.go | 13 +++--- .../test-storeset-pre-v0.8.0/storeset_test.go | 16 +++---- pkg/query/querier_test.go | 21 --------- pkg/query/query_test.go | 14 ++++++ pkg/query/storeset_test.go | 8 +--- pkg/receive/config.go | 6 +-- pkg/receive/config_test.go | 46 +++++++++---------- pkg/receive/handler_test.go | 6 +-- pkg/receive/multitsdb_test.go | 5 +- pkg/receive/receive_test.go | 14 ++++++ pkg/reloader/reloader_test.go | 13 +++--- pkg/rules/manager_test.go | 12 +++-- pkg/rules/prometheus_test.go | 5 +- pkg/rules/rules_test.go | 4 ++ pkg/store/bucket_test.go | 23 ++++++---- pkg/store/cache/cache_test.go | 5 ++ pkg/store/cache/inmemory_test.go | 12 ----- pkg/store/cache/memcached_test.go | 2 - pkg/store/multitsdb_test.go | 13 +++--- pkg/store/prometheus_test.go | 27 ++++------- pkg/store/proxy_test.go | 28 +++++------ pkg/store/tsdb_test.go | 16 +++---- pkg/testutil/testutil.go | 25 ++++++++++ pkg/tracing/stackdriver/tracer_test.go | 8 ++-- 36 files changed, 222 insertions(+), 214 deletions(-) create mode 100644 pkg/cacheutil/cacheutil_test.go create mode 100644 pkg/query/query_test.go create mode 100644 pkg/receive/receive_test.go diff --git a/cmd/thanos/receive.go b/cmd/thanos/receive.go index f150faa7115..ef56d0a0441 100644 --- a/cmd/thanos/receive.go +++ b/cmd/thanos/receive.go @@ -373,6 +373,7 @@ func runReceive( if cw != nil { // Check the hashring configuration on before running the watcher. 
if err := cw.ValidateConfig(); err != nil { + cw.Stop() close(updates) return errors.Wrap(err, "failed to validate hashring configuration file") } diff --git a/go.mod b/go.mod index f93287a7ed9..63b24c27e39 100644 --- a/go.mod +++ b/go.mod @@ -16,7 +16,6 @@ require ( github.com/davecgh/go-spew v1.1.1 github.com/facette/natsort v0.0.0-20181210072756-2cd4dd1e2dcb github.com/fatih/structtag v1.1.0 - github.com/fortytw2/leaktest v1.3.0 github.com/fsnotify/fsnotify v1.4.7 github.com/go-kit/kit v0.10.0 github.com/go-openapi/strfmt v0.19.5 @@ -56,6 +55,7 @@ require ( go.elastic.co/apm/module/apmot v1.5.0 go.uber.org/atomic v1.6.0 go.uber.org/automaxprocs v1.2.0 + go.uber.org/goleak v1.1.0 golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208 diff --git a/go.sum b/go.sum index 69bb84b59aa..d99b5acb175 100644 --- a/go.sum +++ b/go.sum @@ -272,7 +272,6 @@ github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL github.com/fatih/structtag v1.1.0 h1:6j4mUV/ES2duvnAzKMFkN6/A5mCaNYPD3xfbAkLLOF8= github.com/fatih/structtag v1.1.0/go.mod h1:mBJUNpUnHmRKrKlQQlmCrh5PuhftFbNv8Ys4/aAZl94= github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= -github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw= github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g= github.com/franela/goblin v0.0.0-20200105215937-c9ffbefa60db/go.mod h1:7dvUGVsVBjqR7JHJk0brhHOZYGmfBYOrK0ZhYMEtBr4= github.com/franela/goreq v0.0.0-20171204163338-bcd34c9993f8/go.mod h1:ZhphrRTfi2rbfLwlschooIH4+wKKDR4Pdxhh+TRoA20= @@ -1042,6 +1041,8 @@ go.uber.org/automaxprocs v1.2.0 h1:+RUihKM+nmYUoB9w0D0Ov5TJ2PpFO2FgenTxMJiZBZA= go.uber.org/automaxprocs v1.2.0/go.mod h1:YfO3fm683kQpzETxlTGZhGIVmXAhaw3gxeBADbpZtnU= go.uber.org/goleak v1.0.0 h1:qsup4IcBdlmsnGfqyLl4Ntn3C2XCCuKAE7DwHpScyUo= go.uber.org/goleak v1.0.0/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= +go.uber.org/goleak v1.1.0 h1:MJDxhkyAAWXEJf/y4NSOPYD/bBx7JAzIjUbv12/4FFs= +go.uber.org/goleak v1.1.0/go.mod h1:8a7PlsEVH3e/a/GLqe5IIrQx6GzcnRmZEufDUTk4A7A= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.4.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= diff --git a/pkg/api/query/v1_test.go b/pkg/api/query/v1_test.go index 91c222a3d88..05a91dcc15d 100644 --- a/pkg/api/query/v1_test.go +++ b/pkg/api/query/v1_test.go @@ -29,7 +29,6 @@ import ( "testing" "time" - "github.com/fortytw2/leaktest" "github.com/prometheus/common/route" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/pkg/timestamp" @@ -51,9 +50,11 @@ import ( "github.com/thanos-io/thanos/pkg/testutil/testpromcompatibility" ) -func TestEndpoints(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() +func TestMain(m *testing.M) { + testutil.TolerantVerifyLeakMain(m) +} +func TestEndpoints(t *testing.T) { lbls := []labels.Labels{ { labels.Label{Name: "__name__", Value: "test_metric1"}, diff --git a/pkg/block/block_test.go b/pkg/block/block_test.go index 2c35197e7c9..1df38bce718 100644 --- a/pkg/block/block_test.go +++ b/pkg/block/block_test.go @@ -14,8 +14,8 @@ import ( "testing" "time" - "github.com/fortytw2/leaktest" "github.com/go-kit/kit/log" + "github.com/oklog/ulid" 
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" promtest "github.com/prometheus/client_golang/prometheus/testutil" @@ -24,8 +24,6 @@ import ( "github.com/thanos-io/thanos/pkg/objstore" "github.com/thanos-io/thanos/pkg/testutil" "github.com/thanos-io/thanos/pkg/testutil/e2eutil" - - "github.com/oklog/ulid" ) func TestIsBlockDir(t *testing.T) { @@ -75,7 +73,7 @@ func TestIsBlockDir(t *testing.T) { } func TestUpload(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) ctx := context.Background() @@ -179,8 +177,7 @@ func TestUpload(t *testing.T) { } func TestDelete(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - + defer testutil.TolerantVerifyLeak(t) ctx := context.Background() tmpDir, err := ioutil.TempDir("", "test-block-delete") @@ -226,8 +223,7 @@ func TestDelete(t *testing.T) { } func TestMarkForDeletion(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - + defer testutil.TolerantVerifyLeak(t) ctx := context.Background() tmpDir, err := ioutil.TempDir("", "test-block-mark-for-delete") diff --git a/pkg/block/metadata/deletionmark_test.go b/pkg/block/metadata/deletionmark_test.go index 02f259f856d..220c9dc1718 100644 --- a/pkg/block/metadata/deletionmark_test.go +++ b/pkg/block/metadata/deletionmark_test.go @@ -13,16 +13,19 @@ import ( "testing" "time" - "github.com/fortytw2/leaktest" "github.com/oklog/ulid" "github.com/pkg/errors" + "go.uber.org/goleak" + "github.com/thanos-io/thanos/pkg/objstore" "github.com/thanos-io/thanos/pkg/testutil" ) -func TestReadDeletionMark(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} +func TestReadDeletionMark(t *testing.T) { ctx := context.Background() tmpDir, err := ioutil.TempDir("", "test-read-deletion-mark") diff --git a/pkg/cacheutil/cacheutil_test.go b/pkg/cacheutil/cacheutil_test.go new file mode 100644 index 00000000000..4c422fa4822 --- /dev/null +++ b/pkg/cacheutil/cacheutil_test.go @@ -0,0 +1,14 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package cacheutil + +import ( + "testing" + + "go.uber.org/goleak" +) + +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} diff --git a/pkg/cacheutil/memcached_client_test.go b/pkg/cacheutil/memcached_client_test.go index 61729e7a575..209de5dfb76 100644 --- a/pkg/cacheutil/memcached_client_test.go +++ b/pkg/cacheutil/memcached_client_test.go @@ -10,7 +10,6 @@ import ( "time" "github.com/bradfitz/gomemcache/memcache" - "github.com/fortytw2/leaktest" "github.com/go-kit/kit/log" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" @@ -66,8 +65,6 @@ func TestMemcachedClientConfig_validate(t *testing.T) { } func TestNewMemcachedClient(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - // Should return error on empty YAML config. 
conf := []byte{} cache, err := NewMemcachedClient(log.NewNopLogger(), "test", conf, nil) @@ -130,8 +127,6 @@ dns_provider_update_interval: 1s } func TestMemcachedClient_SetAsync(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - ctx := context.Background() config := defaultMemcachedClientConfig config.Addresses = []string{"127.0.0.1:11211"} @@ -157,8 +152,6 @@ func TestMemcachedClient_SetAsync(t *testing.T) { } func TestMemcachedClient_SetAsyncWithCustomMaxItemSize(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - ctx := context.Background() config := defaultMemcachedClientConfig config.Addresses = []string{"127.0.0.1:11211"} @@ -185,8 +178,6 @@ func TestMemcachedClient_SetAsyncWithCustomMaxItemSize(t *testing.T) { } func TestMemcachedClient_GetMulti(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - tests := map[string]struct { maxBatchSize int maxConcurrency int diff --git a/pkg/cacheutil/memcached_server_selector_test.go b/pkg/cacheutil/memcached_server_selector_test.go index ab5848bb077..a827d2c1290 100644 --- a/pkg/cacheutil/memcached_server_selector_test.go +++ b/pkg/cacheutil/memcached_server_selector_test.go @@ -7,12 +7,11 @@ import ( "fmt" "net" "testing" - "time" "github.com/bradfitz/gomemcache/memcache" "github.com/facette/natsort" - "github.com/fortytw2/leaktest" "github.com/pkg/errors" + "github.com/thanos-io/thanos/pkg/testutil" ) @@ -42,8 +41,6 @@ func TestNatSort(t *testing.T) { } func TestMemcachedJumpHashSelector_PickServer(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - tests := []struct { addrs []string key string @@ -90,8 +87,6 @@ func TestMemcachedJumpHashSelector_PickServer(t *testing.T) { } func TestMemcachedJumpHashSelector_Each_ShouldRespectServersOrdering(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - tests := []struct { input []string expected []string @@ -123,8 +118,6 @@ func TestMemcachedJumpHashSelector_Each_ShouldRespectServersOrdering(t *testing. 
} func TestMemcachedJumpHashSelector_PickServer_ShouldEvenlyDistributeKeysToServers(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - servers := []string{"127.0.0.1:11211", "127.0.0.2:11211", "127.0.0.3:11211"} selector := MemcachedJumpHashSelector{} testutil.Ok(t, selector.SetServers(servers...)) @@ -151,8 +144,6 @@ func TestMemcachedJumpHashSelector_PickServer_ShouldEvenlyDistributeKeysToServer } func TestMemcachedJumpHashSelector_PickServer_ShouldUseConsistentHashing(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - servers := []string{ "127.0.0.1:11211", "127.0.0.2:11211", @@ -205,8 +196,6 @@ func TestMemcachedJumpHashSelector_PickServer_ShouldUseConsistentHashing(t *test } func TestMemcachedJumpHashSelector_PickServer_ShouldReturnErrNoServersOnNoServers(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - s := MemcachedJumpHashSelector{} _, err := s.PickServer("foo") testutil.Equals(t, memcache.ErrNoServers, err) diff --git a/pkg/compact/downsample/aggr_test.go b/pkg/compact/downsample/aggr_test.go index 62a1b7fe1ab..52f92b65df2 100644 --- a/pkg/compact/downsample/aggr_test.go +++ b/pkg/compact/downsample/aggr_test.go @@ -5,16 +5,13 @@ package downsample import ( "testing" - "time" - "github.com/fortytw2/leaktest" "github.com/prometheus/prometheus/tsdb/chunkenc" + "github.com/thanos-io/thanos/pkg/testutil" ) func TestAggrChunk(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - var input [5][]sample input[AggrCount] = []sample{{100, 30}, {200, 50}, {300, 60}, {400, 67}} diff --git a/pkg/compact/downsample/downsample_test.go b/pkg/compact/downsample/downsample_test.go index 15360a29987..bfbd32ac75e 100644 --- a/pkg/compact/downsample/downsample_test.go +++ b/pkg/compact/downsample/downsample_test.go @@ -10,9 +10,7 @@ import ( "path/filepath" "sort" "testing" - "time" - "github.com/fortytw2/leaktest" "github.com/go-kit/kit/log" "github.com/pkg/errors" "github.com/prometheus/prometheus/pkg/labels" @@ -23,13 +21,18 @@ import ( "github.com/prometheus/prometheus/tsdb/chunks" "github.com/prometheus/prometheus/tsdb/index" "github.com/prometheus/prometheus/tsdb/tombstones" + "go.uber.org/goleak" + "github.com/thanos-io/thanos/pkg/block" "github.com/thanos-io/thanos/pkg/block/metadata" "github.com/thanos-io/thanos/pkg/testutil" ) -func TestDownsampleCounterBoundaryReset(t *testing.T) { +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} +func TestDownsampleCounterBoundaryReset(t *testing.T) { toAggrChunks := func(t *testing.T, cm []chunks.Meta) (res []*AggrChunk) { for i := range cm { achk, ok := cm[i].Chunk.(*AggrChunk) @@ -207,8 +210,6 @@ var ( ) func TestDownsample(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - type downsampleTestCase struct { name string @@ -594,8 +595,6 @@ var ( ) func TestApplyCounterResetsIterator(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - for _, tcase := range []struct { name string diff --git a/pkg/objstore/objtesting/foreach.go b/pkg/objstore/objtesting/foreach.go index a4eb15d5211..3cc54a27e02 100644 --- a/pkg/objstore/objtesting/foreach.go +++ b/pkg/objstore/objtesting/foreach.go @@ -74,7 +74,7 @@ func ForeachStore(t *testing.T, testFn func(t *testing.T, bkt objstore.Bucket)) t.Parallel() defer closeFn() - // TODO(bwplotka): Add leaktest when https://github.com/GoogleCloudPlatform/google-cloud-go/issues/1025 is resolved. + // TODO(bwplotka): Add goleak when https://github.com/GoogleCloudPlatform/google-cloud-go/issues/1025 is resolved. 
testFn(t, bkt) }) } @@ -89,8 +89,8 @@ func ForeachStore(t *testing.T, testFn func(t *testing.T, bkt objstore.Bucket)) t.Parallel() defer closeFn() - // TODO(bwplotka): Add leaktest when we fix potential leak in minio library. - // We cannot use leaktest for detecting our own potential leaks, when leaktest detects leaks in minio itself. + // TODO(bwplotka): Add goleak when we fix potential leak in minio library. + // We cannot use goleak for detecting our own potential leaks, when goleak detects leaks in minio itself. // This needs to be investigated more. testFn(t, bkt) diff --git a/pkg/pool/pool_test.go b/pkg/pool/pool_test.go index 8bf3c302c3b..6e530ac0ef3 100644 --- a/pkg/pool/pool_test.go +++ b/pkg/pool/pool_test.go @@ -10,11 +10,16 @@ import ( "testing" "time" - "github.com/fortytw2/leaktest" "github.com/pkg/errors" + "go.uber.org/goleak" + "github.com/thanos-io/thanos/pkg/testutil" ) +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} + func TestBytesPool(t *testing.T) { chunkPool, err := NewBucketedBytesPool(10, 100, 2, 1000) testutil.Ok(t, err) @@ -63,7 +68,6 @@ func TestBytesPool(t *testing.T) { func TestRacePutGet(t *testing.T) { chunkPool, err := NewBucketedBytesPool(3, 100, 2, 5000) testutil.Ok(t, err) - defer leaktest.CheckTimeout(t, 10*time.Second)() s := sync.WaitGroup{} @@ -74,16 +78,15 @@ func TestRacePutGet(t *testing.T) { stop := make(chan bool, 2) f := func(txt string) { + defer s.Done() for { select { case <-stop: - s.Done() return default: c, err := chunkPool.Get(3) if err != nil { errs <- errors.Wrapf(err, "goroutine %s", txt) - s.Done() return } @@ -92,13 +95,11 @@ func TestRacePutGet(t *testing.T) { _, err = fmt.Fprintf(buf, "%s", txt) if err != nil { errs <- errors.Wrapf(err, "goroutine %s", txt) - s.Done() return } if buf.String() != txt { errs <- errors.New("expected to get the data just written") - s.Done() return } diff --git a/pkg/query/internal/test-storeset-pre-v0.8.0/storeset_test.go b/pkg/query/internal/test-storeset-pre-v0.8.0/storeset_test.go index 14b3635a804..706758957ae 100644 --- a/pkg/query/internal/test-storeset-pre-v0.8.0/storeset_test.go +++ b/pkg/query/internal/test-storeset-pre-v0.8.0/storeset_test.go @@ -9,19 +9,17 @@ import ( "math" "net" "os" + "sort" "testing" "time" + "github.com/thanos-io/thanos/pkg/component" "github.com/thanos-io/thanos/pkg/store" + "github.com/thanos-io/thanos/pkg/store/storepb" + "github.com/thanos-io/thanos/pkg/testutil" - "sort" - - "github.com/fortytw2/leaktest" "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" - "github.com/thanos-io/thanos/pkg/component" - "github.com/thanos-io/thanos/pkg/store/storepb" - "github.com/thanos-io/thanos/pkg/testutil" "google.golang.org/grpc" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" @@ -124,9 +122,11 @@ func specsFromAddrFunc(addrs []string) func() []StoreSpec { } } -func TestPre0_8_0_StoreSet_AgainstNewStoreGW(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() +func TestMain(m *testing.M) { + testutil.TolerantVerifyLeakMain(m) +} +func TestPre0_8_0_StoreSet_AgainstNewStoreGW(t *testing.T) { st, err := startTestStores([]testStoreMeta{ { storeType: component.Sidecar, diff --git a/pkg/query/querier_test.go b/pkg/query/querier_test.go index 9ac6196c4e3..18d24f6efa6 100644 --- a/pkg/query/querier_test.go +++ b/pkg/query/querier_test.go @@ -16,7 +16,6 @@ import ( "testing" "time" - "github.com/fortytw2/leaktest" "github.com/go-kit/kit/log" "github.com/pkg/errors" "github.com/prometheus/prometheus/pkg/gate" @@ -39,7 +38,6 @@ type 
sample struct { } func TestQueryableCreator_MaxResolution(t *testing.T) { - t.Cleanup(leaktest.CheckTimeout(t, 10*time.Second)) testProxy := &storeServer{resps: []*storepb.SeriesResponse{}} queryableCreator := NewQueryableCreator(nil, nil, testProxy, 2, 5*time.Second) @@ -59,7 +57,6 @@ func TestQueryableCreator_MaxResolution(t *testing.T) { // Tests E2E how PromQL works with downsampled data. func TestQuerier_DownsampledData(t *testing.T) { - t.Cleanup(leaktest.CheckTimeout(t, 10*time.Second)) testProxy := &storeServer{ resps: []*storepb.SeriesResponse{ storeSeriesResponse(t, labels.FromStrings("__name__", "a", "zzz", "a", "aaa", "bbb"), []sample{{99, 1}, {199, 5}}), // Downsampled chunk from Store. @@ -515,8 +512,6 @@ func TestQuerier_Select(t *testing.T) { t.Run(fmt.Sprintf("dedup=%v", sc.dedup), func(t *testing.T) { t.Run("querier.Select", func(t *testing.T) { - t.Cleanup(leaktest.CheckTimeout(t, 10*time.Second)) - res := q.Select(false, tcase.hints, tcase.matchers...) testSelectResponse(t, sc.expected, res) @@ -528,8 +523,6 @@ func TestQuerier_Select(t *testing.T) { }) // Integration test: Make sure the PromQL would select exactly the same. t.Run("through PromQL with 100s step", func(t *testing.T) { - t.Cleanup(leaktest.CheckTimeout(t, 10*time.Second)) - catcher := &querierResponseCatcher{t: t, Querier: q} q, err := e.NewRangeQuery(&mockedQueryable{catcher}, tcase.equivalentQuery, timestamp.Time(tcase.mint), timestamp.Time(tcase.maxt), 100*time.Second) testutil.Ok(t, err) @@ -691,8 +684,6 @@ func TestQuerierWithDedupUnderstoodByPromQL_Rate(t *testing.T) { MaxSamples: math.MaxInt64, }) t.Run("Rate=5mStep=100s", func(t *testing.T) { - t.Cleanup(leaktest.CheckTimeout(t, 10*time.Second)) - q, err := e.NewRangeQuery(&mockedQueryable{q}, `rate(gitlab_transaction_cache_read_hit_count_total[5m])`, timestamp.Time(realSeriesWithStaleMarkerMint).Add(5*time.Minute), timestamp.Time(realSeriesWithStaleMarkerMaxt), 100*time.Second) testutil.Ok(t, err) @@ -722,8 +713,6 @@ func TestQuerierWithDedupUnderstoodByPromQL_Rate(t *testing.T) { }, vec) }) t.Run("Rate=30mStep=500s", func(t *testing.T) { - t.Cleanup(leaktest.CheckTimeout(t, 10*time.Second)) - q, err := e.NewRangeQuery(&mockedQueryable{q}, `rate(gitlab_transaction_cache_read_hit_count_total[30m])`, timestamp.Time(realSeriesWithStaleMarkerMint).Add(30*time.Minute), timestamp.Time(realSeriesWithStaleMarkerMaxt), 500*time.Second) testutil.Ok(t, err) @@ -765,8 +754,6 @@ func TestQuerierWithDedupUnderstoodByPromQL_Rate(t *testing.T) { MaxSamples: math.MaxInt64, }) t.Run("Rate=5mStep=100s", func(t *testing.T) { - t.Cleanup(leaktest.CheckTimeout(t, 10*time.Second)) - q, err := e.NewRangeQuery(&mockedQueryable{q}, `rate(gitlab_transaction_cache_read_hit_count_total[5m])`, timestamp.Time(realSeriesWithStaleMarkerMint).Add(5*time.Minute), timestamp.Time(realSeriesWithStaleMarkerMaxt), 100*time.Second) testutil.Ok(t, err) @@ -791,8 +778,6 @@ func TestQuerierWithDedupUnderstoodByPromQL_Rate(t *testing.T) { }, vec) }) t.Run("Rate=30mStep=500s", func(t *testing.T) { - t.Cleanup(leaktest.CheckTimeout(t, 10*time.Second)) - q, err := e.NewRangeQuery(&mockedQueryable{q}, `rate(gitlab_transaction_cache_read_hit_count_total[30m])`, timestamp.Time(realSeriesWithStaleMarkerMint).Add(30*time.Minute), timestamp.Time(realSeriesWithStaleMarkerMaxt), 500*time.Second) testutil.Ok(t, err) @@ -815,8 +800,6 @@ func TestQuerierWithDedupUnderstoodByPromQL_Rate(t *testing.T) { } func TestSortReplicaLabel(t *testing.T) { - t.Cleanup(leaktest.CheckTimeout(t, 10*time.Second)) - 
tests := []struct { input []storepb.Series exp []storepb.Series @@ -882,8 +865,6 @@ func expandSeries(t testing.TB, it chunkenc.Iterator) (res []sample) { } func TestDedupSeriesSet(t *testing.T) { - t.Cleanup(leaktest.CheckTimeout(t, 10*time.Second)) - tests := []struct { input []series exp []series @@ -1213,8 +1194,6 @@ func TestDedupSeriesSet(t *testing.T) { } func TestDedupSeriesIterator(t *testing.T) { - t.Cleanup(leaktest.CheckTimeout(t, 10*time.Second)) - // The deltas between timestamps should be at least 10000 to not be affected // by the initial penalty of 5000, that will cause the second iterator to seek // ahead this far at least once. diff --git a/pkg/query/query_test.go b/pkg/query/query_test.go new file mode 100644 index 00000000000..d72a6d72dde --- /dev/null +++ b/pkg/query/query_test.go @@ -0,0 +1,14 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package query + +import ( + "testing" + + "github.com/thanos-io/thanos/pkg/testutil" +) + +func TestMain(m *testing.M) { + testutil.TolerantVerifyLeakMain(m) +} diff --git a/pkg/query/storeset_test.go b/pkg/query/storeset_test.go index 739c5434f74..c9eaf8a7556 100644 --- a/pkg/query/storeset_test.go +++ b/pkg/query/storeset_test.go @@ -12,7 +12,6 @@ import ( "testing" "time" - "github.com/fortytw2/leaktest" "github.com/pkg/errors" "google.golang.org/grpc" "google.golang.org/grpc/codes" @@ -131,8 +130,6 @@ func (s *testStores) CloseOne(addr string) { } func TestStoreSet_Update(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - stores, err := startTestStores([]testStoreMeta{ { storeType: component.Sidecar, @@ -500,8 +497,6 @@ func TestStoreSet_Update(t *testing.T) { } func TestStoreSet_Update_NoneAvailable(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - st, err := startTestStores([]testStoreMeta{ { extlsetFn: func(addr string) []storepb.LabelSet { @@ -565,8 +560,6 @@ func TestStoreSet_Update_NoneAvailable(t *testing.T) { // TestQuerierStrict tests what happens when the strict mode is enabled/disabled. func TestQuerierStrict(t *testing.T) { - defer leaktest.CheckTimeout(t, 5*time.Second)() - st, err := startTestStores([]testStoreMeta{ { minTime: 12345, @@ -767,6 +760,7 @@ func TestStoreSet_Update_Rules(t *testing.T) { testGRPCOpts, time.Minute) t.Run(tc.name, func(t *testing.T) { + defer storeSet.Close() storeSet.Update(context.Background()) testutil.Equals(t, tc.expectedStores, len(storeSet.stores)) diff --git a/pkg/receive/config.go b/pkg/receive/config.go index f03ffbfced6..1eb198cace6 100644 --- a/pkg/receive/config.go +++ b/pkg/receive/config.go @@ -126,7 +126,7 @@ func NewConfigWatcher(logger log.Logger, reg prometheus.Registerer, path string, // Run starts the ConfigWatcher until the given context is canceled. func (cw *ConfigWatcher) Run(ctx context.Context) { - defer cw.stop() + defer cw.Stop() cw.refresh(ctx) @@ -238,8 +238,8 @@ func (cw *ConfigWatcher) refresh(ctx context.Context) { } } -// stop shuts down the config watcher. -func (cw *ConfigWatcher) stop() { +// Stop shuts down the config watcher. 
+func (cw *ConfigWatcher) Stop() { level.Debug(cw.logger).Log("msg", "stopping hashring configuration watcher...", "path", cw.path) done := make(chan struct{}) diff --git a/pkg/receive/config_test.go b/pkg/receive/config_test.go index 1fca1c8ab85..bf5bf05c0ea 100644 --- a/pkg/receive/config_test.go +++ b/pkg/receive/config_test.go @@ -10,6 +10,8 @@ import ( "testing" "github.com/pkg/errors" + + "github.com/thanos-io/thanos/pkg/testutil" ) func TestValidateConfig(t *testing.T) { @@ -43,34 +45,30 @@ func TestValidateConfig(t *testing.T) { err: nil, // means it's valid. }, } { - var content []byte - var err error - if content, err = json.Marshal(tc.cfg); err != nil { - t.Error(err) - } + t.Run(tc.name, func(t *testing.T) { + content, err := json.Marshal(tc.cfg) + testutil.Ok(t, err) + + tmpfile, err := ioutil.TempFile("", "configwatcher_test.*.json") + testutil.Ok(t, err) - tmpfile, err := ioutil.TempFile("", "configwatcher_test.*.json") - if err != nil { - t.Fatalf("case %q: unexpectedly failed creating the temp file: %v", tc.name, err) - } - defer os.Remove(tmpfile.Name()) + defer func() { + testutil.Ok(t, os.Remove(tmpfile.Name())) + }() - if _, err := tmpfile.Write(content); err != nil { - t.Fatalf("case %q: unexpectedly failed writing to the temp file: %v", tc.name, err) - } + _, err = tmpfile.Write(content) + testutil.Ok(t, err) - if err := tmpfile.Close(); err != nil { - t.Fatalf("case %q: unexpectedly failed closing the temp file: %v", tc.name, err) - } + err = tmpfile.Close() + testutil.Ok(t, err) - cw, err := NewConfigWatcher(nil, nil, tmpfile.Name(), 1) - if err != nil { - t.Fatalf("case %q: unexpectedly failed creating config watcher: %v", tc.name, err) - } + cw, err := NewConfigWatcher(nil, nil, tmpfile.Name(), 1) + testutil.Ok(t, err) + defer cw.Stop() - if err := cw.ValidateConfig(); err != nil && !errors.Is(err, tc.err) { - t.Errorf("case %q: got unexpected error: %v", tc.name, err) - continue - } + if err := cw.ValidateConfig(); err != nil && !errors.Is(err, tc.err) { + t.Errorf("case %q: got unexpected error: %v", tc.name, err) + } + }) } } diff --git a/pkg/receive/handler_test.go b/pkg/receive/handler_test.go index 7cefb844ff1..efd12ce7ce2 100644 --- a/pkg/receive/handler_test.go +++ b/pkg/receive/handler_test.go @@ -15,7 +15,6 @@ import ( "testing" "time" - "github.com/fortytw2/leaktest" "github.com/go-kit/kit/log" "github.com/gogo/protobuf/proto" "github.com/golang/snappy" @@ -23,9 +22,10 @@ import ( "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/storage" terrors "github.com/prometheus/prometheus/tsdb/errors" + "google.golang.org/grpc" + "github.com/thanos-io/thanos/pkg/store/storepb" "github.com/thanos-io/thanos/pkg/store/storepb/prompb" - "google.golang.org/grpc" ) func TestCountCause(t *testing.T) { @@ -183,7 +183,6 @@ func newHandlerHashring(appendables []*fakeAppendable, replicationFactor uint64) } func TestReceiveQuorum(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() appenderErrFn := func() error { return errors.New("failed to get appender") } conflictErrFn := func() error { return storage.ErrOutOfBounds } commitErrFn := func() error { return errors.New("failed to commit") } @@ -520,7 +519,6 @@ func TestReceiveQuorum(t *testing.T) { } func TestReceiveWithConsistencyDelay(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() appenderErrFn := func() error { return errors.New("failed to get appender") } conflictErrFn := func() error { return storage.ErrOutOfBounds } commitErrFn := func() error { return 
errors.New("failed to commit") } diff --git a/pkg/receive/multitsdb_test.go b/pkg/receive/multitsdb_test.go index ff21ac788c7..eb3274e6af4 100644 --- a/pkg/receive/multitsdb_test.go +++ b/pkg/receive/multitsdb_test.go @@ -10,21 +10,20 @@ import ( "testing" "time" - "github.com/fortytw2/leaktest" "github.com/go-kit/kit/log" "github.com/gogo/protobuf/types" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb" + "golang.org/x/sync/errgroup" + "github.com/thanos-io/thanos/pkg/runutil" "github.com/thanos-io/thanos/pkg/store/storepb" "github.com/thanos-io/thanos/pkg/testutil" - "golang.org/x/sync/errgroup" ) func TestMultiTSDB(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() dir, err := ioutil.TempDir("", "test") testutil.Ok(t, err) defer func() { testutil.Ok(t, os.RemoveAll(dir)) }() diff --git a/pkg/receive/receive_test.go b/pkg/receive/receive_test.go new file mode 100644 index 00000000000..8bc7dfa0034 --- /dev/null +++ b/pkg/receive/receive_test.go @@ -0,0 +1,14 @@ +// Copyright (c) The Thanos Authors. +// Licensed under the Apache License 2.0. + +package receive + +import ( + "testing" + + "github.com/thanos-io/thanos/pkg/testutil" +) + +func TestMain(m *testing.M) { + testutil.TolerantVerifyLeakMain(m) +} diff --git a/pkg/reloader/reloader_test.go b/pkg/reloader/reloader_test.go index 5d4cf01f551..952ed47e01e 100644 --- a/pkg/reloader/reloader_test.go +++ b/pkg/reloader/reloader_test.go @@ -18,14 +18,17 @@ import ( "testing" "time" - "github.com/fortytw2/leaktest" - "github.com/thanos-io/thanos/pkg/testutil" "go.uber.org/atomic" + "go.uber.org/goleak" + + "github.com/thanos-io/thanos/pkg/testutil" ) -func TestReloader_ConfigApply(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} +func TestReloader_ConfigApply(t *testing.T) { ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute) defer cancel() @@ -159,8 +162,6 @@ config: } func TestReloader_RuleApply(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - l, err := net.Listen("tcp", "localhost:0") testutil.Ok(t, err) diff --git a/pkg/rules/manager_test.go b/pkg/rules/manager_test.go index 5cebfd2d902..231c33a9b38 100644 --- a/pkg/rules/manager_test.go +++ b/pkg/rules/manager_test.go @@ -14,15 +14,15 @@ import ( "testing" "time" - "github.com/fortytw2/leaktest" "github.com/go-kit/kit/log" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/storage" + "gopkg.in/yaml.v3" + "github.com/thanos-io/thanos/pkg/store/storepb" "github.com/thanos-io/thanos/pkg/testutil" - "gopkg.in/yaml.v3" ) type nopAppendable struct{} @@ -249,6 +249,12 @@ groups: testutil.Equals(t, exp[i].file, p.File) }) } + defer func() { + // Update creates go routines. We don't need rules mngrs to run, just to parse things, but let it start and stop + // at the end to correctly test leaked go routines. 
+ thanosRuleMgr.Run() + thanosRuleMgr.Stop() + }() } func TestConfigRuleAdapterUnmarshalMarshalYAML(t *testing.T) { @@ -280,8 +286,6 @@ func TestConfigRuleAdapterUnmarshalMarshalYAML(t *testing.T) { } func TestManager_Rules(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - dir, err := ioutil.TempDir("", "test_rule_run") testutil.Ok(t, err) defer func() { testutil.Ok(t, os.RemoveAll(dir)) }() diff --git a/pkg/rules/prometheus_test.go b/pkg/rules/prometheus_test.go index 49b7134d09c..726313fe366 100644 --- a/pkg/rules/prometheus_test.go +++ b/pkg/rules/prometheus_test.go @@ -9,18 +9,15 @@ import ( "os" "path/filepath" "testing" - "time" - "github.com/fortytw2/leaktest" "github.com/prometheus/prometheus/pkg/labels" + "github.com/thanos-io/thanos/pkg/promclient" "github.com/thanos-io/thanos/pkg/testutil" "github.com/thanos-io/thanos/pkg/testutil/e2eutil" ) func TestPrometheus_Rules_e2e(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - p, err := e2eutil.NewPrometheus() testutil.Ok(t, err) defer func() { testutil.Ok(t, p.Stop()) }() diff --git a/pkg/rules/rules_test.go b/pkg/rules/rules_test.go index e3c7f580a1c..e5178eddec8 100644 --- a/pkg/rules/rules_test.go +++ b/pkg/rules/rules_test.go @@ -18,6 +18,10 @@ import ( "github.com/thanos-io/thanos/pkg/testutil" ) +func TestMain(m *testing.M) { + testutil.TolerantVerifyLeakMain(m) +} + // testRulesAgainstExamples tests against alerts.yaml and rules.yaml examples. func testRulesAgainstExamples(t *testing.T, dir string, server rulespb.RulesServer) { t.Helper() diff --git a/pkg/store/bucket_test.go b/pkg/store/bucket_test.go index 7ea52149125..521f33adbc0 100644 --- a/pkg/store/bucket_test.go +++ b/pkg/store/bucket_test.go @@ -20,9 +20,7 @@ import ( "strconv" "sync" "testing" - "time" - "github.com/fortytw2/leaktest" "github.com/go-kit/kit/log" "github.com/gogo/protobuf/proto" "github.com/gogo/protobuf/types" @@ -36,7 +34,6 @@ import ( "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb" "github.com/prometheus/prometheus/tsdb/encoding" - "go.uber.org/atomic" "github.com/thanos-io/thanos/pkg/block" @@ -187,7 +184,7 @@ func TestBucketBlock_Property(t *testing.T) { } func TestBucketBlock_matchLabels(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) dir, err := ioutil.TempDir("", "bucketblock-test") testutil.Ok(t, err) @@ -285,7 +282,7 @@ func TestBucketBlock_matchLabels(t *testing.T) { } func TestBucketBlockSet_addGet(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) set := newBucketBlockSet(labels.Labels{}) @@ -396,7 +393,7 @@ func TestBucketBlockSet_addGet(t *testing.T) { } func TestBucketBlockSet_remove(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) set := newBucketBlockSet(labels.Labels{}) @@ -426,7 +423,7 @@ func TestBucketBlockSet_remove(t *testing.T) { } func TestBucketBlockSet_labelMatchers(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) set := newBucketBlockSet(labels.FromStrings("a", "b", "c", "d")) @@ -495,7 +492,7 @@ func TestBucketBlockSet_labelMatchers(t *testing.T) { } func TestGapBasedPartitioner_Partition(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) const maxGapSize = 1024 * 512 @@ -555,7 +552,7 @@ func TestGapBasedPartitioner_Partition(t *testing.T) { } func TestBucketStore_Info(t 
*testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -1735,6 +1732,8 @@ func TestBigEndianPostingsCount(t *testing.T) { } func TestBlockWithLargeChunks(t *testing.T) { + defer testutil.TolerantVerifyLeak(t) + tmpDir, err := ioutil.TempDir(os.TempDir(), "large-chunk-test") testutil.Ok(t, err) t.Cleanup(func() { @@ -1830,10 +1829,16 @@ func createBlockWithLargeChunk(t testutil.TB, dir string, lbls labels.Labels, ra } db, err := tsdb.Open(dir, nil, nil, tsdb.DefaultOptions()) + defer func() { + testutil.Ok(t, db.Close()) + }() testutil.Ok(t, err) bs := db.Blocks() testutil.Equals(t, 1, len(bs)) cr, err := bs[0].Chunks() + defer func() { + testutil.Ok(t, cr.Close()) + }() testutil.Ok(t, err) // Ref is ( << 32 + offset in the file). In TSDB v1 first chunk is always at offset 8. c, err := cr.Chunk(8) diff --git a/pkg/store/cache/cache_test.go b/pkg/store/cache/cache_test.go index 670fd4d1333..9b59f75b95b 100644 --- a/pkg/store/cache/cache_test.go +++ b/pkg/store/cache/cache_test.go @@ -13,9 +13,14 @@ import ( "github.com/oklog/ulid" "github.com/prometheus/prometheus/pkg/labels" "github.com/thanos-io/thanos/pkg/testutil" + "go.uber.org/goleak" "golang.org/x/crypto/blake2b" ) +func TestMain(m *testing.M) { + goleak.VerifyTestMain(m) +} + func TestCacheKey_string(t *testing.T) { t.Parallel() diff --git a/pkg/store/cache/inmemory_test.go b/pkg/store/cache/inmemory_test.go index c10cb2dcf6b..119a5a32c4a 100644 --- a/pkg/store/cache/inmemory_test.go +++ b/pkg/store/cache/inmemory_test.go @@ -10,9 +10,7 @@ import ( "fmt" "math" "testing" - "time" - "github.com/fortytw2/leaktest" "github.com/go-kit/kit/log" "github.com/hashicorp/golang-lru/simplelru" "github.com/oklog/ulid" @@ -23,8 +21,6 @@ import ( ) func TestNewInMemoryIndexCache(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - // Should return error on invalid YAML config. conf := []byte("invalid") cache, err := NewInMemoryIndexCache(log.NewNopLogger(), nil, conf) @@ -51,8 +47,6 @@ max_item_size: 2KB } func TestInMemoryIndexCache_AvoidsDeadlock(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - metrics := prometheus.NewRegistry() cache, err := NewInMemoryIndexCacheWithConfig(log.NewNopLogger(), metrics, InMemoryIndexCacheConfig{ MaxItemSize: sliceHeaderSize + 5, @@ -85,8 +79,6 @@ func TestInMemoryIndexCache_AvoidsDeadlock(t *testing.T) { } func TestInMemoryIndexCache_UpdateItem(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - const maxSize = 2 * (sliceHeaderSize + 1) var errorLogs []string @@ -190,8 +182,6 @@ func TestInMemoryIndexCache_UpdateItem(t *testing.T) { // This should not happen as we hardcode math.MaxInt, but we still add test to check this out. 
func TestInMemoryIndexCache_MaxNumberOfItemsHit(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - metrics := prometheus.NewRegistry() cache, err := NewInMemoryIndexCacheWithConfig(log.NewNopLogger(), metrics, InMemoryIndexCacheConfig{ MaxItemSize: 2*sliceHeaderSize + 10, @@ -224,8 +214,6 @@ func TestInMemoryIndexCache_MaxNumberOfItemsHit(t *testing.T) { } func TestInMemoryIndexCache_Eviction_WithMetrics(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - metrics := prometheus.NewRegistry() cache, err := NewInMemoryIndexCacheWithConfig(log.NewNopLogger(), metrics, InMemoryIndexCacheConfig{ MaxItemSize: 2*sliceHeaderSize + 5, diff --git a/pkg/store/cache/memcached_test.go b/pkg/store/cache/memcached_test.go index 7710a2d543a..5daacc4be0f 100644 --- a/pkg/store/cache/memcached_test.go +++ b/pkg/store/cache/memcached_test.go @@ -8,7 +8,6 @@ import ( "testing" "time" - "github.com/fortytw2/leaktest" "github.com/go-kit/kit/log" "github.com/oklog/ulid" "github.com/pkg/errors" @@ -110,7 +109,6 @@ func TestMemcachedIndexCache_FetchMultiPostings(t *testing.T) { func TestMemcachedIndexCache_FetchMultiSeries(t *testing.T) { t.Parallel() - defer leaktest.CheckTimeout(t, 10*time.Second)() // Init some data to conveniently define test cases later one. block1 := ulid.MustNew(1, nil) diff --git a/pkg/store/multitsdb_test.go b/pkg/store/multitsdb_test.go index b4fa2767aa0..0c04490dc5d 100644 --- a/pkg/store/multitsdb_test.go +++ b/pkg/store/multitsdb_test.go @@ -12,12 +12,11 @@ import ( "os" "path/filepath" "testing" - "time" - "github.com/fortytw2/leaktest" "github.com/go-kit/kit/log" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/tsdb" + "github.com/thanos-io/thanos/pkg/component" "github.com/thanos-io/thanos/pkg/store/storepb" storetestutil "github.com/thanos-io/thanos/pkg/store/storepb/testutil" @@ -25,7 +24,7 @@ import ( ) func TestMultiTSDBSeries(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) tb := testutil.NewTB(t) storetestutil.RunSeriesInterestingCases(tb, 200e3, 200e3, func(t testutil.TB, samplesPerSeries, series int) { @@ -178,9 +177,9 @@ func (m *mockedStoreServer) Series(_ *storepb.SeriesRequest, server storepb.Stor // Regression test against https://github.com/thanos-io/thanos/issues/2823. func TestTenantSeriesSetServert_NotLeakingIfNotExhausted(t *testing.T) { - t.Run("exhausted StoreSet", func(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) + t.Run("exhausted StoreSet", func(t *testing.T) { s := newTenantSeriesSetServer(context.Background(), "a", nil) resps := []*storepb.SeriesResponse{ @@ -210,7 +209,7 @@ func TestTenantSeriesSetServert_NotLeakingIfNotExhausted(t *testing.T) { }) t.Run("canceled, not exhausted StoreSet", func(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) ctx, cancel := context.WithCancel(context.Background()) s := newTenantSeriesSetServer(ctx, "a", nil) @@ -245,7 +244,7 @@ func (s *mockedSeriesServer) Context() context.Context { return s.ctx } // Regression test against https://github.com/thanos-io/thanos/issues/2823. // This is different leak than in TestTenantSeriesSetServert_NotLeakingIfNotExhausted. 
func TestMultiTSDBStore_NotLeakingOnPrematureFinish(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) m := NewMultiTSDBStore(log.NewNopLogger(), nil, component.Receive, func() map[string]storepb.StoreServer { return map[string]storepb.StoreServer{ diff --git a/pkg/store/prometheus_test.go b/pkg/store/prometheus_test.go index d8ee68ce024..dd853e9b908 100644 --- a/pkg/store/prometheus_test.go +++ b/pkg/store/prometheus_test.go @@ -11,12 +11,12 @@ import ( "testing" "time" - "github.com/fortytw2/leaktest" "github.com/pkg/errors" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/pkg/timestamp" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/chunkenc" + "github.com/thanos-io/thanos/pkg/component" "github.com/thanos-io/thanos/pkg/promclient" "github.com/thanos-io/thanos/pkg/store/storepb" @@ -37,7 +37,7 @@ func TestPrometheusStore_Series_promOnPath_e2e(t *testing.T) { func testPrometheusStoreSeriesE2e(t *testing.T, prefix string) { t.Helper() - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) p, err := e2eutil.NewPrometheusOnPath(prefix) testutil.Ok(t, err) @@ -171,7 +171,7 @@ func getExternalLabels() labels.Labels { func TestPrometheusStore_SeriesLabels_e2e(t *testing.T) { t.Helper() - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) p, err := e2eutil.NewPrometheus() testutil.Ok(t, err) @@ -351,8 +351,9 @@ func TestPrometheusStore_SeriesLabels_e2e(t *testing.T) { }) } } + func TestPrometheusStore_LabelNames_e2e(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) p, err := e2eutil.NewPrometheus() testutil.Ok(t, err) @@ -397,7 +398,7 @@ func TestPrometheusStore_LabelNames_e2e(t *testing.T) { } func TestPrometheusStore_LabelValues_e2e(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) p, err := e2eutil.NewPrometheus() testutil.Ok(t, err) @@ -445,7 +446,7 @@ func TestPrometheusStore_LabelValues_e2e(t *testing.T) { // Test to check external label values retrieve. func TestPrometheusStore_ExternalLabelValues_e2e(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) p, err := e2eutil.NewPrometheus() testutil.Ok(t, err) @@ -485,7 +486,7 @@ func TestPrometheusStore_ExternalLabelValues_e2e(t *testing.T) { } func TestPrometheusStore_Series_MatchExternalLabel_e2e(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) p, err := e2eutil.NewPrometheus() testutil.Ok(t, err) @@ -550,7 +551,7 @@ func TestPrometheusStore_Series_MatchExternalLabel_e2e(t *testing.T) { } func TestPrometheusStore_Info(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -621,7 +622,7 @@ func testSeries_SplitSamplesIntoChunksWithMaxSizeOf120(t *testing.T, appender st // Regression test for https://github.com/thanos-io/thanos/issues/396. 
func TestPrometheusStore_Series_SplitSamplesIntoChunksWithMaxSizeOf120(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) p, err := e2eutil.NewPrometheus() testutil.Ok(t, err) @@ -644,11 +645,3 @@ func TestPrometheusStore_Series_SplitSamplesIntoChunksWithMaxSizeOf120(t *testin return proxy }) } - -func TestRuleGroupToProto(t *testing.T) { - -} - -func TestRuleGroupFromProto(t *testing.T) { - -} diff --git a/pkg/store/proxy_test.go b/pkg/store/proxy_test.go index 3da59567734..ff53f79bfda 100644 --- a/pkg/store/proxy_test.go +++ b/pkg/store/proxy_test.go @@ -15,7 +15,6 @@ import ( "testing" "time" - "github.com/fortytw2/leaktest" "github.com/go-kit/kit/log" "github.com/gogo/protobuf/proto" "github.com/gogo/protobuf/types" @@ -23,13 +22,14 @@ import ( "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/pkg/timestamp" "github.com/prometheus/prometheus/tsdb/chunkenc" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "github.com/thanos-io/thanos/pkg/component" "github.com/thanos-io/thanos/pkg/store/storepb" storetestutil "github.com/thanos-io/thanos/pkg/store/storepb/testutil" "github.com/thanos-io/thanos/pkg/testutil" - "google.golang.org/grpc" - "google.golang.org/grpc/codes" - "google.golang.org/grpc/status" ) type testClient struct { @@ -58,7 +58,7 @@ func (c testClient) Addr() string { } func TestProxyStore_Info(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -79,7 +79,7 @@ func TestProxyStore_Info(t *testing.T) { } func TestProxyStore_Series(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) for _, tc := range []struct { title string @@ -452,7 +452,7 @@ func TestProxyStore_SeriesSlowStores(t *testing.T) { t.Skip("enable THANOS_ENABLE_STORE_READ_TIMEOUT_TESTS to run store-read-timeout tests") } - defer leaktest.CheckTimeout(t, 20*time.Second)() + defer testutil.TolerantVerifyLeak(t) for _, tc := range []struct { title string @@ -973,7 +973,7 @@ func TestProxyStore_SeriesSlowStores(t *testing.T) { } func TestProxyStore_Series_RequestParamsProxied(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) m := &mockedStoreAPI{ RespSeries: []*storepb.SeriesResponse{ @@ -1016,7 +1016,7 @@ func TestProxyStore_Series_RequestParamsProxied(t *testing.T) { } func TestProxyStore_Series_RegressionFillResponseChannel(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) var cls []Client for i := 0; i < 10; i++ { @@ -1071,7 +1071,7 @@ func TestProxyStore_Series_RegressionFillResponseChannel(t *testing.T) { } func TestProxyStore_LabelValues(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) m1 := &mockedStoreAPI{ RespLabelValues: &storepb.LabelValuesResponse{ @@ -1111,7 +1111,7 @@ func TestProxyStore_LabelValues(t *testing.T) { } func TestProxyStore_LabelNames(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) for _, tc := range []struct { title string @@ -1225,6 +1225,8 @@ func TestProxyStore_LabelNames(t *testing.T) { } func TestProxyStore_storeMatch(t *testing.T) { + defer testutil.TolerantVerifyLeak(t) + storeAPIs := []Client{ &testClient{ StoreClient: &mockedStoreAPI{ @@ -1310,8 
+1312,6 @@ func seriesEquals(t *testing.T, expected []rawSeries, got []storepb.Series) { } func TestStoreMatches(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - cases := []struct { s Client mint, maxt int64 @@ -1718,7 +1718,7 @@ func benchProxySeries(t testutil.TB, totalSamples, totalSeries int) { } func TestProxyStore_NotLeakingOnPrematureFinish(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) clients := []Client{ &testClient{ diff --git a/pkg/store/tsdb_test.go b/pkg/store/tsdb_test.go index 468d9cec032..cf4413492d5 100644 --- a/pkg/store/tsdb_test.go +++ b/pkg/store/tsdb_test.go @@ -9,9 +9,9 @@ import ( "testing" "time" - "github.com/fortytw2/leaktest" "github.com/prometheus/prometheus/pkg/labels" "github.com/prometheus/prometheus/pkg/timestamp" + "github.com/thanos-io/thanos/pkg/component" "github.com/thanos-io/thanos/pkg/store/storepb" "github.com/thanos-io/thanos/pkg/testutil" @@ -19,7 +19,7 @@ import ( ) func TestTSDBStore_Info(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -53,7 +53,7 @@ func TestTSDBStore_Info(t *testing.T) { } func TestTSDBStore_Series(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -178,9 +178,9 @@ func TestTSDBStore_Series(t *testing.T) { } func TestTSDBStore_LabelNames(t *testing.T) { - var err error - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) + var err error ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -284,9 +284,9 @@ func TestTSDBStore_LabelNames(t *testing.T) { } func TestTSDBStore_LabelValues(t *testing.T) { - var err error - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) + var err error ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -383,7 +383,7 @@ func TestTSDBStore_LabelValues(t *testing.T) { // Regression test for https://github.com/thanos-io/thanos/issues/1038. func TestTSDBStore_Series_SplitSamplesIntoChunksWithMaxSizeOf120(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() + defer testutil.TolerantVerifyLeak(t) db, err := e2eutil.NewTSDB() defer func() { testutil.Ok(t, db.Close()) }() diff --git a/pkg/testutil/testutil.go b/pkg/testutil/testutil.go index 8b1e8e1d4f1..d98e02f60f1 100644 --- a/pkg/testutil/testutil.go +++ b/pkg/testutil/testutil.go @@ -14,6 +14,7 @@ import ( "github.com/pmezard/go-difflib/difflib" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" + "go.uber.org/goleak" ) // Assert fails the test if the condition is false. @@ -152,3 +153,27 @@ func GatherAndCompare(t *testing.T, g1 prometheus.Gatherer, g2 prometheus.Gather } Equals(t, m1.String(), m2.String()) } + +// TolerantVerifyLeakMain verifies go leaks but excludes the go routines that are +// launched as side effects of some of our dependencies. 
+func TolerantVerifyLeakMain(m *testing.M) { + goleak.VerifyTestMain(m, + // https://github.com/census-instrumentation/opencensus-go/blob/d7677d6af5953e0506ac4c08f349c62b917a443a/stats/view/worker.go#L34 + goleak.IgnoreTopFunction("go.opencensus.io/stats/view.(*worker).start"), + // https://github.com/kubernetes/klog/blob/c85d02d1c76a9ebafa81eb6d35c980734f2c4727/klog.go#L417 + goleak.IgnoreTopFunction("k8s.io/klog/v2.(*loggingT).flushDaemon"), + goleak.IgnoreTopFunction("k8s.io/klog.(*loggingT).flushDaemon"), + ) +} + +// TolerantVerifyLeak verifies go leaks but excludes the go routines that are +// launched as side effects of some of our dependencies. +func TolerantVerifyLeak(t *testing.T) { + goleak.VerifyNone(t, + // https://github.com/census-instrumentation/opencensus-go/blob/d7677d6af5953e0506ac4c08f349c62b917a443a/stats/view/worker.go#L34 + goleak.IgnoreTopFunction("go.opencensus.io/stats/view.(*worker).start"), + // https://github.com/kubernetes/klog/blob/c85d02d1c76a9ebafa81eb6d35c980734f2c4727/klog.go#L417 + goleak.IgnoreTopFunction("k8s.io/klog/v2.(*loggingT).flushDaemon"), + goleak.IgnoreTopFunction("k8s.io/klog.(*loggingT).flushDaemon"), + ) +} diff --git a/pkg/tracing/stackdriver/tracer_test.go b/pkg/tracing/stackdriver/tracer_test.go index b0d4790475e..f869e090fdd 100644 --- a/pkg/tracing/stackdriver/tracer_test.go +++ b/pkg/tracing/stackdriver/tracer_test.go @@ -9,20 +9,20 @@ package stackdriver import ( "context" "testing" - "time" "github.com/thanos-io/thanos/pkg/testutil" "github.com/thanos-io/thanos/pkg/tracing" - "github.com/fortytw2/leaktest" "github.com/opentracing/basictracer-go" ) +func TestMain(m *testing.M) { + testutil.TolerantVerifyLeakMain(m) +} + // This test shows that if sample factor will enable tracing on client process, even when it would be disabled on server // it will be still enabled for all spans within this span. func TestContextTracing_ClientEnablesTracing(t *testing.T) { - defer leaktest.CheckTimeout(t, 10*time.Second)() - m := &basictracer.InMemorySpanRecorder{} r := &forceRecorder{wrapped: m}