Skip to content
This repository has been archived by the owner on Jun 24, 2021. It is now read-only.

Commit

Permalink
Update TiKV alert rules (#898)
Browse files: browse the repository at this point in the history
  • Loading branch information
liubo0127 authored Aug 15, 2019
1 parent e3bea41 commit 26d4830
Showing 1 changed file with 4 additions and 16 deletions.
20 changes: 4 additions & 16 deletions roles/prometheus/files/tikv.rules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,12 @@ groups:
summary: TiKV coprocessor request wait seconds more than 10s

- alert: TiKV_raftstore_thread_cpu_seconds_total
-expr: sum(rate(tikv_thread_cpu_seconds_total{name=~"raftstore_.*"}[1m])) by (instance, name) > 0.8
+expr: sum(rate(tikv_thread_cpu_seconds_total{name=~"raftstore_.*"}[1m])) by (instance, name) > 1.6
for: 1m
labels:
env: ENV_LABELS_ENV
level: critical
-expr: sum(rate(tikv_thread_cpu_seconds_total{name=~"raftstore_.*"}[1m])) by (instance, name) > 0.8
+expr: sum(rate(tikv_thread_cpu_seconds_total{name=~"raftstore_.*"}[1m])) by (instance, name) > 1.6
annotations:
description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values:{{ $value }}'
value: '{{ $value }}'
Expand Down Expand Up @@ -158,12 +158,12 @@ groups:
summary: TiKV scheduler latch wait duration seconds more than 1s

- alert: TiKV_thread_apply_worker_cpu_seconds
-expr: sum(rate(tikv_thread_cpu_seconds_total{name="apply_worker"}[1m])) by (instance) > 0.9
+expr: sum(rate(tikv_thread_cpu_seconds_total{name="apply_worker"}[1m])) by (instance) > 1.8
for: 1m
labels:
env: ENV_LABELS_ENV
level: critical
-expr: sum(rate(tikv_thread_cpu_seconds_total{name="apply_worker"}[1m])) by (instance) > 0.9
+expr: sum(rate(tikv_thread_cpu_seconds_total{name="apply_worker"}[1m])) by (instance) > 1.8
annotations:
description: 'cluster: ENV_LABELS_ENV, type: {{ $labels.type }}, instance: {{ $labels.instance }}, values: {{ $value }}'
value: '{{ $value }}'
Expand Down Expand Up @@ -241,18 +241,6 @@ groups:
value: '{{ $value }}'
summary: TiKV scheduler command duration seconds more than 1s

-- alert: TiKV_thread_storage_scheduler_cpu_seconds
-expr: sum(rate(tikv_thread_cpu_seconds_total{name=~"storage_schedul.*"}[1m])) by (instance) > 0.8
-for: 1m
-labels:
-env: ENV_LABELS_ENV
-level: warning
-expr: sum(rate(tikv_thread_cpu_seconds_total{name=~"storage_schedul.*"}[1m])) by (instance) > 0.8
-annotations:
-description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, values:{{ $value }}'
-value: '{{ $value }}'
-summary: TiKV storage scheduler cpu seconds more than 80%
-
- alert: TiKV_coprocessor_outdated_request_wait_seconds
expr: delta( tikv_coprocessor_outdated_request_wait_seconds_count[10m] ) > 0
for: 1m
Expand Down

0 comments on commit 26d4830

Please sign in to comment.