Skip to content

Commit

Permalink
remove "os" "hosts" "plugin" and "module" from stock alarms (netdata#…
Browse files Browse the repository at this point in the history
  • Loading branch information
ilyam8 committed Mar 5, 2024
1 parent 291004e commit 746ebfd
Show file tree
Hide file tree
Showing 35 changed files with 1,124 additions and 1,306 deletions.
4 changes: 0 additions & 4 deletions src/health/health.d/apcupsd.conf
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
class: Utilization
type: Power Supply
component: UPS
os: *
hosts: *
lookup: average -10m unaligned of percentage
units: %
every: 1m
Expand All @@ -23,8 +21,6 @@ component: UPS
class: Errors
type: Power Supply
component: UPS
os: *
hosts: *
lookup: average -60s unaligned of charge
units: %
every: 60s
Expand Down
10 changes: 1 addition & 9 deletions src/health/health.d/boinc.conf
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
# Alarms for various BOINC issues.
# you can disable an alarm notification by setting the 'to' line to: silent

# Warn on any compute errors encountered.
template: boinc_compute_errors
on: boinc.states
class: Errors
type: Computing
component: BOINC
os: *
hosts: *
lookup: average -10m unaligned of comperror
units: tasks
every: 1m
Expand All @@ -23,8 +21,6 @@ component: BOINC
class: Errors
type: Computing
component: BOINC
os: *
hosts: *
lookup: average -10m unaligned of upload_failed
units: tasks
every: 1m
Expand All @@ -40,8 +36,6 @@ component: BOINC
class: Utilization
type: Computing
component: BOINC
os: *
hosts: *
lookup: average -10m unaligned of total
units: tasks
every: 1m
Expand All @@ -57,8 +51,6 @@ component: BOINC
class: Utilization
type: Computing
component: BOINC
os: *
hosts: *
lookup: average -10m unaligned of active
calc: ($boinc_total_tasks >= 1) ? ($this) : (inf)
units: tasks
Expand Down
19 changes: 1 addition & 18 deletions src/health/health.d/btrfs.conf
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
# you can disable an alarm notification by setting the 'to' line to: silent

template: btrfs_allocated
on: btrfs.disk
class: Utilization
type: System
component: File system
os: *
hosts: *
calc: 100 - ($unallocated * 100 / ($unallocated + $data_used + $data_free + $meta_used + $meta_free + $sys_used + $sys_free))
units: %
every: 10s
Expand All @@ -20,8 +19,6 @@ component: File system
class: Utilization
type: System
component: File system
os: *
hosts: *
calc: $used * 100 / ($used + $free)
units: %
every: 10s
Expand All @@ -37,8 +34,6 @@ component: File system
class: Utilization
type: System
component: File system
os: *
hosts: *
calc: ($used + $reserved) * 100 / ($used + $free + $reserved)
units: %
every: 10s
Expand All @@ -54,8 +49,6 @@ component: File system
class: Utilization
type: System
component: File system
os: *
hosts: *
calc: $used * 100 / ($used + $free)
units: %
every: 10s
Expand All @@ -71,8 +64,6 @@ component: File system
class: Errors
type: System
component: File system
os: *
hosts: *
units: errors
lookup: max -10m every 1m of read_errs
warn: $this > 0
Expand All @@ -86,8 +77,6 @@ component: File system
class: Errors
type: System
component: File system
os: *
hosts: *
units: errors
lookup: max -10m every 1m of write_errs
crit: $this > 0
Expand All @@ -101,8 +90,6 @@ component: File system
class: Errors
type: System
component: File system
os: *
hosts: *
units: errors
lookup: max -10m every 1m of flush_errs
crit: $this > 0
Expand All @@ -116,8 +103,6 @@ component: File system
class: Errors
type: System
component: File system
os: *
hosts: *
units: errors
lookup: max -10m every 1m of corruption_errs
warn: $this > 0
Expand All @@ -131,8 +116,6 @@ component: File system
class: Errors
type: System
component: File system
os: *
hosts: *
units: errors
lookup: max -10m every 1m of generation_errs
warn: $this > 0
Expand Down
125 changes: 60 additions & 65 deletions src/health/health.d/cgroups.conf
Original file line number Diff line number Diff line change
@@ -1,72 +1,67 @@

# you can disable an alarm notification by setting the 'to' line to: silent

template: cgroup_10min_cpu_usage
on: cgroup.cpu_limit
class: Utilization
type: Cgroups
component: CPU
os: linux
hosts: *
lookup: average -10m unaligned
units: %
every: 1m
warn: $this > (($status == $CRITICAL) ? (85) : (95))
delay: down 15m multiplier 1.5 max 1h
summary: Cgroup ${label:cgroup_name} CPU utilization
info: Cgroup ${label:cgroup_name} average CPU utilization over the last 10 minutes
to: silent
template: cgroup_10min_cpu_usage
on: cgroup.cpu_limit
class: Utilization
type: Cgroups
component: CPU
host labels: _os=linux
lookup: average -10m unaligned
units: %
every: 1m
warn: $this > (($status == $CRITICAL) ? (85) : (95))
delay: down 15m multiplier 1.5 max 1h
summary: Cgroup ${label:cgroup_name} CPU utilization
info: Cgroup ${label:cgroup_name} average CPU utilization over the last 10 minutes
to: silent

template: cgroup_ram_in_use
on: cgroup.mem_usage
class: Utilization
type: Cgroups
component: Memory
os: linux
hosts: *
calc: ($ram) * 100 / $memory_limit
units: %
every: 10s
warn: $this > (($status >= $WARNING) ? (80) : (90))
crit: $this > (($status == $CRITICAL) ? (90) : (98))
delay: down 15m multiplier 1.5 max 1h
summary: Cgroup ${label:cgroup_name} memory utilization
info: Cgroup ${label:cgroup_name} memory utilization
to: silent
template: cgroup_ram_in_use
on: cgroup.mem_usage
class: Utilization
type: Cgroups
component: Memory
host labels: _os=linux
calc: ($ram) * 100 / $memory_limit
units: %
every: 10s
warn: $this > (($status >= $WARNING) ? (80) : (90))
crit: $this > (($status == $CRITICAL) ? (90) : (98))
delay: down 15m multiplier 1.5 max 1h
summary: Cgroup ${label:cgroup_name} memory utilization
info: Cgroup ${label:cgroup_name} memory utilization
to: silent

# ---------------------------------K8s containers--------------------------------------------

template: k8s_cgroup_10min_cpu_usage
on: k8s.cgroup.cpu_limit
class: Utilization
type: Cgroups
component: CPU
os: linux
hosts: *
lookup: average -10m unaligned
units: %
every: 1m
warn: $this > (($status >= $WARNING) ? (75) : (85))
delay: down 15m multiplier 1.5 max 1h
summary: Container ${label:k8s_container_name} pod ${label:k8s_pod_name} CPU utilization
info: Container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \
average CPU utilization over the last 10 minutes
to: silent
template: k8s_cgroup_10min_cpu_usage
on: k8s.cgroup.cpu_limit
class: Utilization
type: Cgroups
component: CPU
host labels: _os=linux
lookup: average -10m unaligned
units: %
every: 1m
warn: $this > (($status >= $WARNING) ? (75) : (85))
delay: down 15m multiplier 1.5 max 1h
summary: Container ${label:k8s_container_name} pod ${label:k8s_pod_name} CPU utilization
info: Container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \
average CPU utilization over the last 10 minutes
to: silent

template: k8s_cgroup_ram_in_use
on: k8s.cgroup.mem_usage
class: Utilization
type: Cgroups
component: Memory
os: linux
hosts: *
calc: ($ram) * 100 / $memory_limit
units: %
every: 10s
warn: $this > (($status >= $WARNING) ? (80) : (90))
crit: $this > (($status == $CRITICAL) ? (90) : (98))
delay: down 15m multiplier 1.5 max 1h
summary: Container ${label:k8s_container_name} pod ${label:k8s_pod_name} memory utilization
info: container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \
memory utilization
to: silent
template: k8s_cgroup_ram_in_use
on: k8s.cgroup.mem_usage
class: Utilization
type: Cgroups
component: Memory
host labels: _os=linux
calc: ($ram) * 100 / $memory_limit
units: %
every: 10s
warn: $this > (($status >= $WARNING) ? (80) : (90))
crit: $this > (($status == $CRITICAL) ? (90) : (98))
delay: down 15m multiplier 1.5 max 1h
summary: Container ${label:k8s_container_name} pod ${label:k8s_pod_name} memory utilization
info: container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \
memory utilization
to: silent
Loading

0 comments on commit 746ebfd

Please sign in to comment.