-
-
Notifications
You must be signed in to change notification settings - Fork 196
/
Copy pathmonitrc
375 lines (334 loc) · 16.7 KB
/
monitrc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
# Global monit daemon settings.
# "set init" keeps monit from daemonizing itself, so it can be supervised by init.
set init
# Poll cycle length in seconds; every "for N cycles" rule below is a multiple of this.
set daemon 13
# Write monit's own log messages to syslog.
set log syslog
# Where monit keeps its persistent service state and its unique instance id.
set statefile /var/run/monit.state
set idfile /usr/local/etc/monit.id
# Serve the status/control interface on a unix socket instead of a TCP port.
set httpd unixsocket /var/run/monit.sock
allow localhost
# hss_led service monitoring
# Restarts hss_led when its process is gone and raises a WatchDog alarm on
# every restart and on sustained (5 cycles) file descriptor usage above 95%.
check process hss_led with pidfile /var/run/hss_led.pid
group homematic
# Started directly via start-stop-daemon, which also writes the pidfile checked above.
start = "/sbin/start-stop-daemon -S -q -b -m -p /var/run/hss_led.pid --exec /bin/hss_led -- -l 6"
stop = "/sbin/start-stop-daemon -K -q -p /var/run/hss_led.pid"
# UDP port probe is currently disabled.
#if failed port 8182 type udp for 5 cycles then restart
if not exist for 1 cycles then restart
if 1 restart within 1 cycles then
exec "/bin/triggerAlarm.tcl 'hss_led restarted' 'WatchDog: hssled-restart' true"
if filedescriptors > 95% for 5 cycles then
exec "/bin/triggerAlarm.tcl 'hss_led: high filedescriptor usage (>95%)' 'WatchDog: hssled-highfd' true"
# Tied to the hmlangwDisabled check, which unmonitors itself when /usr/local/HMLGW exists.
depends on hmlangwDisabled
# syslog daemon monitoring
# Restarts syslogd through its init script when the process is gone; alarms
# on every restart and on sustained file descriptor usage above 95%.
check process syslogd with pidfile /var/run/syslogd.pid
group system
start = "/etc/init.d/S07logging start"
stop = "/etc/init.d/S07logging stop"
restart = "/etc/init.d/S07logging restart"
if not exist for 1 cycles then restart
if 1 restart within 1 cycles then
exec "/bin/triggerAlarm.tcl 'syslogd restarted' 'WatchDog: syslogd-restart' true"
if filedescriptors > 95% for 5 cycles then
exec "/bin/triggerAlarm.tcl 'syslogd: high filedescriptor usage (>95%)' 'WatchDog: syslogd-highfd' true"
# udev daemon monitoring
# udevd is located by matching its command line (no pidfile is used here).
# No dedicated restart program is defined, only start and stop.
check process udevd with matching /sbin/udevd
group system
start = "/etc/init.d/S10udev start"
stop = "/etc/init.d/S10udev stop"
if not exist for 1 cycles then restart
if 1 restart within 1 cycles then
exec "/bin/triggerAlarm.tcl 'udevd restarted' 'WatchDog: udevd-restart' true"
if filedescriptors > 95% for 5 cycles then
exec "/bin/triggerAlarm.tcl 'udevd: high filedescriptor usage (>95%)' 'WatchDog: udevd-highfd' true"
depends on udevdEnabled
# Guard for udevd: grep exits 0 when /var/hm_mode contains 'HM_RUNNING_IN_HA=',
# in which case this check (and the dependent udevd check) is unmonitored.
check program udevdEnabled with path "/bin/grep -q 'HM_RUNNING_IN_HA=' /var/hm_mode"
group system
if status = 0 for 1 cycles then unmonitor
# eq3config daemon monitoring
# Restarts eq3configd when the process is gone or when UDP port 43439 fails
# its connection test for 5 consecutive cycles; alarms on restart and high fd usage.
check process eq3configd with pidfile /var/run/eq3configd.pid
group homematic
start = "/etc/init.d/S50eq3configd start"
stop = "/etc/init.d/S50eq3configd stop"
restart = "/etc/init.d/S50eq3configd restart"
if not exist for 1 cycles then restart
if failed port 43439 type udp for 5 cycles then restart
if 1 restart within 1 cycles then
exec "/bin/triggerAlarm.tcl 'eq3configd restarted' 'WatchDog: eq3configd-restart' true"
if filedescriptors > 95% for 5 cycles then
exec "/bin/triggerAlarm.tcl 'eq3configd: high filedescriptor usage (>95%)' 'WatchDog: eq3configd-highfd' true"
# lighttpd web server daemon monitoring
# The pidfile checked is the one of the lighttpd-angel supervisor process.
# Restarts lighttpd when the process is gone or when any of the listed TCP
# ports fails its connection test for 5 consecutive cycles; alarms on restart,
# on high file descriptor usage and on HTTPS certificate problems.
check process lighttpd with pidfile /var/run/lighttpd-angel.pid
group system
start = "/etc/init.d/S50lighttpd start"
stop = "/etc/init.d/S50lighttpd stop"
restart = "/etc/init.d/S50lighttpd restart"
if not exist for 1 cycles then restart
if 1 restart within 1 cycles then
exec "/bin/triggerAlarm.tcl 'lighttpd restarted' 'WatchDog: lighttpd-restart' true"
if filedescriptors > 95% for 5 cycles then
exec "/bin/triggerAlarm.tcl 'lighttpd: high filedescriptor usage (>95%)' 'WatchDog: lighttpd-highfd' true"
# Certificate test: fails when the certificate on port 443 is valid for 30 days or less.
# NOTE(review): a plain connection failure on 443 presumably raises this alarm too - confirm.
if failed port 443 and certificate valid > 30 days for 5 cycles then
exec "/bin/triggerAlarm.tcl 'HTTPS certificate expiring in < 30 days' 'WatchDog: lighttpd-expired-sslcert' true"
# TCP ports that must accept connections; each one is tested independently.
if failed port 80 for 5 cycles then restart
if failed port 443 for 5 cycles then restart
if failed port 1999 for 5 cycles then restart
if failed port 2000 for 5 cycles then restart
if failed port 2001 for 5 cycles then restart
if failed port 2010 for 5 cycles then restart
if failed port 8181 for 5 cycles then restart
if failed port 9292 for 5 cycles then restart
if failed port 41999 for 5 cycles then restart
if failed port 42000 for 5 cycles then restart
if failed port 42001 for 5 cycles then restart
if failed port 42010 for 5 cycles then restart
if failed port 48181 for 5 cycles then restart
if failed port 49292 for 5 cycles then restart
# Tied to the hmlangwDisabled check, which unmonitors itself when /usr/local/HMLGW exists.
# Written as "depends on" to match every other dependency line in this file.
depends on hmlangwDisabled
# ssdpd UPnP daemon monitoring
# Restarts ssdpd when the process is gone or when UDP port 1900 fails its
# connection test for 5 consecutive cycles; alarms on restart and high fd usage.
check process ssdpd with pidfile /var/run/ssdpd.pid
group homematic
start = "/etc/init.d/S50ssdpd start"
stop = "/etc/init.d/S50ssdpd stop"
restart = "/etc/init.d/S50ssdpd restart"
if not exist for 1 cycles then restart
if failed port 1900 type udp for 5 cycles then restart
if 1 restart within 1 cycles then
exec "/bin/triggerAlarm.tcl 'ssdpd restarted' 'WatchDog: ssdpd-restart' true"
if filedescriptors > 95% for 5 cycles then
exec "/bin/triggerAlarm.tcl 'ssdpd: high filedescriptor usage (>95%)' 'WatchDog: ssdpd-highfd' true"
# ssh daemon monitoring
# Only the existence of the sshd process is checked; alarms on restart and high fd usage.
check process sshd with pidfile /var/run/sshd.pid
group system
start = "/etc/init.d/S50sshd start"
stop = "/etc/init.d/S50sshd stop"
restart = "/etc/init.d/S50sshd restart"
if not exist for 1 cycles then restart
# TCP probe of port 22 is currently disabled.
#if failed port 22 for 5 cycles then restart
if 1 restart within 1 cycles then
exec "/bin/triggerAlarm.tcl 'sshd restarted' 'WatchDog: sshd-restart' true"
if filedescriptors > 95% for 5 cycles then
exec "/bin/triggerAlarm.tcl 'sshd: high filedescriptor usage (>95%)' 'WatchDog: sshd-highfd' true"
depends on sshdEnabled
# Guard for sshd: unmonitored (together with the dependent sshd check) when
# /etc/config/sshEnabled is not a regular file.
check program sshdEnabled with path "/usr/bin/test -f /etc/config/sshEnabled"
group system
if status != 0 for 1 cycles then unmonitor
# hs485 (BidCos-Wired) daemon monitoring
# The pidfile checked is the one written for hs485dLoader.
# Restarts the daemon when the process is gone or when TCP port 32000 fails
# its connection test for 5 consecutive cycles.
check process hs485d with pidfile /var/run/hs485dLoader.pid
group homematic
start = "/etc/init.d/S60hs485d start"
stop = "/etc/init.d/S60hs485d stop"
restart = "/etc/init.d/S60hs485d restart"
if not exist for 1 cycles then restart
if failed port 32000 for 5 cycles then restart
if 1 restart within 1 cycles then
exec "/bin/triggerAlarm.tcl 'hs485d restarted' 'WatchDog: hs485d-restart' true"
if filedescriptors > 95% for 5 cycles then
exec "/bin/triggerAlarm.tcl 'hs485d: high filedescriptor usage (>95%)' 'WatchDog: hs485d-highfd' true"
# Two guards: an interface must be configured and /usr/local/HMLGW must not exist.
depends on hs485dEnabled, hmlangwDisabled
# Guard for hs485d: unmonitored when /var/etc/hs485d.conf has no line starting
# with an "[Interface <char>]" section header.
check program hs485dEnabled with path "/bin/grep -q '^\[Interface .\]' /var/etc/hs485d.conf"
group homematic
if status != 0 for 1 cycles then unmonitor
# multimac daemon monitoring
# Only the existence of the multimacd process is checked; alarms on restart and high fd usage.
check process multimacd with pidfile /var/run/multimacd.pid
group homematic
start = "/etc/init.d/S60multimacd start"
stop = "/etc/init.d/S60multimacd stop"
restart = "/etc/init.d/S60multimacd restart"
if not exist for 1 cycles then restart
if 1 restart within 1 cycles then
exec "/bin/triggerAlarm.tcl 'multimacd restarted' 'WatchDog: multimacd-restart' true"
if filedescriptors > 95% for 5 cycles then
exec "/bin/triggerAlarm.tcl 'multimacd: high filedescriptor usage (>95%)' 'WatchDog: multimacd-highfd' true"
depends on multimacdEnabled
# Guard for multimacd: unmonitored when /var/etc/multimacd.conf is not a regular file.
check program multimacdEnabled with path "/usr/bin/test -f /var/etc/multimacd.conf"
group homematic
if status != 0 for 1 cycles then unmonitor
# hmlangw software emulation daemon monitoring
# Only restarts the daemon when its process is gone. Unlike the other process
# checks in this file, no restart alarm or file descriptor rule is defined here.
check process hmlangw with pidfile /var/run/hmlangw.pid
group homematic
start = "/etc/init.d/S61hmlangw start"
stop = "/etc/init.d/S61hmlangw stop"
restart = "/etc/init.d/S61hmlangw restart"
if not exist for 1 cycles then restart
depends on hmlangwEnabled
# The two guards below run the same test on /usr/local/HMLGW with opposite reactions:
# hmlangwEnabled is unmonitored when the path does NOT exist (disables the hmlangw check),
check program hmlangwEnabled with path "/usr/bin/test -e /usr/local/HMLGW"
group system
if status != 0 for 1 cycles then unmonitor
# hmlangwDisabled is unmonitored when the path DOES exist (disables every check
# that declares "depends on hmlangwDisabled").
check program hmlangwDisabled with path "/usr/bin/test -e /usr/local/HMLGW"
group system
if status = 0 for 1 cycles then unmonitor
# rfd (BidCos-RF) daemon monitoring
# Restarts rfd when the process is gone or when TCP port 32001 fails its
# connection test for 5 consecutive cycles; alarms on restart and high fd usage.
check process rfd with pidfile /var/run/rfd.pid
group homematic
start = "/etc/init.d/S61rfd start"
stop = "/etc/init.d/S61rfd stop"
restart = "/etc/init.d/S61rfd restart"
if not exist for 1 cycles then restart
if failed port 32001 for 5 cycles then restart
if 1 restart within 1 cycles then
exec "/bin/triggerAlarm.tcl 'rfd restarted' 'WatchDog: rfd-restart' true"
if filedescriptors > 95% for 5 cycles then
exec "/bin/triggerAlarm.tcl 'rfd: high filedescriptor usage (>95%)' 'WatchDog: rfd-highfd' true"
# Two guards: an interface must be configured and /usr/local/HMLGW must not exist.
depends on rfdEnabled, hmlangwDisabled
# Guard for rfd: unmonitored when /var/etc/rfd.conf has no line starting with
# an "[Interface <char>]" section header.
check program rfdEnabled with path "/bin/grep -q '^\[Interface .\]' /var/etc/rfd.conf"
group homematic
if status != 0 for 1 cycles then unmonitor
# HMIPServer (HmIP-RF/HmIP-Wired) daemon monitoring
# Managed through the S62HMServer init script. Of the candidate port probes
# only TCP port 39292 is active; the others are currently disabled.
check process HMIPServer with pidfile /var/run/HMIPServer.pid
group homematic
start = "/etc/init.d/S62HMServer start"
stop = "/etc/init.d/S62HMServer stop"
restart = "/etc/init.d/S62HMServer restart"
if not exist for 1 cycles then restart
#if failed port 9293 for 5 cycles then restart
#if failed port 32010 for 5 cycles then restart
if failed port 39292 for 5 cycles then restart
#if failed port 43438 type udp for 5 cycles then restart
if 1 restart within 1 cycles then
exec "/bin/triggerAlarm.tcl 'HMIPServer restarted' 'WatchDog: hmipserver-restart' true"
if filedescriptors > 95% for 5 cycles then
exec "/bin/triggerAlarm.tcl 'HMIPServer: high filedescriptor usage (>95%)' 'WatchDog: hmipserver-highfd' true"
# Tied to the hmlangwDisabled check, which unmonitors itself when /usr/local/HMLGW exists.
depends on hmlangwDisabled
# ReGaHss logic engine daemon monitoring
# Restarts ReGaHss when the process is gone or when one of three probes fails
# for 5 consecutive cycles: HTTP on port 8183, TCP on 31999, UDP on 1998.
check process ReGaHss with pidfile /var/run/ReGaHss.pid
group homematic
start = "/etc/init.d/S70ReGaHss start"
stop = "/etc/init.d/S70ReGaHss stop"
restart = "/etc/init.d/S70ReGaHss restart"
if not exist for 1 cycles then restart
if failed port 8183 protocol http for 5 cycles then restart
if failed port 31999 for 5 cycles then restart
if failed port 1998 type udp for 5 cycles then restart
if 1 restart within 1 cycles then
exec "/bin/triggerAlarm.tcl 'ReGaHss restarted' 'WatchDog: regahss-restart' true"
if filedescriptors > 95% for 5 cycles then
exec "/bin/triggerAlarm.tcl 'ReGaHss: high filedescriptor usage (>95%)' 'WatchDog: regahss-highfd' true"
# Tied to the hmlangwDisabled check, which unmonitors itself when /usr/local/HMLGW exists.
depends on hmlangwDisabled
# cronjob daemon monitoring
# Restarts crond through its init script when the process is gone; alarms on
# every restart and on sustained file descriptor usage above 95%.
check process crond with pidfile /var/run/crond.pid
group system
start = "/etc/init.d/S98crond start"
stop = "/etc/init.d/S98crond stop"
restart = "/etc/init.d/S98crond restart"
if not exist for 1 cycles then restart
if 1 restart within 1 cycles then
exec "/bin/triggerAlarm.tcl 'crond restarted' 'WatchDog: crond-restart' true"
if filedescriptors > 95% for 5 cycles then
exec "/bin/triggerAlarm.tcl 'crond: high filedescriptor usage (>95%)' 'WatchDog: crond-highfd' true"
# system cpu/memory monitoring
# System-wide resource check named after the host ($HOST). These rules only
# raise alarms; nothing is restarted. CPU and memory must stay above 95% for
# 24 consecutive cycles, file descriptor usage for 5 cycles.
check system $HOST
group system
if cpu usage > 95% for 24 cycles then
exec "/bin/triggerAlarm.tcl 'high CPU usage (>95%) detected' 'WatchDog: high-cpu' true"
if memory usage > 95% for 24 cycles then
exec "/bin/triggerAlarm.tcl 'high memory usage (>95%) detected' 'WatchDog: high-memory' true"
if filedescriptors > 95% for 5 cycles then
exec "/bin/triggerAlarm.tcl 'high filedescriptor usage (>95%) detected' 'WatchDog: high-fd' true"
# filesystem space monitoring
# Root filesystem: alarm when space usage stays above 95% for 5 cycles.
# Unlike userfs and usb1, no fsflags rule is defined for rootfs.
check filesystem rootfs with path /
group system
if space usage > 95% for 5 cycles then
exec "/bin/triggerAlarm.tcl 'rootfs (/): low disk space' 'WatchDog: rootfs-low-space' true"
# User filesystem (/usr/local): alarm on sustained low space and on any change
# of the filesystem flags, which is reported as a potential disk error.
check filesystem userfs with path /usr/local
group system
if space usage > 95% for 5 cycles then
exec "/bin/triggerAlarm.tcl 'userfs (/usr/local): low disk space' 'WatchDog: userfs-low-space' true"
if changed fsflags then
exec "/bin/triggerAlarm.tcl 'userfs (/usr/local): changed fsflags (potential disk error)' 'WatchDog: userfs-fsflags' true"
# USB storage (/media/usb1): same space and fsflags rules as userfs, but only
# monitored while the hasUSB guard below is monitored.
check filesystem usb1 with path /media/usb1
group system
if space usage > 95% for 5 cycles then
exec "/bin/triggerAlarm.tcl 'usb1 (/media/usb1): low disk space' 'WatchDog: usb1-low-space' true"
if changed fsflags then
exec "/bin/triggerAlarm.tcl 'usb1 (/media/usb1): changed fsflags (potential disk error)' 'WatchDog: usb1-fsflags' true"
depends on hasUSB
# Guard for usb1: unmonitored once /var/status/hasUSB has been missing for
# 5 consecutive cycles (the other guards in this file react after 1 cycle).
check program hasUSB with path "/usr/bin/test -e /var/status/hasUSB"
group system
if status != 0 for 5 cycles then unmonitor
# check system temperature limits
# Reads thermal_zone0 (falling back to 0 when the file cannot be read), prints
# the value divided by 1000 and exits with 10 when that is below 80.0, else 20.
# An alarm is raised when the exit status is not 10 for 5 consecutive cycles
# and repeated every 24 cycles while the condition persists.
# NOTE(review): a reading of exactly 80.0 exits with 20 and therefore alarms,
# although the alarm text says "> 80 deg" - confirm which boundary is intended.
check program temperature with path /bin/sh -c "(cat /sys/class/thermal/thermal_zone0/temp 2>/dev/null || echo 0) | awk '{ print $1/1000; if($1/1000 < 80.0) { exit 10 } else { exit 20 } }';"
group system
if status != 10 for 5 cycles then
exec "/bin/triggerAlarm.tcl 'high system temperature (> 80 deg) detected' 'WatchDog: high-temp' true"
repeat every 24 cycles
# internet connectivity monitoring
# This check does not probe the network itself; it only tests for the marker
# file /var/status/hasInternet. The alarm is raised after the file has been
# missing for 24 consecutive cycles and repeated every 24 cycles.
check program internetCheck with path "/usr/bin/test -e /var/status/hasInternet"
group system
if status != 0 for 24 cycles then
exec "/bin/triggerAlarm.tcl 'No internet connection detected' 'WatchDog: no-internet' true"
repeat every 24 cycles
depends on internetCheckEnabled
# Guard for internetCheck: unmonitored when the opt-out file
# /etc/config/internetCheckDisabled exists (test exits 0).
check program internetCheckEnabled with path "/usr/bin/test -e /etc/config/internetCheckDisabled"
group system
if status = 0 for 1 cycles then unmonitor
# check for an unclean shutdown by checking for /var/status/uncleanShutdown
# When the marker file exists, the alarm is raised and the marker is removed
# in the same shell command, so the next cycle finds no marker.
check program uncleanShutdownCheck with path "/usr/bin/test -e /var/status/uncleanShutdown"
group system
if status = 0 for 1 cycles then
exec /bin/sh -c "/bin/triggerAlarm.tcl 'Unclean shutdown or system crash identified' 'WatchDog: unclean-shutdown' true ; rm -f /var/status/uncleanShutdown"
repeat every 24 cycles
# check if an HB-RF-ETH is supposed to be connected and raise an alarm if it is not
# grep exits 0 when the is_connected sysfs attribute contains a "1"; any other
# exit status raises the alarm immediately (no "for N cycles" delay) and
# repeats it every 5 cycles.
check program hb_rf_eth-Check with path "/bin/grep -q 1 /sys/class/hb-rf-eth/hb-rf-eth/is_connected"
group homematic
if status != 0 then
exec /bin/sh -c "/bin/triggerAlarm.tcl 'HB-RF-ETH: no connection' 'WatchDog: hb-rf-eth-connection' true"
repeat every 5 cycles
depends on hb_rf_eth-CheckEnabled
# Guard for hb_rf_eth-Check: unmonitored when /etc/config/hb_rf_eth does not exist.
check program hb_rf_eth-CheckEnabled with path "/usr/bin/test -e /etc/config/hb_rf_eth"
group homematic
if status != 0 for 1 cycles then unmonitor
# check if a copro exists and if so we perform a regular check
# Reactions to the exit status of /bin/checkCoProcessor.sh:
#   2 -> raise the "connection lost" alarm, repeated every 5 cycles
#   1 -> stop monitoring this check (presumably "no co-processor present" -
#        confirm against the script)
check program coProcessorCheck with path "/bin/checkCoProcessor.sh"
group homematic
if status = 2 then
exec /bin/sh -c "/bin/triggerAlarm.tcl 'RF-Module/Co-Processor connection lost' 'WatchDog: copro-lost' true"
repeat every 5 cycles
if status = 1 for 1 cycles then unmonitor
# check if a pi4 with usb3+gpio rf module is used
# Reactions to the exit status of /bin/checkRpi4Usb3.sh:
#   1 for 3 cycles -> stop monitoring this check (presumably "not applicable" -
#                     confirm against the script)
#   2 for 5 cycles -> raise the alarm, repeated every 24 cycles
check program rpi4usb3Check with path "/bin/checkRpi4Usb3.sh"
group system
if status == 1 for 3 cycles then unmonitor
if status == 2 for 5 cycles then
exec "/bin/triggerAlarm.tcl 'Critical RaspberryPi4+USB3+GPIO use detected' 'WatchDog: rpi4-usb3' true"
repeat every 24 cycles
# tailscale monitoring
# Restarts tailscaled when the process is gone or when its control socket
# fails the connection test for 5 consecutive cycles; alarms on restart and
# high fd usage. Only monitored while the tailscaleEnabled check is monitored.
check process tailscaled with pidfile /var/run/tailscale/tailscaled.pid
group system
start = "/etc/init.d/S46tailscaled start"
stop = "/etc/init.d/S46tailscaled stop"
restart = "/etc/init.d/S46tailscaled restart"
if not exist for 1 cycles then restart
if failed unixsocket /var/run/tailscale/tailscaled.sock for 5 cycles then restart
if 1 restart within 1 cycles then
exec "/bin/triggerAlarm.tcl 'tailscaled restarted' 'WatchDog: tailscaled-restart' true"
if filedescriptors > 95% for 5 cycles then
exec "/bin/triggerAlarm.tcl 'tailscaled: high filedescriptor usage (>95%)' 'WatchDog: tailscaled-highfd' true"
depends on tailscaleEnabled
# network interface (tailscale0) monitoring
# Raises the "link down" alarm via the restart counter when the interface link fails.
# NOTE(review): no start/stop/restart program is defined for this check, so the
# "restart" action has nothing to execute here - confirm that it still counts
# as a restart for the "1 restart within 1 cycles" rule.
check network tailscale0 interface tailscale0
group system
if failed link then restart
if 1 restart within 1 cycles then
exec "/bin/triggerAlarm.tcl 'tailscale0 interface: link down' 'WatchDog: tailscale0-down' true"
depends on tailscaleEnabled
# check if tailscale is enabled at all
# Guard for the tailscaled and tailscale0 checks: unmonitored when
# /etc/config/tailscaleEnabled is not a regular file.
check program tailscaleEnabled with path "/usr/bin/test -f /etc/config/tailscaleEnabled"
group system
if status != 0 for 1 cycles then unmonitor
# ha-proxy monitoring
# Restarts ha-proxy when the process is gone or when TCP port 8099 fails its
# connection test for 5 consecutive cycles; alarms on restart and high fd usage.
check process ha-proxy with pidfile /var/run/ha-proxy.pid
group system
start = "/etc/init.d/S51ha-proxy start"
stop = "/etc/init.d/S51ha-proxy stop"
restart = "/etc/init.d/S51ha-proxy restart"
if not exist for 1 cycles then restart
if failed port 8099 for 5 cycles then restart
if 1 restart within 1 cycles then
exec "/bin/triggerAlarm.tcl 'ha-proxy restarted' 'WatchDog: ha-proxy-restart' true"
if filedescriptors > 95% for 5 cycles then
exec "/bin/triggerAlarm.tcl 'ha-proxy: high filedescriptor usage (>95%)' 'WatchDog: ha-proxy-highfd' true"
depends on ha-proxyEnabled
# check if ha-proxy is enabled at all
# Same grep as the udevdEnabled guard but with the opposite reaction: this
# guard is unmonitored when /var/hm_mode does NOT contain 'HM_RUNNING_IN_HA='.
check program ha-proxyEnabled with path "/bin/grep -q 'HM_RUNNING_IN_HA=' /var/hm_mode"
group system
if status != 0 for 1 cycles then unmonitor
# include user-defined configuration files
# Pulls in every file matching monit*.cfg below /usr/local/etc.
include /usr/local/etc/monit*.cfg