-
-
Notifications
You must be signed in to change notification settings - Fork 195
/
Copy pathmonitrc
525 lines (467 loc) · 23.4 KB
/
monitrc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
# Global monit daemon settings.
# Stay in the foreground instead of daemonizing (monit is started by init).
set init
# Poll cycle length in seconds; every "for N cycles" rule below counts these.
set daemon 13
# Send monit's own log output to syslog.
set log syslog
# Locations of monit's persistent state file and unique instance id file.
set statefile /var/run/monit.state
set idfile /usr/local/etc/monit.id
# Expose the monit HTTP interface on a unix socket only; the "allow" line
# below belongs to this httpd statement.
set httpd unixsocket /var/run/monit.sock
allow localhost
# Hardware watchdog: supervise the process matching /sbin/watchdog.
# Depends on hw-watchdogEnabled, i.e. on /dev/watchdog being present.
check process hw-watchdog with matching /sbin/watchdog
  group system
  start program = "/etc/init.d/S00watchdog start"
  stop program = "/etc/init.d/S00watchdog stop"
  # restart as soon as the process is gone and report each restart
  if does not exist for 1 cycles then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'hw-watchdog restarted' 'WatchDog: hw-watchdog-restart' true"
  # report sustained file descriptor exhaustion
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'hw-watchdog: high filedescriptor usage (>95%)' 'WatchDog: hw-watchdog-highfd' true"
  depends on hw-watchdogEnabled

# Guard check: unmonitored when /dev/watchdog is not a character device.
check program hw-watchdogEnabled with path "/usr/bin/test -c /dev/watchdog"
  group system
  if status != 0 for 1 cycles then unmonitor
# hss_led service: tracked through its pidfile and started directly via
# start-stop-daemon as user/group hssled. Depends on hmlangwDisabled.
check process hss_led with pidfile /var/run/hss_led.pid
  group homematic
  start program = "/sbin/start-stop-daemon -S -q -b -m -c hssled:hssled -p /var/run/hss_led.pid --exec /bin/hss_led -- -l 6"
  stop program = "/sbin/start-stop-daemon -K -q -p /var/run/hss_led.pid"
  # disabled port probe, kept for reference:
  #if failed port 8182 type udp for 5 cycles then restart
  if does not exist for 1 cycles then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'hss_led restarted' 'WatchDog: hssled-restart' true"
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'hss_led: high filedescriptor usage (>95%)' 'WatchDog: hssled-highfd' true"
  depends on hmlangwDisabled
# Logging daemons: klogd and syslogd are checked separately, but both are
# controlled through the same init script (/etc/init.d/S07logging).
check process klogd with pidfile /var/run/klogd.pid
  group system
  start program = "/etc/init.d/S07logging start"
  stop program = "/etc/init.d/S07logging stop"
  restart program = "/etc/init.d/S07logging restart"
  if does not exist for 1 cycles then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'klogd restarted' 'WatchDog: klogd-restart' true"
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'klogd: high filedescriptor usage (>95%)' 'WatchDog: klogd-highfd' true"

check process syslogd with pidfile /var/run/syslogd.pid
  group system
  start program = "/etc/init.d/S07logging start"
  stop program = "/etc/init.d/S07logging stop"
  restart program = "/etc/init.d/S07logging restart"
  if does not exist for 1 cycles then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'syslogd restarted' 'WatchDog: syslogd-restart' true"
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'syslogd: high filedescriptor usage (>95%)' 'WatchDog: syslogd-highfd' true"
# udev daemon: located by matching /sbin/udevd in the process list
# (no pidfile is used for this check).
check process udevd with matching /sbin/udevd
  group system
  start program = "/etc/init.d/S10udev start"
  stop program = "/etc/init.d/S10udev stop"
  if does not exist for 1 cycles then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'udevd restarted' 'WatchDog: udevd-restart' true"
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'udevd: high filedescriptor usage (>95%)' 'WatchDog: udevd-highfd' true"
# irqbalance daemon: only supervised while irqbalanceEnabled holds,
# i.e. while nproc reports more than one processor.
check process irqbalance with pidfile /var/run/irqbalance.pid
  group system
  start program = "/etc/init.d/S13irqbalance start"
  stop program = "/etc/init.d/S13irqbalance stop"
  restart program = "/etc/init.d/S13irqbalance restart"
  if does not exist for 1 cycles then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'irqbalance restarted' 'WatchDog: irqbalance-restart' true"
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'irqbalance: high filedescriptor usage (>95%)' 'WatchDog: irqbalance-highfd' true"
  depends on irqbalanceEnabled

# Guard check: unmonitored when "nproc" does not report more than 1.
check program irqbalanceEnabled with path /bin/sh -c "/usr/bin/test $(nproc) -gt 1"
  group system
  if status != 0 for 1 cycles then unmonitor
# D-Bus daemon: restarted when the process disappears or when its system
# bus socket fails the connection test for 5 consecutive cycles.
check process dbus with pidfile /run/messagebus.pid
  group system
  start program = "/etc/init.d/S30dbus start"
  stop program = "/etc/init.d/S30dbus stop"
  restart program = "/etc/init.d/S30dbus restart"
  if does not exist for 1 cycles then restart
  if failed unixsocket /run/dbus/system_bus_socket for 5 cycles then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'dbus restarted' 'WatchDog: dbus-restart' true"
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'dbus: high filedescriptor usage (>95%)' 'WatchDog: dbus-highfd' true"
# ifplugd instance for eth0: supervised only while the eth0 interface
# exists (see eth0Exists).
check process ifplugd.eth0 with pidfile /var/run/ifplugd.eth0.pid
  group system
  start program = "/etc/init.d/S45ifplugd start eth0"
  stop program = "/etc/init.d/S45ifplugd stop eth0"
  restart program = "/etc/init.d/S45ifplugd restart eth0"
  if does not exist for 1 cycles then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'ifplugd(eth0) restarted' 'WatchDog: eth0-restart' true"
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'ifplugd(eth0): high filedescriptor usage (>95%)' 'WatchDog: eth0-highfd' true"
  depends on eth0Exists

# Guard check: unmonitored when /sys/class/net/eth0 is not a directory.
check program eth0Exists with path "/usr/bin/test -d /sys/class/net/eth0"
  group system
  if status != 0 for 1 cycles then unmonitor
# ifplugd instance for wlan0: supervised only while the wlan0 interface
# exists (wlan0Exists) and the wlan0 check is enabled (wlan0CheckEnabled).
check process ifplugd.wlan0 with pidfile /var/run/ifplugd.wlan0.pid
  group system
  start program = "/etc/init.d/S45ifplugd start wlan0"
  stop program = "/etc/init.d/S45ifplugd stop wlan0"
  restart program = "/etc/init.d/S45ifplugd restart wlan0"
  if does not exist for 1 cycles then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'ifplugd(wlan0) restarted' 'WatchDog: wlan0-restart' true"
  # The second argument must be quoted as a whole, like every other
  # triggerAlarm.tcl call in this file; the opening quote was missing here.
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'ifplugd(wlan0): high filedescriptor usage (>95%)' 'WatchDog: wlan0-highfd' true"
  depends on wlan0Exists, wlan0CheckEnabled

# Guard check: unmonitored when /sys/class/net/wlan0 is not a directory.
check program wlan0Exists with path "/usr/bin/test -d /sys/class/net/wlan0"
  group system
  if status != 0 for 1 cycles then unmonitor
# chrony NTP daemon: restarted when the process is missing or when either
# UDP port 323 or its unix socket fails for 5 consecutive cycles.
check process chronyd with pidfile /var/run/chrony/chronyd.pid
  group system
  start program = "/etc/init.d/S46chrony start"
  stop program = "/etc/init.d/S46chrony stop"
  restart program = "/etc/init.d/S46chrony restart"
  if does not exist for 1 cycles then restart
  if failed port 323 type udp for 5 cycles then restart
  if failed unixsocket /var/run/chrony/chronyd.sock type udp for 5 cycles then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'Chrony-NTP restarted' 'WatchDog: chronyd-restart' true"
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'chronyd: high filedescriptor usage (>95%)' 'WatchDog: chronyd-highfd' true"

# NTP offset check: the awk script prints field 4 of the "Last offset" line
# from "chronyc -n tracking" and exits 10 when it lies strictly between
# -1.0 and 1.0, otherwise 20. Any status other than 10 for 3 cycles raises
# the alarm, repeated every 24 cycles.
check program ntpOffsetCheck with path /bin/sh -c "/usr/bin/chronyc -n tracking | grep 'Last offset' | awk '{ print $4; if($4 < 1.0 && $4 > -1.0) { exit 10 } else { exit 20 } }';"
  group system
  if status != 10 for 3 cycles then
    exec "/bin/triggerAlarm.tcl 'NTP offset > 1 second' 'WatchDog: chronyd-highoffset' true"
    repeat every 24 cycles
  depends on chronyd
# eq3configd daemon: restarted when the process is missing or when
# UDP port 43439 fails for 5 consecutive cycles.
check process eq3configd with pidfile /var/run/eq3configd.pid
  group homematic
  start program = "/etc/init.d/S50eq3configd start"
  stop program = "/etc/init.d/S50eq3configd stop"
  restart program = "/etc/init.d/S50eq3configd restart"
  if does not exist for 1 cycles then restart
  if failed port 43439 type udp for 5 cycles then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'eq3configd restarted' 'WatchDog: eq3configd-restart' true"
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'eq3configd: high filedescriptor usage (>95%)' 'WatchDog: eq3configd-highfd' true"
# lighttpd web server: tracked via the lighttpd-angel pidfile.
# Depends on hmlangwDisabled.
check process lighttpd with pidfile /var/run/lighttpd-angel.pid
  group system
  start program = "/etc/init.d/S50lighttpd start"
  stop program = "/etc/init.d/S50lighttpd stop"
  restart program = "/etc/init.d/S50lighttpd restart"
  if does not exist for 1 cycles then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'lighttpd restarted' 'WatchDog: lighttpd-restart' true"
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'lighttpd: high filedescriptor usage (>95%)' 'WatchDog: lighttpd-highfd' true"
  # alarm (no restart) when the port 443 test, which requires the
  # certificate to remain valid for more than 30 days, fails for 5 cycles
  if failed port 443 and certificate valid > 30 days for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'HTTPS certificate expiring in < 30 days' 'WatchDog: lighttpd-expired-sslcert' true"
  # restart when any of the served ports fails for 5 consecutive cycles
  if failed port 80 for 5 cycles then restart
  if failed port 443 for 5 cycles then restart
  if failed port 1999 for 5 cycles then restart
  if failed port 2000 for 5 cycles then restart
  if failed port 2001 for 5 cycles then restart
  if failed port 2010 for 5 cycles then restart
  if failed port 8181 for 5 cycles then restart
  if failed port 9292 for 5 cycles then restart
  if failed port 41999 for 5 cycles then restart
  if failed port 42000 for 5 cycles then restart
  if failed port 42001 for 5 cycles then restart
  if failed port 42010 for 5 cycles then restart
  if failed port 48181 for 5 cycles then restart
  if failed port 49292 for 5 cycles then restart
  depends on hmlangwDisabled
# ssdpd (UPnP/SSDP) daemon: restarted when the process is missing or when
# UDP port 1900 fails for 5 consecutive cycles.
check process ssdpd with pidfile /var/run/ssdpd.pid
  group homematic
  start program = "/etc/init.d/S50ssdpd start"
  stop program = "/etc/init.d/S50ssdpd stop"
  restart program = "/etc/init.d/S50ssdpd restart"
  if does not exist for 1 cycles then restart
  if failed port 1900 type udp for 5 cycles then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'ssdpd restarted' 'WatchDog: ssdpd-restart' true"
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'ssdpd: high filedescriptor usage (>95%)' 'WatchDog: ssdpd-highfd' true"
# SSH daemon: supervised only while /etc/config/sshEnabled exists
# (see sshdEnabled).
check process sshd with pidfile /var/run/sshd.pid
  group system
  start program = "/etc/init.d/S50sshd start"
  stop program = "/etc/init.d/S50sshd stop"
  restart program = "/etc/init.d/S50sshd restart"
  if does not exist for 1 cycles then restart
  # disabled port probe, kept for reference:
  #if failed port 22 for 5 cycles then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'sshd restarted' 'WatchDog: sshd-restart' true"
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'sshd: high filedescriptor usage (>95%)' 'WatchDog: sshd-highfd' true"
  depends on sshdEnabled

# Guard check: unmonitored when /etc/config/sshEnabled is not a regular file.
check program sshdEnabled with path "/usr/bin/test -f /etc/config/sshEnabled"
  group system
  if status != 0 for 1 cycles then unmonitor
# hs485d (BidCos-Wired) daemon: tracked via the hs485dLoader pidfile and
# probed on port 32000. Depends on hs485dEnabled and hmlangwDisabled.
check process hs485d with pidfile /var/run/hs485dLoader.pid
  group homematic
  start program = "/etc/init.d/S60hs485d start"
  stop program = "/etc/init.d/S60hs485d stop"
  restart program = "/etc/init.d/S60hs485d restart"
  if does not exist for 1 cycles then restart
  if failed port 32000 for 5 cycles then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'hs485d restarted' 'WatchDog: hs485d-restart' true"
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'hs485d: high filedescriptor usage (>95%)' 'WatchDog: hs485d-highfd' true"
  depends on hs485dEnabled, hmlangwDisabled

# Guard check: unmonitored when /var/etc/hs485d.conf contains no
# "[Interface x]" section header at the start of a line.
check program hs485dEnabled with path "/bin/grep -q '^\[Interface .\]' /var/etc/hs485d.conf"
  group homematic
  if status != 0 for 1 cycles then unmonitor
# multimacd daemon: supervised only while /var/etc/multimacd.conf exists
# (see multimacdEnabled).
check process multimacd with pidfile /var/run/multimacd.pid
  group homematic
  start program = "/etc/init.d/S60multimacd start"
  stop program = "/etc/init.d/S60multimacd stop"
  restart program = "/etc/init.d/S60multimacd restart"
  if does not exist for 1 cycles then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'multimacd restarted' 'WatchDog: multimacd-restart' true"
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'multimacd: high filedescriptor usage (>95%)' 'WatchDog: multimacd-highfd' true"
  depends on multimacdEnabled

# Guard check: unmonitored when /var/etc/multimacd.conf is not a regular file.
check program multimacdEnabled with path "/usr/bin/test -f /var/etc/multimacd.conf"
  group homematic
  if status != 0 for 1 cycles then unmonitor
# hmlangw (LAN gateway emulation) daemon: only restarted when missing; no
# alarm rules are attached. Supervised while /usr/local/HMLGW exists.
check process hmlangw with pidfile /var/run/hmlangw.pid
  group homematic
  start program = "/etc/init.d/S61hmlangw start"
  stop program = "/etc/init.d/S61hmlangw stop"
  restart program = "/etc/init.d/S61hmlangw restart"
  if does not exist for 1 cycles then restart
  depends on hmlangwEnabled

# The two guards below run the same test with opposite reactions:
# hmlangwEnabled is unmonitored when /usr/local/HMLGW is absent,
# hmlangwDisabled is unmonitored when /usr/local/HMLGW is present.
check program hmlangwEnabled with path "/usr/bin/test -e /usr/local/HMLGW"
  group system
  if status != 0 for 1 cycles then unmonitor

check program hmlangwDisabled with path "/usr/bin/test -e /usr/local/HMLGW"
  group system
  if status = 0 for 1 cycles then unmonitor
# rfd (BidCos-RF) daemon: probed on port 32001.
# Depends on rfdEnabled and hmlangwDisabled.
check process rfd with pidfile /var/run/rfd.pid
  group homematic
  start program = "/etc/init.d/S61rfd start"
  stop program = "/etc/init.d/S61rfd stop"
  restart program = "/etc/init.d/S61rfd restart"
  if does not exist for 1 cycles then restart
  if failed port 32001 for 5 cycles then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'rfd restarted' 'WatchDog: rfd-restart' true"
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'rfd: high filedescriptor usage (>95%)' 'WatchDog: rfd-highfd' true"
  depends on rfdEnabled, hmlangwDisabled

# Guard check: unmonitored when /var/etc/rfd.conf contains no
# "[Interface x]" section header at the start of a line.
check program rfdEnabled with path "/bin/grep -q '^\[Interface .\]' /var/etc/rfd.conf"
  group homematic
  if status != 0 for 1 cycles then unmonitor
# HMIPServer (HmIP-RF/HmIP-Wired) daemon: controlled through the
# S62HMServer init script; only port 39292 is actively probed.
check process HMIPServer with pidfile /var/run/HMIPServer.pid
  group homematic
  start program = "/etc/init.d/S62HMServer start"
  stop program = "/etc/init.d/S62HMServer stop"
  restart program = "/etc/init.d/S62HMServer restart"
  if does not exist for 1 cycles then restart
  # further port probes are disabled, kept for reference:
  #if failed port 9293 for 5 cycles then restart
  #if failed port 32010 for 5 cycles then restart
  if failed port 39292 for 5 cycles then restart
  #if failed port 43438 type udp for 5 cycles then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'HMIPServer restarted' 'WatchDog: hmipserver-restart' true"
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'HMIPServer: high filedescriptor usage (>95%)' 'WatchDog: hmipserver-highfd' true"
  depends on hmlangwDisabled
# ReGaHss logic engine: probed via HTTP on port 8183, a plain connection
# test on port 31999 and UDP on port 1998. Depends on hmlangwDisabled.
check process ReGaHss with pidfile /var/run/ReGaHss.pid
  group homematic
  start program = "/etc/init.d/S70ReGaHss start"
  stop program = "/etc/init.d/S70ReGaHss stop"
  restart program = "/etc/init.d/S70ReGaHss restart"
  if does not exist for 1 cycles then restart
  if failed port 8183 protocol http for 5 cycles then restart
  if failed port 31999 for 5 cycles then restart
  if failed port 1998 type udp for 5 cycles then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'ReGaHss restarted' 'WatchDog: regahss-restart' true"
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'ReGaHss: high filedescriptor usage (>95%)' 'WatchDog: regahss-highfd' true"
  depends on hmlangwDisabled
# cron daemon: restarted when missing; restarts and sustained high file
# descriptor usage are reported through triggerAlarm.tcl.
check process crond with pidfile /var/run/crond.pid
  group system
  start program = "/etc/init.d/S98crond start"
  stop program = "/etc/init.d/S98crond stop"
  restart program = "/etc/init.d/S98crond restart"
  if does not exist for 1 cycles then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'crond restarted' 'WatchDog: crond-restart' true"
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'crond: high filedescriptor usage (>95%)' 'WatchDog: crond-highfd' true"
# System-wide resource limits: CPU and memory must stay above 95% for
# 24 cycles, file descriptors for 5 cycles, before an alarm is raised.
check system $HOST
  group system
  if cpu usage > 95% for 24 cycles then
    exec "/bin/triggerAlarm.tcl 'high CPU usage (>95%) detected' 'WatchDog: high-cpu' true"
  if memory usage > 95% for 24 cycles then
    exec "/bin/triggerAlarm.tcl 'high memory usage (>95%) detected' 'WatchDog: high-memory' true"
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'high filedescriptor usage (>95%) detected' 'WatchDog: high-fd' true"
# Filesystem checks: all three alarm above 95% space usage for 5 cycles;
# userfs and usb1 additionally alarm on changed filesystem flags.
check filesystem rootfs with path /
  group system
  if space usage > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'rootfs (/): low disk space' 'WatchDog: rootfs-low-space' true"

check filesystem userfs with path /usr/local
  group system
  if space usage > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'userfs (/usr/local): low disk space' 'WatchDog: userfs-low-space' true"
  if changed fsflags then
    exec "/bin/triggerAlarm.tcl 'userfs (/usr/local): changed fsflags (potential disk error)' 'WatchDog: userfs-fsflags' true"

# usb1 is only checked while the hasUSB guard holds.
check filesystem usb1 with path /media/usb1
  group system
  if space usage > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'usb1 (/media/usb1): low disk space' 'WatchDog: usb1-low-space' true"
  if changed fsflags then
    exec "/bin/triggerAlarm.tcl 'usb1 (/media/usb1): changed fsflags (potential disk error)' 'WatchDog: usb1-fsflags' true"
  depends on hasUSB

# Guard check: unmonitored once /var/status/hasUSB has been missing for
# 5 consecutive cycles.
check program hasUSB with path "/usr/bin/test -e /var/status/hasUSB"
  group system
  if status != 0 for 5 cycles then unmonitor
# System temperature: reads thermal_zone0 (falling back to 0 when the file
# cannot be read), prints the value divided by 1000 and exits 10 when that
# is below 80.0, otherwise 20. Any status other than 10 for 5 cycles raises
# the alarm, repeated every 24 cycles.
check program temperature with path /bin/sh -c "(cat /sys/class/thermal/thermal_zone0/temp 2>/dev/null || echo 0) | awk '{ print $1/1000; if($1/1000 < 80.0) { exit 10 } else { exit 20 } }';"
  group system
  if status != 10 for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'high system temperature (> 80 deg) detected' 'WatchDog: high-temp' true"
    repeat every 24 cycles
# eth0 link state: alarms on link failure and on link capacity changes.
# NOTE(review): no start/stop program is defined for this check, so the
# "restart" action appears to serve mainly as the trigger for the
# "1 restart within 1 cycles" alarm rule - confirm.
check network eth0 interface eth0
  group system
  if failed link then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'eth0 interface: link down' 'WatchDog: eth0-down' true"
  if changed link capacity then
    exec "/bin/triggerAlarm.tcl 'eth0 interface: link speed changed' 'WatchDog: eth0-speed' true"
  depends on eth0LinkCheck

# Guard check: unmonitored when /sys/class/net/eth0/carrier contains no "1".
check program eth0LinkCheck with path "/bin/grep -q 1 /sys/class/net/eth0/carrier"
  group system
  if status != 0 for 1 cycles then unmonitor
# wlan0 link state: on link failure the interface is cycled with
# ifdown/ifup and an alarm is raised.
check network wlan0 interface wlan0
  group system
  start program = "/sbin/ifup wlan0"
  stop program = "/sbin/ifdown wlan0"
  if failed link then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'wlan0 interface link down' 'WatchDog: wlan0-down' true"
  depends on wlan0CheckEnabled, wlan0Exists

# Guard check: passes only when /etc/config/wpa_supplicant.conf exists and
# /sys/class/net/eth0/carrier contains a "0"; otherwise it is unmonitored.
# NOTE(review): "[[ ... ]]" is not POSIX sh syntax - this relies on the
# target's /bin/sh supporting it; confirm before porting.
check program wlan0CheckEnabled with path /bin/sh -c "[[ -e /etc/config/wpa_supplicant.conf ]] && /bin/grep 0 /sys/class/net/eth0/carrier 2>/dev/null"
  group system
  if status != 0 for 1 cycles then unmonitor
  depends on wlan0Exists
# Under-voltage detection via vcgencmd (RaspberryPi only): the shell test
# succeeds (status 0) when bit 0x1 of the "get_throttled" value is set.
# Two consecutive hits raise the alarm, repeated every 24 cycles.
check program voltageCheck with path /bin/sh -c "[ $(( $(/usr/bin/vcgencmd get_throttled | cut -f2 -d=) & 0x1 )) -eq $(( 0x1 )) ]"
  group system
  if status = 0 for 2 cycles then
    exec "/bin/triggerAlarm.tcl 'under-voltage detected' 'WatchDog: low-voltage' true"
    repeat every 24 cycles
  depends on voltageCheckEnabled

# Guard check: unmonitored when /usr/bin/vcgencmd does not exist.
check program voltageCheckEnabled with path "/usr/bin/test -e /usr/bin/vcgencmd"
  group system
  if status != 0 for 1 cycles then unmonitor
# Internet connectivity: alarms when /var/status/hasInternet has been
# missing for 24 consecutive cycles, repeated every 24 cycles.
check program internetCheck with path "/usr/bin/test -e /var/status/hasInternet"
  group system
  if status != 0 for 24 cycles then
    exec "/bin/triggerAlarm.tcl 'No internet connection detected' 'WatchDog: no-internet' true"
    repeat every 24 cycles
  depends on internetCheckEnabled

# Guard check with inverted sense: unmonitored when the opt-out marker
# /etc/config/internetCheckDisabled exists.
check program internetCheckEnabled with path "/usr/bin/test -e /etc/config/internetCheckDisabled"
  group system
  if status = 0 for 1 cycles then unmonitor
# Bad-block report: "test -s" succeeds when /tmp/badblocks.txt exists and
# is non-empty, which raises the alarm (repeated every 6700 cycles).
check program badblocksCheck with path "/usr/bin/test -s /tmp/badblocks.txt"
  group system
  if status = 0 for 1 cycles then
    exec "/bin/triggerAlarm.tcl 'Bad blocks on main disk device detected' 'WatchDog: bad-blocks' true"
    repeat every 6700 cycles
  depends on crond
# Unclean shutdown detection: when /var/status/uncleanShutdown exists the
# alarm is raised and the marker file is removed by the same shell command.
check program uncleanShutdownCheck with path "/usr/bin/test -e /var/status/uncleanShutdown"
  group system
  if status = 0 for 1 cycles then
    exec /bin/sh -c "/bin/triggerAlarm.tcl 'Unclean shutdown or system crash identified' 'WatchDog: unclean-shutdown' true ; rm -f /var/status/uncleanShutdown"
    repeat every 24 cycles
# HB-RF-ETH connection: if an HB-RF-ETH is configured (see the guard below)
# but its is_connected attribute does not contain "1", raise an alarm,
# repeated every 5 cycles.
check program hb_rf_eth-Check with path "/bin/grep -q 1 /sys/class/hb-rf-eth/hb-rf-eth/is_connected"
  group homematic
  if status != 0 then
    exec /bin/sh -c "/bin/triggerAlarm.tcl 'HB-RF-ETH: no connection' 'WatchDog: hb-rf-eth-connection' true"
    repeat every 5 cycles
  depends on hb_rf_eth-CheckEnabled

# Guard check: unmonitored when /etc/config/hb_rf_eth does not exist.
check program hb_rf_eth-CheckEnabled with path "/usr/bin/test -e /etc/config/hb_rf_eth"
  group homematic
  if status != 0 for 1 cycles then unmonitor
# Co-processor check driven by /bin/checkCoProcessor.sh exit codes:
#   2 -> raise the "connection lost" alarm (repeated every 5 cycles)
#   1 -> stop monitoring this check
# (presumably 1 means no co-processor is present - verify in the script)
check program coProcessorCheck with path "/bin/checkCoProcessor.sh"
  group homematic
  if status = 2 then
    exec /bin/sh -c "/bin/triggerAlarm.tcl 'RF-Module/Co-Processor connection lost' 'WatchDog: copro-lost' true"
    repeat every 5 cycles
  if status = 1 for 1 cycles then unmonitor
# eMMC wear check driven by /bin/checkEMMCLifeTime.sh exit codes:
#   1 (3 cycles) -> stop monitoring this check
#   2 (5 cycles) -> ">90% exceeded" alarm, repeated every 280 cycles
#   3 (5 cycles) -> "100% exceeded" alarm, repeated every 280 cycles
check program eMMCLifeTimeCheck with path "/bin/checkEMMCLifeTime.sh"
  group system
  if status == 1 for 3 cycles then unmonitor
  if status == 2 for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'eMMC life time >90% exceeded' 'WatchDog: emmc-lifetime90' true"
    repeat every 280 cycles
  if status == 3 for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'eMMC life time 100% exceeded' 'WatchDog: emmc-lifetime100' true"
    repeat every 280 cycles
# RaspberryPi4 + USB3 + GPIO RF module check driven by
# /bin/checkRpi4Usb3.sh exit codes:
#   1 (3 cycles) -> stop monitoring this check
#   2 (5 cycles) -> raise the alarm, repeated every 24 cycles
check program rpi4usb3Check with path "/bin/checkRpi4Usb3.sh"
  group system
  if status == 1 for 3 cycles then unmonitor
  if status == 2 for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'Critical RaspberryPi4+USB3+GPIO use detected' 'WatchDog: rpi4-usb3' true"
    repeat every 24 cycles
# tailscaled daemon: restarted when the process is missing or when its
# unix socket fails for 5 consecutive cycles. Depends on tailscaleEnabled.
check process tailscaled with pidfile /var/run/tailscale/tailscaled.pid
  group system
  start program = "/etc/init.d/S46tailscaled start"
  stop program = "/etc/init.d/S46tailscaled stop"
  restart program = "/etc/init.d/S46tailscaled restart"
  if does not exist for 1 cycles then restart
  if failed unixsocket /var/run/tailscale/tailscaled.sock for 5 cycles then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'tailscaled restarted' 'WatchDog: tailscaled-restart' true"
  if filedescriptors > 95% for 5 cycles then
    exec "/bin/triggerAlarm.tcl 'tailscaled: high filedescriptor usage (>95%)' 'WatchDog: tailscaled-highfd' true"
  depends on tailscaleEnabled
# tailscale0 link state: a failed link triggers the restart action, and
# the restart in turn raises the "link down" alarm.
# Depends on tailscaleEnabled.
check network tailscale0 interface tailscale0
  group system
  if failed link then restart
  if 1 restart within 1 cycles then
    exec "/bin/triggerAlarm.tcl 'tailscale0 interface: link down' 'WatchDog: tailscale0-down' true"
  depends on tailscaleEnabled
# Guard check for the tailscale checks: unmonitored when
# /etc/config/tailscaleEnabled is not a regular file.
check program tailscaleEnabled with path "/usr/bin/test -f /etc/config/tailscaleEnabled"
  group system
  if status != 0 for 1 cycles then unmonitor
# Include user-defined configuration files: every file matching the glob
# /usr/local/etc/monit*.cfg is read in addition to this file.
include /usr/local/etc/monit*.cfg