-
Notifications
You must be signed in to change notification settings - Fork 1.3k
/
alerts.go
141 lines (126 loc) · 3.55 KB
/
alerts.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
package modules
import (
"net"
"os"
"strconv"
"syscall"
"github.com/filecoin-project/lotus/journal/alerting"
"github.com/filecoin-project/lotus/lib/ulimit"
)
// CheckFdLimit returns an alerting check that compares the process's soft
// file-descriptor limit, as reported by ulimit.GetLimit, against min.
//
// When the returned function runs it:
//   - logs a warning and does nothing else if ulimit.GetLimit returns
//     ulimit.ErrUnsupported;
//   - raises the "process"/"fd-limit" alert carrying the error text if the
//     limit cannot be read for any other reason, and stops there;
//   - raises the same alert with the current and recommended values if the
//     soft limit is below min.
func CheckFdLimit(min uint64) func(al *alerting.Alerting) {
	return func(al *alerting.Alerting) {
		soft, _, err := ulimit.GetLimit()
		if err == ulimit.ErrUnsupported {
			log.Warn("FD limit monitoring not available")
			return
		}

		alert := al.AddAlertType("process", "fd-limit")
		if err != nil {
			al.Raise(alert, map[string]string{
				"message": "failed to get FD limit",
				"error":   err.Error(),
			})
			// soft is not meaningful when GetLimit failed; falling through
			// would compare it against min and could raise a second,
			// misleading "limit is low" alert on the same alert type.
			return
		}

		if soft < min {
			al.Raise(alert, map[string]interface{}{
				"message":         "soft FD limit is low",
				"soft_limit":      soft,
				"recommended_min": min,
			})
		}
	}
}
// CheckUDPBufferSize returns an alerting check that opens a throwaway UDP
// socket, reads its SO_RCVBUF value with getsockopt, and raises the
// "process"/"udp-buffer-size" alert when that value is smaller than wanted.
//
// Any failure along the way (dialing, obtaining the *net.UDPConn, obtaining
// the file handle, or the getsockopt call) is reported through the same
// alert type. The alert type is only registered at the moment something is
// actually raised.
func CheckUDPBufferSize(wanted int) func(al *alerting.Alerting) {
	return func(al *alerting.Alerting) {
		// report registers the alert type on demand and raises it with the
		// given payload.
		report := func(payload interface{}) {
			al.Raise(al.AddAlertType("process", "udp-buffer-size"), payload)
		}

		sock, dialErr := net.Dial("udp", "localhost:0")
		if dialErr != nil {
			report(map[string]string{
				"message": "Failed to create UDP connection",
				"error":   dialErr.Error(),
			})
			return
		}
		defer func() {
			if closeErr := sock.Close(); closeErr != nil {
				log.Warnf("Failed to close connection: %s", closeErr)
			}
		}()

		udpSock, isUDP := sock.(*net.UDPConn)
		if !isUDP {
			report(map[string]string{
				"message": "Failed to cast connection to UDPConn",
			})
			return
		}

		// File hands back a separate os.File for the socket, which must be
		// closed independently of the connection itself.
		sockFile, fileErr := udpSock.File()
		if fileErr != nil {
			report(map[string]string{
				"message": "Failed to get file descriptor from UDPConn",
				"error":   fileErr.Error(),
			})
			return
		}
		defer func() {
			if closeErr := sockFile.Close(); closeErr != nil {
				log.Warnf("Failed to close file: %s", closeErr)
			}
		}()

		fd := int(sockFile.Fd())
		current, optErr := syscall.GetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUF)
		if optErr != nil {
			report(map[string]string{
				"message": "Failed to get UDP buffer size",
				"error":   optErr.Error(),
			})
			return
		}

		if current >= wanted {
			return
		}
		report(map[string]interface{}{
			"message":      "UDP buffer size is low",
			"current_size": current,
			"wanted_size":  wanted,
			"help":         "See https://github.com/quic-go/quic-go/wiki/UDP-Buffer-Sizes for details.",
		})
	}
}
// CheckFvmConcurrency returns an alerting check for the
// LOTUS_FVM_CONCURRENCY environment variable.
//
// The check is a no-op when the variable is unset. Otherwise it raises the
// "process"/"fvm-concurrency" alert if the value does not parse as an
// integer, or if it parses to a number greater than 24.
func CheckFvmConcurrency() func(al *alerting.Alerting) {
	const (
		envName        = "LOTUS_FVM_CONCURRENCY"
		maxRecommended = 24
	)

	return func(al *alerting.Alerting) {
		raw, present := os.LookupEnv(envName)
		if !present {
			return
		}

		parsed, parseErr := strconv.Atoi(raw)
		switch {
		case parseErr != nil:
			al.Raise(al.AddAlertType("process", "fvm-concurrency"), map[string]string{
				"message": "LOTUS_FVM_CONCURRENCY is not an integer",
				"error":   parseErr.Error(),
			})
		case parsed > maxRecommended:
			// A high concurrency setting is flagged because of the risk
			// described in the alert message below.
			al.Raise(al.AddAlertType("process", "fvm-concurrency"), map[string]interface{}{
				"message":     "LOTUS_FVM_CONCURRENCY is set to a high value that can cause chain sync panics on network migrations/upgrades",
				"set_value":   parsed,
				"recommended": "24 or less during network upgrades",
			})
		}
	}
}
// TODO: More things:
// * Space in repo dirs (taking into account mounts)
// * Miner
// * Faulted partitions
// * Low balances
// * Market provider
// * Reachability
// * on-chain config
// * Low memory (maybe)
// * Network / sync issues