-
Notifications
You must be signed in to change notification settings - Fork 0
/
SSD_Misses_Alert.sh
86 lines (78 loc) · 2.81 KB
/
SSD_Misses_Alert.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/bin/bash
# Script that should alert if we are having a lot
# of SSD misses in Scale cluster
#
# NOTE: bash (not plain sh) is required: the script relies on readarray
# and on array expansion, neither of which exists in POSIX sh.

# Extra directory appended to the command search path
# (presumably where the `sc` CLI used below is installed -- confirm).
PATH=$PATH:/opt/scale/bin

# SSD misses limit for each category.
# A disk only triggers an alert when ALL four of its "Misses" values are
# strictly greater than the matching limit below.
# (Names suggest 1 s / 1 min / 5 min / 15 min windows -- confirm against
# the `sc vsd show display performance` output.)
readonly ONESEC_LIMIT=100
readonly ONEMIN_LIMIT=100
readonly FIVEMN_LIMIT=100
readonly FIFTMN_LIMIT=100

# Envelope addresses and the SMTP relay (port 25) used by alertMe.
readonly ALERT_EMAIL_FROM='hc3@papersolve.com'
readonly ALERT_EMAIL_TO='mike@papersolve.com'
readonly EMAIL_SERVER='10.2.68.3'

# if SSD priority is less than this value, ignore it
# we set it that way so we don't care as much about it
readonly SSD_PRIORITY_THRESH=8
#######################################
# Send the "SSD Misses Alert!" e-mail by speaking raw SMTP to the relay
# through nc (mail is not configured on this host).
# Globals (read):
#   ALERT_EMAIL_FROM, ALERT_EMAIL_TO - envelope sender / recipient
#   EMAIL_SERVER                     - SMTP relay, contacted on port 25
#   HOSTNAME, USER                   - HELO name and From: display name
#   vmrow                            - listing row of the affected VM
#   vmdisk                           - disk UUID; its performance dump is
#                                      read from /tmp/$vmdisk
# Arguments: none
# Outputs:   whatever nc prints (the server replies) goes to stdout
# Returns:   exit status of the nc pipeline
#######################################
alertMe() {
  local subj="SSD Misses Alert!"
  local body="The following VM and DISK triggered the SSD Misses alert:"
  local nl='
'
  local helo_name line
  # $HOSTNAME is a bash variable; fall back to hostname(1) if it is unset.
  helo_name=${HOSTNAME:-$(hostname)}

  # The message is streamed straight into the pipeline instead of being
  # staged in a fixed, predictable file under /tmp.
  {
    printf 'HELO %s\n' "$helo_name"
    printf 'MAIL FROM:<%s>\n' "$ALERT_EMAIL_FROM"
    printf 'RCPT TO:<%s>\n' "$ALERT_EMAIL_TO"
    printf 'DATA\n'
    printf 'From: [%s] <%s>\n' "$USER" "$ALERT_EMAIL_FROM"
    printf 'To: <%s>\n' "$ALERT_EMAIL_TO"
    printf 'Date: %s\n' "$(date)"
    printf 'Subject: %s\n' "$subj"
    printf '\n'
    {
      printf '%s\n' "$body"
      # vmrow may still carry the newline kept by readarray; drop it.
      printf '%s\n' "${vmrow%$nl}"
      cat -- "/tmp/$vmdisk"
    } | sed 's/^\./../' # SMTP dot-stuffing: a body line starting with "."
                        # would otherwise end the DATA section early
    printf '\n.\n\nQUIT\n'
  } | while IFS= read -r line; do
    # need to send email with delays (presumably so commands are not
    # pipelined ahead of the server's replies)
    sleep 0.1
    printf '%s\n' "$line"
  done | nc -C "$EMAIL_SERVER" 25 # -C: CRLF line endings per nc(1)
}
# Succeeds only when $1 is a non-empty string of decimal digits.
is_whole_number() {
  case $1 in
    '' | *[!0-9]*) return 1 ;;
  esac
}

#######################################
# Walk every VM reported by `sc vm show`, look at each VIRTIO_DISK attached
# to it, and call alertMe for any disk whose four "Misses" values all exceed
# their limits.
# Column positions ($1 of the VM row, $5 of the disk row, $6 of the vsd list
# row, $7-$10 of the Misses row) are the ones the script has always used;
# they are not validated against the `sc` output format here.
# Globals (read):    SSD_PRIORITY_THRESH, ONESEC_LIMIT, ONEMIN_LIMIT,
#                    FIVEMN_LIMIT, FIFTMN_LIMIT
# Globals (written): vmrow, vmdisk (left global because alertMe reads them)
# Files:             /tmp/$vmdisk receives the disk's performance dump;
#                    alertMe reads it back, so the path must stay as is.
# Outputs:           "Ignoring ..." notices on stdout, warnings on stderr
#######################################
check_ssd_misses() {
  local nl='
'
  local vm_listing disk_listing misses_line diskrow
  local vmuuid ssdpri onesec onemin fivemn fiftmn
  local -a vmrows vmdisks

  # get list of VMs and their UUIDs
  if ! vm_listing=$(sc vm show); then
    # best effort: keep going with whatever was printed, but say so
    echo "warning: 'sc vm show' failed; VM list may be incomplete" >&2
  fi
  readarray -t vmrows <<<"$vm_listing"

  for vmrow in "${vmrows[@]}"; do
    # extract what we care about: the first column is the VM UUID
    read -r vmuuid _ <<<"$vmrow"
    [ -n "$vmuuid" ] || continue

    # get list of the disks attached to each VM
    disk_listing=$(sc vm show display detail uuid "$vmuuid" | grep VIRTIO_DISK)
    [ -n "$disk_listing" ] || continue
    readarray -t vmdisks <<<"$disk_listing"

    for diskrow in "${vmdisks[@]}"; do
      # the fifth column of the disk row is the disk UUID
      read -r _ _ _ _ vmdisk _ <<<"$diskrow"
      [ -n "$vmdisk" ] || continue

      # if it's a low priority disk we'll ignore it
      ssdpri=$(sc vsd show display list uuid "$vmdisk" \
        | grep -F -- "$vmdisk" \
        | awk 'NR == 1 {print $6}')
      if ! is_whole_number "$ssdpri"; then
        # Unknown priority: keep checking the disk rather than hiding it.
        echo "warning: cannot read SSD priority of $vmdisk; checking it anyway" >&2
      elif [ "$ssdpri" -lt "$SSD_PRIORITY_THRESH" ]; then
        echo "Ignoring $vmdisk because it is priority $ssdpri"
        continue
      fi

      # get misses for each disk (alertMe later mails this file)
      sc vsd show display performance uuid "$vmdisk" > "/tmp/$vmdisk"
      misses_line=$(grep Misses "/tmp/$vmdisk")
      # Join multiple matches into one line, then pick fields 7-10.
      read -r _ _ _ _ _ _ onesec onemin fivemn fiftmn _ \
        <<<"${misses_line//$nl/ }"
      # drop the fractional part so the values can be compared as integers
      onesec=${onesec%.*}
      onemin=${onemin%.*}
      fivemn=${fivemn%.*}
      fiftmn=${fiftmn%.*}

      if ! is_whole_number "$onesec" || ! is_whole_number "$onemin" \
        || ! is_whole_number "$fivemn" || ! is_whole_number "$fiftmn"; then
        echo "warning: cannot read Misses values of $vmdisk; skipping it" >&2
        continue
      fi

      # alert me if misses are "too high" consistently
      if [ "$onesec" -gt "$ONESEC_LIMIT" ] \
        && [ "$onemin" -gt "$ONEMIN_LIMIT" ] \
        && [ "$fivemn" -gt "$FIVEMN_LIMIT" ] \
        && [ "$fiftmn" -gt "$FIFTMN_LIMIT" ]; then
        alertMe
      fi
    done
  done
}

check_ssd_misses