Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

zed: Control NVMe fault LEDs #12695

Merged
merged 1 commit into from
Nov 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 68 additions & 5 deletions cmd/zed/zed.d/statechange-led.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@
[ -f "${ZED_ZEDLET_DIR}/zed.rc" ] && . "${ZED_ZEDLET_DIR}/zed.rc"
. "${ZED_ZEDLET_DIR}/zed-functions.sh"

if [ ! -d /sys/class/enclosure ] ; then
if [ ! -d /sys/class/enclosure ] && [ ! -d /sys/bus/pci/slots ] ; then
# No JBOD enclosure or NVMe slots
exit 1
fi

Expand Down Expand Up @@ -92,6 +93,29 @@ check_and_set_led()
done
}

# Fault LEDs for JBODs and NVMe drives are handled a little differently.
#
# On JBODs the fault LED is called 'fault' and on a path like this:
#
# /sys/class/enclosure/0:0:1:0/SLOT 10/fault
#
# On NVMe it's called 'attention' and on a path like this:
#
# /sys/bus/pci/slots/0/attention
#
# This function returns the full path to the fault LED file for a given
# enclosure/slot directory.
#
path_to_led()
{
	# $1: enclosure/slot sysfs directory to look in.
	dir=$1

	# JBOD (SES) enclosures expose the fault LED as 'fault'; prefer it.
	if [ -f "$dir/fault" ] ; then
		echo "$dir/fault"
		return
	fi

	# NVMe PCI slots expose it as 'attention' instead.
	if [ -f "$dir/attention" ] ; then
		echo "$dir/attention"
	fi

	# Neither file exists: print nothing, so the caller sees an empty path.
}

state_to_val()
{
state="$1"
Expand All @@ -105,6 +129,38 @@ state_to_val()
esac
}

#
# nvme_dev_to_slot (dev [sysfs_root])
#
# Given an NVMe device name like 'nvme0n1', print its PCI slot directory,
# like "/sys/bus/pci/slots/0".  Prints nothing if the device has no PCI
# address or if no slot matches it.
#
# Arguments
#   dev:        block device name (no "/dev/" prefix)
#   sysfs_root: optional sysfs mount point, defaults to "/sys"
#
nvme_dev_to_slot()
{
	dev="$1"
	sysfs="${2:-/sys}"

	# Get the address "0000:01:00.0".  Non-NVMe devices have no 'address'
	# file; that is an expected case, so don't spam the log with cat errors.
	address=$(cat "$sysfs/class/block/$dev/device/address" 2>/dev/null)
	if [ -z "$address" ] ; then
		return 0
	fi

	# For each /sys/bus/pci/slots subdir that is an actual number
	# (rather than weird directories like "1-3/").
	for i in "$sysfs"/bus/pci/slots/* ; do
		case "${i##*/}" in
		''|*[!0-9]*) continue ;;
		esac

		# An empty address would be a prefix of every device address,
		# so slots without a readable address must be skipped.
		this_address=$(cat "$i/address" 2>/dev/null)
		if [ -z "$this_address" ] ; then
			continue
		fi

		# The format of address is a little different between
		# /sys/class/block/$dev/device/address and
		# /sys/bus/pci/slots/
		#
		# address=      "0000:01:00.0"
		# this_address= "0000:01:00"
		#
		# so do a literal (non-regex) prefix comparison.
		case "$address" in
		"$this_address"*)
			echo "$i"
			break
			;;
		esac
	done
}


# process_pool (pool)
#
# Iterate through a pool and set the vdevs' enclosure slot LEDs to
Expand Down Expand Up @@ -134,6 +190,11 @@ process_pool()
# Get dev name (like 'sda')
dev=$(basename "$(echo "$therest" | awk '{print $(NF-1)}')")
vdev_enc_sysfs_path=$(realpath "/sys/class/block/$dev/device/enclosure_device"*)
if [ ! -d "$vdev_enc_sysfs_path" ] ; then
# This is not a JBOD disk, but it could be a PCI NVMe drive
vdev_enc_sysfs_path=$(nvme_dev_to_slot "$dev")
fi

current_val=$(echo "$therest" | awk '{print $NF}')

if [ "$current_val" != "0" ] ; then
Expand All @@ -145,9 +206,10 @@ process_pool()
continue
fi

if [ ! -e "$vdev_enc_sysfs_path/fault" ] ; then
led_path=$(path_to_led "$vdev_enc_sysfs_path")
if [ ! -e "$led_path" ] ; then
rc=3
zed_log_msg "vdev $vdev '$file/fault' doesn't exist"
zed_log_msg "vdev $vdev '$led_path' doesn't exist"
continue
fi

Expand All @@ -158,7 +220,7 @@ process_pool()
continue
fi

if ! check_and_set_led "$vdev_enc_sysfs_path/fault" "$val"; then
if ! check_and_set_led "$led_path" "$val"; then
rc=3
fi
done
Expand All @@ -169,7 +231,8 @@ if [ -n "$ZEVENT_VDEV_ENC_SYSFS_PATH" ] && [ -n "$ZEVENT_VDEV_STATE_STR" ] ; the
# Got a statechange for an individual vdev
val=$(state_to_val "$ZEVENT_VDEV_STATE_STR")
vdev=$(basename "$ZEVENT_VDEV_PATH")
check_and_set_led "$ZEVENT_VDEV_ENC_SYSFS_PATH/fault" "$val"
ledpath=$(path_to_led "$ZEVENT_VDEV_ENC_SYSFS_PATH")
check_and_set_led "$ledpath" "$val"
else
# Process the entire pool
poolname=$(zed_guid_to_pool "$ZEVENT_POOL_GUID")
Expand Down
4 changes: 2 additions & 2 deletions cmd/zed/zed.d/zed.rc
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@

##
# Turn on/off enclosure LEDs when drives get DEGRADED/FAULTED. This works for
# device mapper and multipath devices as well. Your enclosure must be
# supported by the Linux SES driver for this to work.
# device mapper and multipath devices as well. This works with JBOD enclosures
# and NVMe PCI drives (assuming they're supported by Linux in sysfs).
#
ZED_USE_ENCLOSURE_LEDS=1

Expand Down
8 changes: 7 additions & 1 deletion cmd/zpool/zpool.d/ses
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,13 @@ for i in $scripts ; do
val=$(ls "$VDEV_ENC_SYSFS_PATH/../device/scsi_generic" 2>/dev/null)
;;
fault_led)
val=$(cat "$VDEV_ENC_SYSFS_PATH/fault" 2>/dev/null)
# JBODs fault LED is called 'fault', NVMe fault LED is called
# 'attention'.
if [ -f "$VDEV_ENC_SYSFS_PATH/fault" ] ; then
val=$(cat "$VDEV_ENC_SYSFS_PATH/fault" 2>/dev/null)
elif [ -f "$VDEV_ENC_SYSFS_PATH/attention" ] ; then
val=$(cat "$VDEV_ENC_SYSFS_PATH/attention" 2>/dev/null)
fi
;;
locate_led)
val=$(cat "$VDEV_ENC_SYSFS_PATH/locate" 2>/dev/null)
Expand Down
146 changes: 143 additions & 3 deletions lib/libzutil/os/linux/zutil_device_path_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -154,18 +154,148 @@ zfs_strip_path(char *path)
return (strrchr(path, '/') + 1);
}

/*
* Read the contents of a sysfs file into an allocated buffer and remove the
* last newline.
*
* This is useful for reading sysfs files that return a single string. Return
* an allocated string pointer on success, NULL otherwise. Returned buffer
* must be freed by the user.
*/
static char *
zfs_read_sysfs_file(char *filepath)
{
char buf[4096]; /* all sysfs files report 4k size */
char *str = NULL;

FILE *fp = fopen(filepath, "r");
if (fp == NULL) {
return (NULL);
}
tonyhutter marked this conversation as resolved.
Show resolved Hide resolved
if (fgets(buf, sizeof (buf), fp) == buf) {
/* success */

/* Remove the last newline (if any) */
size_t len = strlen(buf);
if (buf[len - 1] == '\n') {
buf[len - 1] = '\0';
}
str = strdup(buf);
}

fclose(fp);

return (str);
}

/*
 * Given a dev name like "nvme0n1", return the full PCI slot sysfs path to
 * the drive (in /sys/bus/pci/slots).
 *
 * For example:
 *     dev_name: "nvme0n1"
 *     returns: "/sys/bus/pci/slots/0"
 *
 * 'dev_name' may be prefixed with a path (like "/dev/nvme0n1"); only the
 * final path component is used.  Names that do not start with "nvme" are
 * rejected.
 *
 * Returned string must be freed.  Returns NULL on error or no sysfs path.
 */
static char *
zfs_get_pci_slots_sys_path(const char *dev_name)
{
	DIR *dp = NULL;
	struct dirent *ep;
	char *address1 = NULL;
	char *address2 = NULL;
	char *path = NULL;
	char buf[MAXPATHLEN];
	char *tmp;

	/* If they preface 'dev' with a path (like "/dev") then strip it off */
	tmp = strrchr(dev_name, '/');
	if (tmp != NULL)
		dev_name = tmp + 1;	/* +1 since we want the chr after '/' */

	if (strncmp("nvme", dev_name, 4) != 0)
		return (NULL);

	(void) snprintf(buf, sizeof (buf), "/sys/block/%s/device/address",
	    dev_name);

	address1 = zfs_read_sysfs_file(buf);
	if (!address1)
		return (NULL);

	/*
	 * /sys/block/nvme0n1/device/address format will
	 * be "0000:01:00.0" while /sys/bus/pci/slots/0/address will be
	 * "0000:01:00".  Just NULL terminate at the '.' so they match.
	 *
	 * Note: 'tmp' points into 'address1' here and must never be freed.
	 */
	tmp = strrchr(address1, '.');
	if (tmp != NULL)
		*tmp = '\0';

	dp = opendir("/sys/bus/pci/slots/");
	if (dp == NULL) {
		free(address1);
		return (NULL);
	}

	/*
	 * Look through all the /sys/bus/pci/slots/ subdirs
	 */
	while ((ep = readdir(dp))) {
		/*
		 * We only care about directory names that are a single number.
		 * Sometimes there's other directories like
		 * "/sys/bus/pci/slots/0-3/" in there - skip those.
		 */
		if (!zfs_isnumber(ep->d_name))
			continue;

		(void) snprintf(buf, sizeof (buf),
		    "/sys/bus/pci/slots/%s/address", ep->d_name);

		address2 = zfs_read_sysfs_file(buf);
		if (!address2)
			continue;

		if (strcmp(address1, address2) == 0) {
			/* Addresses match, we're all done */
			free(address2);
			if (asprintf(&path, "/sys/bus/pci/slots/%s",
			    ep->d_name) == -1) {
				/*
				 * On failure the contents of 'path' are
				 * undefined; make sure we return NULL.
				 */
				path = NULL;
			}
			break;
		}
		free(address2);
	}

	closedir(dp);
	free(address1);

	return (path);
}

/*
* Given a dev name like "sda", return the full enclosure sysfs path to
* the disk. You can also pass in the name with "/dev" prepended
* to it (like /dev/sda).
* to it (like /dev/sda). This works for both JBODs and NVMe PCI devices.
*
* For example, disk "sda" in enclosure slot 1:
* dev: "sda"
* dev_name: "sda"
* returns: "/sys/class/enclosure/1:0:3:0/Slot 1"
*
* Or:
*
* dev_name: "nvme0n1"
* returns: "/sys/bus/pci/slots/0"
*
* 'dev' must be a non-devicemapper device.
*
* Returned string must be freed.
* Returned string must be freed. Returns NULL on error.
*/
char *
zfs_get_enclosure_sysfs_path(const char *dev_name)
Expand Down Expand Up @@ -252,6 +382,16 @@ zfs_get_enclosure_sysfs_path(const char *dev_name)
if (dp != NULL)
closedir(dp);

if (!path) {
/*
* This particular disk isn't in a JBOD. It could be an NVMe
* drive. If so, look up the NVMe device's path in
* /sys/bus/pci/slots/. Within that directory is an 'attention'
* file which controls the NVMe fault LED.
*/
path = zfs_get_pci_slots_sys_path(dev_name);
}

return (path);
}

Expand Down
9 changes: 9 additions & 0 deletions lib/libzutil/zutil_nicenum.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <math.h>
#include <stdio.h>
#include <libzutil.h>
#include <string.h>

/*
* Return B_TRUE if "str" is a number string, B_FALSE otherwise.
Expand All @@ -42,6 +43,14 @@ zfs_isnumber(const char *str)
if (!(isdigit(*str) || (*str == '.')))
return (B_FALSE);

/*
* Numbers should not end with a period ("." ".." or "5." are
* not valid)
*/
if (str[strlen(str) - 1] == '.') {
return (B_FALSE);
}

return (B_TRUE);
}

Expand Down