Ratelimit deadman zevents as with delay zevents
Just as delay zevents can flood the zevent pipe when a vdev becomes
unresponsive, so can the deadman zevents.

Ratelimit deadman zevents according to the same tunable as for delay
zevents.

Enable deadman tests on FreeBSD and add a test for deadman event
ratelimiting. 

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Don Brady <don.brady@delphix.com>
Signed-off-by: Ryan Moeller <ryan@iXsystems.com>
Closes openzfs#11786
Ryan Moeller authored and Ryan Moeller committed May 6, 2021
1 parent 1566457 commit 33506c3
Showing 9 changed files with 102 additions and 12 deletions.
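The mechanism reused throughout this commit is the per-vdev zfs_ratelimit_t (see the struct change below): each limiter admits at most *burst events per interval seconds and suppresses the rest, reading the burst count through a pointer so that tunable changes take effect immediately. As a rough mental model only, here is a self-contained userspace sketch of that behavior; it is not the OpenZFS implementation, and every name in it is illustrative:

#include <stdio.h>
#include <time.h>

/* Illustrative stand-in for zfs_ratelimit_t: *burst events per interval. */
typedef struct {
	time_t start;		/* start of the current window */
	unsigned int count;	/* events admitted in this window */
	unsigned int *burst;	/* pointer to the tunable, read live */
	unsigned int interval;	/* window length in seconds */
} ratelimit_t;

/* Returns 1 if the event may be posted, 0 if it should be suppressed. */
static int
ratelimit(ratelimit_t *rl)
{
	time_t now = time(NULL);

	if (now - rl->start >= rl->interval) {
		rl->start = now;	/* new window: reset the budget */
		rl->count = 0;
	}
	if (rl->count >= *rl->burst)
		return (0);		/* budget exhausted: drop */
	rl->count++;
	return (1);
}

/* Models the zfs_slow_io_events_per_second tunable. */
static unsigned int slow_io_events_per_second = 20;

int
main(void)
{
	ratelimit_t delay_rl = { time(NULL), 0, &slow_io_events_per_second, 1 };
	int posted = 0;

	/* A burst of 1000 "events" arriving in well under a second. */
	for (int i = 0; i < 1000; i++)
		posted += ratelimit(&delay_rl);
	printf("posted %d of 1000 events\n", posted);	/* expect 20 */
	return (0);
}

Compiled and run, this prints "posted 20 of 1000 events": the first 20 events in the one-second window pass and the rest are suppressed, which is exactly the flood protection this commit extends to deadman events.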
3 changes: 2 additions & 1 deletion include/sys/vdev_impl.h
@@ -441,10 +441,11 @@ struct vdev {
 	kmutex_t	vdev_probe_lock; /* protects vdev_probe_zio */
 
 	/*
-	 * We rate limit ZIO delay and ZIO checksum events, since they
+	 * We rate limit ZIO delay, deadman, and checksum events, since they
 	 * can flood ZED with tons of events when a drive is acting up.
 	 */
 	zfs_ratelimit_t vdev_delay_rl;
+	zfs_ratelimit_t vdev_deadman_rl;
 	zfs_ratelimit_t vdev_checksum_rl;
 };

3 changes: 2 additions & 1 deletion man/man5/zfs-module-parameters.5
@@ -1620,7 +1620,8 @@ Default value: \fB64\fR.
 \fBzfs_slow_io_events_per_second\fR (int)
 .ad
 .RS 12n
-Rate limit delay zevents (which report slow I/Os) to this many per second.
+Rate limit delay and deadman zevents (which report slow I/Os) to this many per
+second.
 .sp
 Default value: 20
 .RE
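For example, at the default of 20, a vdev that is timing out hundreds of I/Os per second still produces at most 20 delay/deadman zevents per second (the limiter is per vdev, as the struct change above shows) for ZED or other zevent consumers to drain.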
3 changes: 3 additions & 0 deletions module/zfs/vdev.c
@@ -543,6 +543,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
 	 */
 	zfs_ratelimit_init(&vd->vdev_delay_rl, &zfs_slow_io_events_per_second,
 	    1);
+	zfs_ratelimit_init(&vd->vdev_deadman_rl, &zfs_slow_io_events_per_second,
+	    1);
 	zfs_ratelimit_init(&vd->vdev_checksum_rl,
 	    &zfs_checksum_events_per_second, 1);

@@ -1033,6 +1035,7 @@ vdev_free(vdev_t *vd)
 	cv_destroy(&vd->vdev_rebuild_io_cv);
 
 	zfs_ratelimit_fini(&vd->vdev_delay_rl);
+	zfs_ratelimit_fini(&vd->vdev_deadman_rl);
 	zfs_ratelimit_fini(&vd->vdev_checksum_rl);
 
 	if (vd == spa->spa_root_vdev)
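Both new calls bind the deadman limiter to the same zfs_slow_io_events_per_second tunable as the delay limiter, with a one-second interval, and the matching fini keeps teardown symmetric. The helper interface, as inferred from these call sites (a sketch, not quoted from the headers):

/*
 * Presumed shape of the ratelimit helpers, inferred from usage in this
 * commit: admit up to *burst events per interval seconds.
 */
void zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int *burst,
    unsigned int interval);
void zfs_ratelimit_fini(zfs_ratelimit_t *rl);
int zfs_ratelimit(zfs_ratelimit_t *rl);	/* 0 when ratelimiting, 1 when not */

Because the burst count is passed by pointer rather than by value, lowering the tunable at runtime throttles both event streams at once.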
8 changes: 5 additions & 3 deletions module/zfs/zfs_fm.c
@@ -357,8 +357,8 @@ zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector)
 }
 
 /*
- * We want to rate limit ZIO delay and checksum events so as to not
- * flood ZED when a disk is acting up.
+ * We want to rate limit ZIO delay, deadman, and checksum events so as to not
+ * flood zevent consumers when a disk is acting up.
  *
  * Returns 1 if we're ratelimiting, 0 if not.
  */
@@ -367,11 +367,13 @@ zfs_is_ratelimiting_event(const char *subclass, vdev_t *vd)
 {
 	int rc = 0;
 	/*
-	 * __ratelimit() returns 1 if we're *not* ratelimiting and 0 if we
+	 * zfs_ratelimit() returns 1 if we're *not* ratelimiting and 0 if we
 	 * are. Invert it to get our return value.
 	 */
 	if (strcmp(subclass, FM_EREPORT_ZFS_DELAY) == 0) {
 		rc = !zfs_ratelimit(&vd->vdev_delay_rl);
+	} else if (strcmp(subclass, FM_EREPORT_ZFS_DEADMAN) == 0) {
+		rc = !zfs_ratelimit(&vd->vdev_deadman_rl);
 	} else if (strcmp(subclass, FM_EREPORT_ZFS_CHECKSUM) == 0) {
 		rc = !zfs_ratelimit(&vd->vdev_checksum_rl);
 	}
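For context on the consumer side: the ereport path in zfs_fm.c consults this helper before assembling an event, so a suppressed deadman ereport costs little more than a strcmp and a counter check. Paraphrased and simplified (not part of this diff):

	/* early in the ereport setup path */
	if (zfs_is_ratelimiting_event(subclass, vd))
		return (SET_ERROR(EBUSY));	/* ratelimited: post nothing */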
6 changes: 6 additions & 0 deletions tests/runfiles/common.run
@@ -571,6 +571,12 @@ tags = ['functional', 'cp_files']
 tests = ['ctime_001_pos' ]
 tags = ['functional', 'ctime']
 
+[tests/functional/deadman]
+tests = ['deadman_ratelimit', 'deadman_sync', 'deadman_zio']
+pre =
+post =
+tags = ['functional', 'deadman']
+
 [tests/functional/delegate]
 tests = ['zfs_allow_001_pos', 'zfs_allow_002_pos', 'zfs_allow_003_pos',
     'zfs_allow_004_pos', 'zfs_allow_005_pos', 'zfs_allow_006_pos',
6 changes: 0 additions & 6 deletions tests/runfiles/linux.run
@@ -85,12 +85,6 @@ tags = ['functional', 'cli_root', 'zpool_split']
 tests = ['compress_004_pos']
 tags = ['functional', 'compression']
 
-[tests/functional/deadman:Linux]
-tests = ['deadman_sync', 'deadman_zio']
-pre =
-post =
-tags = ['functional', 'deadman']
-
 [tests/functional/devices:Linux]
 tests = ['devices_001_pos', 'devices_002_neg', 'devices_003_pos']
 tags = ['functional', 'devices']
1 change: 1 addition & 0 deletions tests/zfs-tests/tests/functional/deadman/Makefile.am
@@ -1,5 +1,6 @@
 pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/deadman
 dist_pkgdata_SCRIPTS = \
+	deadman_ratelimit.ksh \
 	deadman_sync.ksh \
 	deadman_zio.ksh

78 changes: 78 additions & 0 deletions tests/zfs-tests/tests/functional/deadman/deadman_ratelimit.ksh
@@ -0,0 +1,78 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Portions Copyright 2021 iXsystems, Inc.
+#
+
+# DESCRIPTION:
+#	Verify spa deadman events are rate limited
+#
+# STRATEGY:
+#	1. Reduce the zfs_slow_io_events_per_second to 1.
+#	2. Reduce the zfs_deadman_ziotime_ms to 1ms.
+#	3. Write data to a pool and read it back.
+#	4. Verify deadman events have been produced at a reasonable rate.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/deadman/deadman.cfg
+
+verify_runnable "both"
+
+function cleanup
+{
+	zinject -c all
+	default_cleanup_noexit
+
+	set_tunable64 SLOW_IO_EVENTS_PER_SECOND $OLD_SLOW_IO_EVENTS
+	set_tunable64 DEADMAN_ZIOTIME_MS $ZIOTIME_DEFAULT
+}
+
+log_assert "Verify spa deadman events are rate limited"
+log_onexit cleanup
+
+OLD_SLOW_IO_EVENTS=$(get_tunable SLOW_IO_EVENTS_PER_SECOND)
+log_must set_tunable64 SLOW_IO_EVENTS_PER_SECOND 1
+log_must set_tunable64 DEADMAN_ZIOTIME_MS 1
+
+# Create a new pool in order to use the updated deadman settings.
+default_setup_noexit $DISK1
+log_must zpool events -c
+
+mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)
+log_must file_write -b 1048576 -c 8 -o create -d 0 -f $mntpnt/file
+log_must zpool export $TESTPOOL
+log_must zpool import $TESTPOOL
+log_must zinject -d $DISK1 -D 5:1 $TESTPOOL
+log_must dd if=$mntpnt/file of=$TEST_BASE_DIR/devnull oflag=sync
+
+events=$(zpool events $TESTPOOL | grep -c ereport.fs.zfs.deadman)
+log_note "events=$events"
+if [ "$events" -lt 1 ]; then
+	log_fail "Expect >= 1 deadman events, $events found"
+fi
+if [ "$events" -gt 10 ]; then
+	log_fail "Expect <= 10 deadman events, $events found"
+fi
+
+log_pass "Verify spa deadman events are rate limited"
6 changes: 5 additions & 1 deletion tests/zfs-tests/tests/functional/deadman/deadman_sync.ksh
@@ -73,7 +73,11 @@ log_must zinject -c all
 log_must zpool sync
 
 # Log txg sync times for reference and the zpool event summary.
-log_must cat /proc/spl/kstat/zfs/$TESTPOOL/txgs
+if is_freebsd; then
+	log_must sysctl -n kstat.zfs.$TESTPOOL.txgs
+else
+	log_must cat /proc/spl/kstat/zfs/$TESTPOOL/txgs
+fi
 log_must zpool events
 
 # Verify at least 5 deadman events were logged. The first after 5 seconds,
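The new branch changes only where the kstat is read: FreeBSD exposes the pool's txg history as the sysctl kstat.zfs.$TESTPOOL.txgs, while Linux publishes the same kstat under /proc/spl/kstat/zfs/$TESTPOOL/txgs, which is part of what previously kept the deadman tests Linux-only.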
