Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restrict raidz faulted vdev count #16569

Merged
merged 1 commit into from
Oct 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 33 additions & 10 deletions module/zfs/vdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -3149,9 +3149,9 @@ vdev_dtl_should_excise(vdev_t *vd, boolean_t rebuild_done)
* Reassess DTLs after a config change or scrub completion. If txg == 0 no
* write operations will be issued to the pool.
*/
void
vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
boolean_t scrub_done, boolean_t rebuild_done)
static void
vdev_dtl_reassess_impl(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
boolean_t scrub_done, boolean_t rebuild_done, boolean_t faulting)
{
spa_t *spa = vd->vdev_spa;
avl_tree_t reftree;
Expand All @@ -3160,8 +3160,8 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);

for (int c = 0; c < vd->vdev_children; c++)
vdev_dtl_reassess(vd->vdev_child[c], txg,
scrub_txg, scrub_done, rebuild_done);
vdev_dtl_reassess_impl(vd->vdev_child[c], txg,
scrub_txg, scrub_done, rebuild_done, faulting);

if (vd == spa->spa_root_vdev || !vdev_is_concrete(vd) || vd->vdev_aux)
return;
Expand Down Expand Up @@ -3255,11 +3255,21 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
if (scrub_done)
range_tree_vacate(vd->vdev_dtl[DTL_SCRUB], NULL, NULL);
range_tree_vacate(vd->vdev_dtl[DTL_OUTAGE], NULL, NULL);
if (!vdev_readable(vd))

/*
* For the faulting case, treat members of a replacing vdev
* as if they are not available. It's more likely than not that
* a vdev in a replacing vdev could encounter read errors so
* treat it as not being able to contribute.
*/
if (!vdev_readable(vd) ||
(faulting && vd->vdev_parent != NULL &&
vd->vdev_parent->vdev_ops == &vdev_replacing_ops)) {
range_tree_add(vd->vdev_dtl[DTL_OUTAGE], 0, -1ULL);
else
} else {
range_tree_walk(vd->vdev_dtl[DTL_MISSING],
range_tree_add, vd->vdev_dtl[DTL_OUTAGE]);
}

/*
* If the vdev was resilvering or rebuilding and no longer
Expand Down Expand Up @@ -3321,6 +3331,14 @@ vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
}
}

void
vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
boolean_t scrub_done, boolean_t rebuild_done)
{
return (vdev_dtl_reassess_impl(vd, txg, scrub_txg, scrub_done,
rebuild_done, B_FALSE));
}

/*
* Iterate over all the vdevs except spare, and post kobj events
*/
Expand Down Expand Up @@ -3548,7 +3566,11 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg)
}

/*
* Determine whether the specified vdev can be offlined/detached/removed
* Determine whether the specified vdev can be
* - offlined
* - detached
* - removed
* - faulted
* without losing data.
*/
boolean_t
Expand All @@ -3558,6 +3580,7 @@ vdev_dtl_required(vdev_t *vd)
vdev_t *tvd = vd->vdev_top;
uint8_t cant_read = vd->vdev_cant_read;
boolean_t required;
boolean_t faulting = vd->vdev_state == VDEV_STATE_FAULTED;

ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);

Expand All @@ -3570,10 +3593,10 @@ vdev_dtl_required(vdev_t *vd)
* If not, we can safely offline/detach/remove the device.
*/
vd->vdev_cant_read = B_TRUE;
vdev_dtl_reassess(tvd, 0, 0, B_FALSE, B_FALSE);
vdev_dtl_reassess_impl(tvd, 0, 0, B_FALSE, B_FALSE, faulting);
required = !vdev_dtl_empty(tvd, DTL_OUTAGE);
vd->vdev_cant_read = cant_read;
vdev_dtl_reassess(tvd, 0, 0, B_FALSE, B_FALSE);
vdev_dtl_reassess_impl(tvd, 0, 0, B_FALSE, B_FALSE, faulting);

if (!required && zio_injection_enabled) {
required = !!zio_handle_device_injection(vd, NULL,
Expand Down
3 changes: 2 additions & 1 deletion tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_online_002_pos',
'auto_replace_001_pos', 'auto_replace_002_pos', 'auto_spare_001_pos',
'auto_spare_002_pos', 'auto_spare_multiple', 'auto_spare_ashift',
'auto_spare_shared', 'decrypt_fault', 'decompress_fault',
'scrub_after_resilver', 'suspend_resume_single', 'zpool_status_-s']
'fault_limits', 'scrub_after_resilver', 'suspend_resume_single',
'zpool_status_-s']
tags = ['functional', 'fault']

[tests/functional/features/large_dnode:Linux]
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -1525,6 +1525,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/fault/cleanup.ksh \
functional/fault/decompress_fault.ksh \
functional/fault/decrypt_fault.ksh \
functional/fault/fault_limits.ksh \
functional/fault/scrub_after_resilver.ksh \
functional/fault/suspend_resume_single.ksh \
functional/fault/setup.ksh \
Expand Down
96 changes: 96 additions & 0 deletions tests/zfs-tests/tests/functional/fault/fault_limits.ksh
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright (c) 2024 by Klara, Inc.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/fault/fault.cfg

#
# DESCRIPTION: Verify that raidz children vdev fault count is restricted
#
# STRATEGY:
# 1. Create a raidz2 or raidz3 pool and add some data to it
# 2. Replace one of the child vdevs to create a replacing vdev
# 3. While it is resilvering, attempt to fault disks
# 4. Verify that less than parity count was faulted while replacing
#

TESTPOOL="fault-test-pool"
PARITY=$((RANDOM%(2) + 2))
VDEV_CNT=$((4 + (2 * PARITY)))
VDEV_SIZ=512M

function cleanup
{
poolexists "$TESTPOOL" && log_must_busy zpool destroy "$TESTPOOL"

for i in {0..$((VDEV_CNT - 1))}; do
log_must rm -f "$TEST_BASE_DIR/dev-$i"
done
}

log_onexit cleanup
log_assert "restricts raidz children vdev fault count"

log_note "creating $VDEV_CNT vdevs for parity $PARITY test"
typeset -a disks
for i in {0..$((VDEV_CNT - 1))}; do
device=$TEST_BASE_DIR/dev-$i
log_must truncate -s $VDEV_SIZ $device
disks[${#disks[*]}+1]=$device
done

log_must zpool create -f ${TESTPOOL} raidz${PARITY} ${disks[1..$((VDEV_CNT - 1))]}

# Add some data to the pool
log_must zfs create $TESTPOOL/fs
MNTPOINT="$(get_prop mountpoint $TESTPOOL/fs)"
log_must fill_fs $MNTPOINT $PARITY 200 32768 1000 Z
sync_pool $TESTPOOL

# Replace the last child vdev to form a replacing vdev
log_must zpool replace ${TESTPOOL} ${disks[$((VDEV_CNT - 1))]} ${disks[$VDEV_CNT]}
# imediately offline replacement disk to keep replacing vdev around
log_must zpool offline ${TESTPOOL} ${disks[$VDEV_CNT]}

# Fault disks while a replacing vdev is still active
for disk in ${disks[0..$PARITY]}; do
log_must zpool offline -tf ${TESTPOOL} $disk
done

zpool status $TESTPOOL

# Count the faults that succeeded
faults=0
for disk in ${disks[0..$PARITY]}; do
state=$(zpool get -H -o value state ${TESTPOOL} ${disk})
if [ "$state" = "FAULTED" ] ; then
((faults=faults+1))
fi
done

log_must test "$faults" -lt "$PARITY"
log_must test "$faults" -gt 0

log_pass "restricts raidz children vdev fault count"
Loading