Skip to content

Commit

Permalink
Fix sequential resilver drive failure race condition
Browse files Browse the repository at this point in the history
This patch handles the race condition on simultaneous failure of
2 drives, which misses the vdev_rebuild_reset_wanted signal in
vdev_rebuild_thread. We retry to catch this inside the
vdev_rebuild_complete_sync function.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Richard Yao <richard.yao@alumni.stonybrook.edu>
Reviewed-by: Dipak Ghosh <dipak.ghosh@hpe.com>
Reviewed-by: Akash B <akash-b@hpe.com>
Signed-off-by: Samuel Wycliffe J <samwyc@hpe.com>
Closes openzfs#14041
Closes openzfs#14050
  • Loading branch information
samwyc authored and andrewc12 committed Nov 9, 2022
1 parent aeeeaa0 commit d77435d
Showing 1 changed file with 13 additions and 1 deletion.
14 changes: 13 additions & 1 deletion module/zfs/vdev_rebuild.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
*
* Copyright (c) 2018, Intel Corporation.
* Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
* Copyright (c) 2022 Hewlett Packard Enterprise Development LP.
*/

#include <sys/vdev_impl.h>
Expand Down Expand Up @@ -134,6 +135,7 @@ static int zfs_rebuild_scrub_enabled = 1;
* For vdev_rebuild_initiate_sync() and vdev_rebuild_reset_sync().
*/
static __attribute__((noreturn)) void vdev_rebuild_thread(void *arg);
static void vdev_rebuild_reset_sync(void *arg, dmu_tx_t *tx);

/*
* Clear the per-vdev rebuild bytes value for a vdev tree.
Expand Down Expand Up @@ -307,6 +309,17 @@ vdev_rebuild_complete_sync(void *arg, dmu_tx_t *tx)
vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;

mutex_enter(&vd->vdev_rebuild_lock);

/*
* Handle a second device failure if it occurs after all rebuild I/O
* has completed but before this sync task has been executed.
*/
if (vd->vdev_rebuild_reset_wanted) {
mutex_exit(&vd->vdev_rebuild_lock);
vdev_rebuild_reset_sync(arg, tx);
return;
}

vrp->vrp_rebuild_state = VDEV_REBUILD_COMPLETE;
vrp->vrp_end_time = gethrestime_sec();

Expand Down Expand Up @@ -760,7 +773,6 @@ vdev_rebuild_thread(void *arg)
ASSERT(vd->vdev_rebuilding);
ASSERT(spa_feature_is_active(spa, SPA_FEATURE_DEVICE_REBUILD));
ASSERT3B(vd->vdev_rebuild_cancel_wanted, ==, B_FALSE);
ASSERT3B(vd->vdev_rebuild_reset_wanted, ==, B_FALSE);

vdev_rebuild_t *vr = &vd->vdev_rebuild_config;
vdev_rebuild_phys_t *vrp = &vr->vr_rebuild_phys;
Expand Down

0 comments on commit d77435d

Please sign in to comment.