Skip to content

Commit

Permalink
Fix ztest deadlock in spa_vdev_remove()
Browse files Browse the repository at this point in the history
This patch corrects an issue where spa_vdev_remove() would
call spa_history_log_internal() while holding the spa config
lock. This function may decide to block until the next txg if
the current one seems too full. However, since the thread is
holding the config log, the txg sync thread cannot progress
and the system ends up deadlocked. This patch simply moves
all calls to spa_history_log_internal() outside of the config
lock.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #8162
  • Loading branch information
Tom Caputi authored and behlendorf committed Dec 4, 2018
1 parent 0b606cb commit fedef6d
Showing 1 changed file with 19 additions and 12 deletions.
31 changes: 19 additions & 12 deletions module/zfs/vdev_removal.c
Original file line number Diff line number Diff line change
Expand Up @@ -1894,10 +1894,6 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg)
vdev_dirty_leaves(vd, VDD_DTL, *txg);
vdev_config_dirty(vd);

spa_history_log_internal(spa, "vdev remove", NULL,
"%s vdev %llu (log) %s", spa_name(spa), vd->vdev_id,
(vd->vdev_path != NULL) ? vd->vdev_path : "-");

spa_vdev_config_exit(spa, NULL, *txg, 0, FTAG);

*txg = spa_vdev_config_enter(spa);
Expand Down Expand Up @@ -2122,6 +2118,7 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
int error = 0;
boolean_t locked = MUTEX_HELD(&spa_namespace_lock);
sysevent_t *ev = NULL;
char *vd_type = NULL, *vd_path = NULL;

ASSERT(spa_writeable(spa));

Expand Down Expand Up @@ -2155,11 +2152,8 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
ev = spa_event_create(spa, vd, NULL,
ESC_ZFS_VDEV_REMOVE_AUX);

char *nvstr = fnvlist_lookup_string(nv,
ZPOOL_CONFIG_PATH);
spa_history_log_internal(spa, "vdev remove", NULL,
"%s vdev (%s) %s", spa_name(spa),
VDEV_TYPE_SPARE, nvstr);
vd_type = VDEV_TYPE_SPARE;
vd_path = fnvlist_lookup_string(nv, ZPOOL_CONFIG_PATH);
spa_vdev_remove_aux(spa->spa_spares.sav_config,
ZPOOL_CONFIG_SPARES, spares, nspares, nv);
spa_load_spares(spa);
Expand All @@ -2171,9 +2165,8 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 &&
(nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) {
char *nvstr = fnvlist_lookup_string(nv, ZPOOL_CONFIG_PATH);
spa_history_log_internal(spa, "vdev remove", NULL,
"%s vdev (%s) %s", spa_name(spa), VDEV_TYPE_L2CACHE, nvstr);
vd_type = VDEV_TYPE_L2CACHE;
vd_path = fnvlist_lookup_string(nv, ZPOOL_CONFIG_PATH);
/*
* Cache devices can always be removed.
*/
Expand All @@ -2185,6 +2178,8 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
spa->spa_l2cache.sav_sync = B_TRUE;
} else if (vd != NULL && vd->vdev_islog) {
ASSERT(!locked);
vd_type = "log";
vd_path = (vd->vdev_path != NULL) ? vd->vdev_path : "-";
error = spa_vdev_remove_log(vd, &txg);
} else if (vd != NULL) {
ASSERT(!locked);
Expand All @@ -2199,6 +2194,18 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
if (!locked)
error = spa_vdev_exit(spa, NULL, txg, error);

/*
* Logging must be done outside the spa config lock. Otherwise,
* this code path could end up holding the spa config lock while
* waiting for a txg_sync so it can write to the internal log.
* Doing that would prevent the txg sync from actually happening,
* causing a deadlock.
*/
if (error == 0 && vd_type != NULL && vd_path != NULL) {
spa_history_log_internal(spa, "vdev remove", NULL,
"%s vdev (%s) %s", spa_name(spa), vd_type, vd_path);
}

if (ev != NULL)
spa_event_post(ev);

Expand Down

0 comments on commit fedef6d

Please sign in to comment.