Skip to content

Commit

Permalink
Race condition between spa async threads and export
Browse files Browse the repository at this point in the history
In the past we've seen multiple race conditions that have
to do with open-context threads async threads and concurrent
calls to spa_export()/spa_destroy() (including the one
referenced in issue openzfs#9015).

This patch ensures that only one thread can execute the
main body of spa_export_common() at a time, with subsequent
threads returning with a new error code created just for
this situation, eliminating this way any race condition
bugs introduced by concurrent calls to this function.

Reviewed by: Matt Ahrens <matt@delphix.com>
Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Serapheim Dimitropoulos <serapheim@delphix.com>
Closes openzfs#9015 
Closes openzfs#9044
  • Loading branch information
sdimitro authored and TulsiJain committed Jul 20, 2019
1 parent 2ba964e commit 4101708
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 2 deletions.
18 changes: 17 additions & 1 deletion cmd/ztest/ztest.c
Original file line number Diff line number Diff line change
Expand Up @@ -2719,8 +2719,24 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
VERIFY3U(EEXIST, ==,
spa_create(zo->zo_pool, nvroot, NULL, NULL, NULL));
nvlist_free(nvroot);

/*
* We open a reference to the spa and then we try to export it
* expecting one of the following errors:
*
* EBUSY
* Because of the reference we just opened.
*
* ZFS_ERR_EXPORT_IN_PROGRESS
* For the case that there is another ztest thread doing
* an export concurrently.
*/
VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG));
VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool));
int error = spa_destroy(zo->zo_pool);
if (error != EBUSY && error != ZFS_ERR_EXPORT_IN_PROGRESS) {
fatal(0, "spa_destroy(%s) returned unexpected value %d",
spa->spa_name, error);
}
spa_close(spa, FTAG);

(void) pthread_rwlock_unlock(&ztest_name_lock);
Expand Down
1 change: 1 addition & 0 deletions include/libzfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ typedef enum zfs_error {
EZFS_NO_TRIM, /* no active trim */
EZFS_TRIM_NOTSUP, /* device does not support trim */
EZFS_NO_RESILVER_DEFER, /* pool doesn't support resilver_defer */
EZFS_EXPORT_IN_PROGRESS, /* currently exporting the pool */
EZFS_UNKNOWN
} zfs_error_t;

Expand Down
1 change: 1 addition & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -1324,6 +1324,7 @@ typedef enum {
ZFS_ERR_FROM_IVSET_GUID_MISMATCH,
ZFS_ERR_SPILL_BLOCK_FLAG_MISSING,
ZFS_ERR_UNKNOWN_SEND_STREAM_FEATURE,
ZFS_ERR_EXPORT_IN_PROGRESS,
} zfs_errno_t;

/*
Expand Down
1 change: 1 addition & 0 deletions include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ struct spa {
spa_taskqs_t spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES];
dsl_pool_t *spa_dsl_pool;
boolean_t spa_is_initializing; /* true while opening pool */
boolean_t spa_is_exporting; /* true while exporting pool */
metaslab_class_t *spa_normal_class; /* normal data class */
metaslab_class_t *spa_log_class; /* intent log data class */
metaslab_class_t *spa_special_class; /* special allocation class */
Expand Down
5 changes: 5 additions & 0 deletions lib/libzfs/libzfs_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,8 @@ libzfs_error_description(libzfs_handle_t *hdl)
case EZFS_NO_RESILVER_DEFER:
return (dgettext(TEXT_DOMAIN, "this action requires the "
"resilver_defer feature"));
case EZFS_EXPORT_IN_PROGRESS:
return (dgettext(TEXT_DOMAIN, "pool export in progress"));
case EZFS_UNKNOWN:
return (dgettext(TEXT_DOMAIN, "unknown error"));
default:
Expand Down Expand Up @@ -599,6 +601,9 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
case ZFS_ERR_VDEV_TOO_BIG:
zfs_verror(hdl, EZFS_VDEV_TOO_BIG, fmt, ap);
break;
case ZFS_ERR_EXPORT_IN_PROGRESS:
zfs_verror(hdl, EZFS_EXPORT_IN_PROGRESS, fmt, ap);
break;
case ZFS_ERR_IOC_CMD_UNAVAIL:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "the loaded zfs "
"module does not support this operation. A reboot may "
Expand Down
18 changes: 17 additions & 1 deletion module/zfs/spa.c
Original file line number Diff line number Diff line change
Expand Up @@ -5790,6 +5790,13 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
return (SET_ERROR(ENOENT));
}

if (spa->spa_is_exporting) {
/* the pool is being exported by another thread */
mutex_exit(&spa_namespace_lock);
return (SET_ERROR(ZFS_ERR_EXPORT_IN_PROGRESS));
}
spa->spa_is_exporting = B_TRUE;

/*
* Put a hold on the pool, drop the namespace lock, stop async tasks,
* reacquire the namespace lock, and see if we can export.
Expand Down Expand Up @@ -5825,6 +5832,7 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
(spa->spa_inject_ref != 0 &&
new_state != POOL_STATE_UNINITIALIZED)) {
spa_async_resume(spa);
spa->spa_is_exporting = B_FALSE;
mutex_exit(&spa_namespace_lock);
return (SET_ERROR(EBUSY));
}
Expand All @@ -5839,6 +5847,7 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
if (!force && new_state == POOL_STATE_EXPORTED &&
spa_has_active_shared_spare(spa)) {
spa_async_resume(spa);
spa->spa_is_exporting = B_FALSE;
mutex_exit(&spa_namespace_lock);
return (SET_ERROR(EXDEV));
}
Expand Down Expand Up @@ -5890,9 +5899,16 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
if (!hardforce)
spa_write_cachefile(spa, B_TRUE, B_TRUE);
spa_remove(spa);
} else {
/*
* If spa_remove() is not called for this spa_t and
* there is any possibility that it can be reused,
* we make sure to reset the exporting flag.
*/
spa->spa_is_exporting = B_FALSE;
}
mutex_exit(&spa_namespace_lock);

mutex_exit(&spa_namespace_lock);
return (0);
}

Expand Down

0 comments on commit 4101708

Please sign in to comment.