From 48431b7da604b11c9ab6178a25c2e8da0833933e Mon Sep 17 00:00:00 2001 From: smh Date: Thu, 16 Oct 2014 02:23:27 +0000 Subject: [PATCH] Prevent ZFS leaking pool free space When processing async destroys ZFS would leak space every txg timeout (5 seconds by default), if no writes occurred, until the pool is totally full. At this point it would be unfixable without a pool recreation. In addition if the machine was rebooted with the pool in this situation would fail to import on boot, hanging indefinitely, as the import process requires the ability to write data to the pool. Any attempts to query the pool status during the hung import would not return as the import holds the pool lock. The only way to import such a pool would be to specify -o readonly=on to the zpool import. zdb -bb can be used to check for "deferred free" size which is where this lost space will be counted. MFC after: 3 days Sponsored by: Multiplay --- .../opensolaris/uts/common/fs/zfs/dsl_scan.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c index bcf04c1ea9b241..4a27c641ad8b54 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c @@ -1459,13 +1459,6 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) "traverse_dataset_destroyed()", err); } - /* - * If we didn't make progress, mark the async destroy as - * stalled, so that we will not initiate a spa_sync() on - * its behalf. - */ - scn->scn_async_stalled = (scn->scn_visited_this_txg == 0); - if (bptree_is_empty(dp->dp_meta_objset, dp->dp_bptree_obj)) { /* finished; deactivate async destroy feature */ spa_feature_decr(spa, SPA_FEATURE_ASYNC_DESTROY, tx); @@ -1478,6 +1471,14 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) dp->dp_bptree_obj, tx)); dp->dp_bptree_obj = 0; scn->scn_async_destroying = B_FALSE; + } else { + /* + * If we didn't make progress, mark the async destroy as + * stalled, so that we will not initiate a spa_sync() on + * its behalf. + */ + scn->scn_async_stalled = + (scn->scn_visited_this_txg == 0); } } if (scn->scn_visited_this_txg) {