Skip to content

Commit

Permalink
Fix scn_queue races on very old pools
Browse files Browse the repository at this point in the history
Code for pools before version 11 uses dmu_objset_find_dp() to scan
for child datasets/clones.  It calls the enqueue_clones_cb() and
enqueue_cb() callbacks in parallel from multiple taskq threads.
This is unsafe for scan_ds_queue_insert(), and the concurrent inserts
can corrupt the scn_queue AVL tree.  Fix it by introducing a mutex to
protect those two scan_ds_queue_insert() calls.  All other calls are
made from the sync thread and so are already serialized.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Brian Atkinson <batkinson@lanl.gov>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored by:	iXsystems, Inc.
Closes openzfs#16162
  • Loading branch information
amotin authored and ixhamza committed May 23, 2024
1 parent 9efebfb commit 22bf2c7
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 0 deletions.
1 change: 1 addition & 0 deletions include/sys/dsl_scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ typedef struct dsl_scan {
dsl_scan_phys_t scn_phys; /* on disk representation of scan */
dsl_scan_phys_t scn_phys_cached;
avl_tree_t scn_queue; /* queue of datasets to scan */
kmutex_t scn_queue_lock; /* serializes scn_queue inserts */
uint64_t scn_queues_pending; /* outstanding data to issue */
/* members needed for syncing error scrub status to disk */
dsl_errorscrub_phys_t errorscrub_phys;
Expand Down
6 changes: 6 additions & 0 deletions module/zfs/dsl_scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,7 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)

avl_create(&scn->scn_queue, scan_ds_queue_compare, sizeof (scan_ds_t),
offsetof(scan_ds_t, sds_node));
mutex_init(&scn->scn_queue_lock, NULL, MUTEX_DEFAULT, NULL);
avl_create(&scn->scn_prefetch_queue, scan_prefetch_queue_compare,
sizeof (scan_prefetch_issue_ctx_t),
offsetof(scan_prefetch_issue_ctx_t, spic_avl_node));
Expand Down Expand Up @@ -646,6 +647,7 @@ dsl_scan_fini(dsl_pool_t *dp)

scan_ds_queue_clear(scn);
avl_destroy(&scn->scn_queue);
mutex_destroy(&scn->scn_queue_lock);
scan_ds_prefetch_queue_clear(scn);
avl_destroy(&scn->scn_prefetch_queue);

Expand Down Expand Up @@ -2727,8 +2729,10 @@ enqueue_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
return (err);
ds = prev;
}
mutex_enter(&scn->scn_queue_lock);
scan_ds_queue_insert(scn, ds->ds_object,
dsl_dataset_phys(ds)->ds_prev_snap_txg);
mutex_exit(&scn->scn_queue_lock);
dsl_dataset_rele(ds, FTAG);
return (0);
}
Expand Down Expand Up @@ -2919,8 +2923,10 @@ enqueue_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
ds = prev;
}

mutex_enter(&scn->scn_queue_lock);
scan_ds_queue_insert(scn, ds->ds_object,
dsl_dataset_phys(ds)->ds_prev_snap_txg);
mutex_exit(&scn->scn_queue_lock);
dsl_dataset_rele(ds, FTAG);
return (0);
}
Expand Down

0 comments on commit 22bf2c7

Please sign in to comment.