From 6bf4b3e8dfe035a8d97196f21befa93367d87f06 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 24 Jan 2023 14:05:45 -0800 Subject: [PATCH] Increase default zfs_scan_vdev_limit to 16MB For HDD based pools the default zfs_scan_vdev_limit of 4M per-vdev can significantly limit the maximum scrub performance. Increasing the default to 16M can double the scrub speed from 80 MB/s per disk to 160 MB/s per disk. This does increase the memory footprint during scrub/resilver but given the performance win this is a reasonable trade-off. Memory usage is capped at 1/4 of arc_c_max. Note that the number of outstanding I/Os has not changed and is still limited by zfs_vdev_scrub_max_active. Reviewed-by: Akash B Reviewed-by: Tony Nguyen Reviewed-by: Alexander Motin Signed-off-by: Brian Behlendorf Closes #14428 --- man/man4/zfs.4 | 2 +- module/zfs/dsl_scan.c | 28 ++++++++++++++++------------ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index d58523f831df..c61753ae1508 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -1846,7 +1846,7 @@ When disabled, the memory limit may be exceeded by fast disks. Freezes a scrub/resilver in progress without actually pausing it. Intended for testing/debugging. . -.It Sy zfs_scan_vdev_limit Ns = Ns Sy 4194304 Ns B Po 4MB Pc Pq int +.It Sy zfs_scan_vdev_limit Ns = Ns Sy 16777216 Ns B Po 16 MiB Pc Pq int Maximum amount of data that can be concurrently issued at once for scrubs and resilvers per leaf device, given in bytes. . 
diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 49ea1d47cf71..f0a851ff53a9 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -126,7 +127,7 @@ static boolean_t scan_ds_queue_contains(dsl_scan_t *scn, uint64_t dsobj, static void scan_ds_queue_insert(dsl_scan_t *scn, uint64_t dsobj, uint64_t txg); static void scan_ds_queue_remove(dsl_scan_t *scn, uint64_t dsobj); static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx); -static uint64_t dsl_scan_count_data_disks(vdev_t *vd); +static uint64_t dsl_scan_count_data_disks(spa_t *spa); extern int zfs_vdev_async_write_active_min_dirty_percent; static int zfs_scan_blkstats = 0; @@ -156,7 +157,7 @@ int zfs_scan_strict_mem_lim = B_FALSE; * overload the drives with I/O, since that is protected by * zfs_vdev_scrub_max_active. */ -unsigned long zfs_scan_vdev_limit = 4 << 20; +unsigned long zfs_scan_vdev_limit = 16 << 20; int zfs_scan_issue_strategy = 0; int zfs_scan_legacy = B_FALSE; /* don't queue & sort zios, go direct */ @@ -459,11 +460,12 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg) /* * Calculate the max number of in-flight bytes for pool-wide - * scanning operations (minimum 1MB). Limits for the issuing - * phase are done per top-level vdev and are handled separately. + * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max). + * Limits for the issuing phase are done per top-level vdev and + * are handled separately. 
*/ - scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit * - dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20); + scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20, + zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa))); avl_create(&scn->scn_queue, scan_ds_queue_compare, sizeof (scan_ds_t), offsetof(scan_ds_t, sds_node)); @@ -2809,8 +2811,9 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) } static uint64_t -dsl_scan_count_data_disks(vdev_t *rvd) +dsl_scan_count_data_disks(spa_t *spa) { + vdev_t *rvd = spa->spa_root_vdev; uint64_t i, leaves = 0; for (i = 0; i < rvd->vdev_children; i++) { @@ -3715,12 +3718,13 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) taskqid_t prefetch_tqid; /* - * Recalculate the max number of in-flight bytes for pool-wide - * scanning operations (minimum 1MB). Limits for the issuing - * phase are done per top-level vdev and are handled separately. + * Calculate the max number of in-flight bytes for pool-wide + * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max). + * Limits for the issuing phase are done per top-level vdev and + * are handled separately. */ - scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit * - dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20); + scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20, + zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa))); if (scnp->scn_ddt_bookmark.ddb_class <= scnp->scn_ddt_class_max) {