From 6aa0e32cc9810ab50e974979e904ec96b22dc4e5 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Wed, 2 Jun 2021 16:06:35 -0400 Subject: [PATCH] Optimize small random numbers generation. In all places except two spa_get_random() is used for small values, and the consumers do not require well seeded high quality values. Switch those two exceptions directly to random_get_pseudo_bytes() and optimize spa_get_random(), renaming it to random_in_range(), since it is not related to SPA or ZFS in general. On FreeBSD directly map random_in_range() to new prng32_bounded() KPI added in FreeBSD 13. On Linux and in user-space just reduce the type used to uint32_t to avoid more expensive 64bit division. Signed-off-by: Alexander Motin Sponsored-By: iXsystems, Inc. --- include/os/freebsd/spl/sys/random.h | 22 +++++++++++++++++++++ include/os/linux/spl/sys/random.h | 15 +++++++++++++++ include/sys/spa.h | 1 - include/sys/zfs_context.h | 15 +++++++++++++++ module/os/linux/zfs/arc_os.c | 4 ++-- module/zfs/metaslab.c | 2 +- module/zfs/mmp.c | 4 ++-- module/zfs/multilist.c | 5 +---- module/zfs/spa.c | 6 +++--- module/zfs/spa_misc.c | 30 +++++++++-------------------- module/zfs/space_map.c | 2 +- module/zfs/vdev_indirect.c | 4 ++-- module/zfs/vdev_mirror.c | 2 +- module/zfs/zil.c | 6 ++++-- module/zfs/zio_compress.c | 2 +- module/zfs/zio_inject.c | 8 ++++---- 16 files changed, 83 insertions(+), 45 deletions(-) diff --git a/include/os/freebsd/spl/sys/random.h b/include/os/freebsd/spl/sys/random.h index b3c9115f5305..746275e53197 100644 --- a/include/os/freebsd/spl/sys/random.h +++ b/include/os/freebsd/spl/sys/random.h @@ -30,6 +30,9 @@ #define _OPENSOLARIS_SYS_RANDOM_H_ #include_next <sys/random.h> +#if __FreeBSD_version >= 1300108 +#include <sys/prng.h> +#endif static inline int random_get_bytes(uint8_t *p, size_t s) @@ -45,4 +48,23 @@ random_get_pseudo_bytes(uint8_t *p, size_t s) return (0); } +static inline uint32_t +random_in_range(uint32_t range) +{ +#if __FreeBSD_version >= 1300108 + return 
(prng32_bounded(range)); +#else + uint32_t r; + + ASSERT(range != 0); + + if (range == 1) + return (0); + + (void) random_get_pseudo_bytes((void *)&r, sizeof (r)); + + return (r % range); +#endif +} + #endif /* !_OPENSOLARIS_SYS_RANDOM_H_ */ diff --git a/include/os/linux/spl/sys/random.h b/include/os/linux/spl/sys/random.h index 1b8cb60d094f..2c446e155761 100644 --- a/include/os/linux/spl/sys/random.h +++ b/include/os/linux/spl/sys/random.h @@ -36,4 +36,19 @@ random_get_bytes(uint8_t *ptr, size_t len) extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len); +static __inline__ uint32_t +random_in_range(uint32_t range) +{ + uint32_t r; + + ASSERT(range != 0); + + if (range == 1) + return (0); + + (void) random_get_pseudo_bytes((void *)&r, sizeof (r)); + + return (r % range); +} + #endif /* _SPL_RANDOM_H */ diff --git a/include/sys/spa.h b/include/sys/spa.h index d37c6c923d8c..08eba250d3a3 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -1065,7 +1065,6 @@ extern spa_t *spa_by_guid(uint64_t pool_guid, uint64_t device_guid); extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid); extern char *spa_strdup(const char *); extern void spa_strfree(char *); -extern uint64_t spa_get_random(uint64_t range); extern uint64_t spa_generate_guid(spa_t *spa); extern void snprintf_blkptr(char *buf, size_t buflen, const blkptr_t *bp); extern void spa_freeze(spa_t *spa); diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index aa4338ed2859..d20efa02af32 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -638,6 +638,21 @@ extern int lowbit64(uint64_t i); extern int random_get_bytes(uint8_t *ptr, size_t len); extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len); +static __inline__ uint32_t +random_in_range(uint32_t range) +{ + uint32_t r; + + ASSERT(range != 0); + + if (range == 1) + return (0); + + (void) random_get_pseudo_bytes((void *)&r, sizeof (r)); + + return (r % range); +} + extern void 
kernel_init(int mode); extern void kernel_fini(void); extern void random_init(void); diff --git a/module/os/linux/zfs/arc_os.c b/module/os/linux/zfs/arc_os.c index 465775a6748e..8deec57be5eb 100644 --- a/module/os/linux/zfs/arc_os.c +++ b/module/os/linux/zfs/arc_os.c @@ -437,7 +437,7 @@ arc_available_memory(void) int64_t lowest = INT64_MAX; /* Every 100 calls, free a small amount */ - if (spa_get_random(100) == 0) + if (random_in_range(100) == 0) lowest = -1024; return (lowest); @@ -458,7 +458,7 @@ arc_all_memory(void) uint64_t arc_free_memory(void) { - return (spa_get_random(arc_all_memory() * 20 / 100)); + return (random_in_range(arc_all_memory() * 20 / 100)); } void diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c index e588765b3382..56bd7331c0f9 100644 --- a/module/zfs/metaslab.c +++ b/module/zfs/metaslab.c @@ -5070,7 +5070,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize, * damage can result in extremely long reconstruction times. This * will also test spilling from special to normal. 
*/ - if (psize >= metaslab_force_ganging && (spa_get_random(100) < 3)) { + if (psize >= metaslab_force_ganging && (random_in_range(100) < 3)) { metaslab_trace_add(zal, NULL, NULL, psize, d, TRACE_FORCE_GANG, allocator); return (SET_ERROR(ENOSPC)); diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c index d05c9db24c20..ba687c93617d 100644 --- a/module/zfs/mmp.c +++ b/module/zfs/mmp.c @@ -523,9 +523,9 @@ mmp_write_uberblock(spa_t *spa) mutex_exit(&mmp->mmp_io_lock); offset = VDEV_UBERBLOCK_OFFSET(vd, VDEV_UBERBLOCK_COUNT(vd) - - MMP_BLOCKS_PER_LABEL + spa_get_random(MMP_BLOCKS_PER_LABEL)); + MMP_BLOCKS_PER_LABEL + random_in_range(MMP_BLOCKS_PER_LABEL)); - label = spa_get_random(VDEV_LABELS); + label = random_in_range(VDEV_LABELS); vdev_label_write(zio, vd, label, ub_abd, offset, VDEV_UBERBLOCK_SIZE(vd), mmp_write_done, mmp, flags | ZIO_FLAG_DONT_PROPAGATE); diff --git a/module/zfs/multilist.c b/module/zfs/multilist.c index eeac73bd7adf..8bbc9b376ae0 100644 --- a/module/zfs/multilist.c +++ b/module/zfs/multilist.c @@ -20,9 +20,6 @@ #include <sys/multilist.h> #include <sys/trace_zfs.h> -/* needed for spa_get_random() */ -#include <sys/spa.h> - /* * This overrides the number of sublists in each multilist_t, which defaults * to the number of CPUs in the system (see multilist_create()). 
@@ -275,7 +272,7 @@ multilist_get_num_sublists(multilist_t *ml) unsigned int multilist_get_random_index(multilist_t *ml) { - return (spa_get_random(ml->ml_num_sublists)); + return (random_in_range(ml->ml_num_sublists)); } /* Lock and return the sublist specified at the given index */ diff --git a/module/zfs/spa.c b/module/zfs/spa.c index 26995575adaa..47fd75c0d83a 100644 --- a/module/zfs/spa.c +++ b/module/zfs/spa.c @@ -3175,7 +3175,7 @@ spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config) import_delay = spa_activity_check_duration(spa, ub); /* Add a small random factor in case of simultaneous imports (0-25%) */ - import_delay += import_delay * spa_get_random(250) / 1000; + import_delay += import_delay * random_in_range(250) / 1000; import_expire = gethrtime() + import_delay; @@ -4619,7 +4619,7 @@ spa_ld_checkpoint_rewind(spa_t *spa) vdev_t *svd[SPA_SYNC_MIN_VDEVS] = { NULL }; int svdcount = 0; int children = rvd->vdev_children; - int c0 = spa_get_random(children); + int c0 = random_in_range(children); for (int c = 0; c < children; c++) { vdev_t *vd = rvd->vdev_child[(c0 + c) % children]; @@ -9111,7 +9111,7 @@ spa_sync_rewrite_vdev_config(spa_t *spa, dmu_tx_t *tx) vdev_t *svd[SPA_SYNC_MIN_VDEVS] = { NULL }; int svdcount = 0; int children = rvd->vdev_children; - int c0 = spa_get_random(children); + int c0 = random_in_range(children); for (int c = 0; c < children; c++) { vdev_t *vd = diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c index 1a2e5abc5335..77d891177cbf 100644 --- a/module/zfs/spa_misc.c +++ b/module/zfs/spa_misc.c @@ -1494,32 +1494,21 @@ spa_strfree(char *s) kmem_free(s, strlen(s) + 1); } -uint64_t -spa_get_random(uint64_t range) -{ - uint64_t r; - - ASSERT(range != 0); - - if (range == 1) - return (0); - - (void) random_get_pseudo_bytes((void *)&r, sizeof (uint64_t)); - - return (r % range); -} - uint64_t spa_generate_guid(spa_t *spa) { - uint64_t guid = spa_get_random(-1ULL); + uint64_t guid; if (spa != NULL) { - while (guid == 0 
|| spa_guid_exists(spa_guid(spa), guid)) - guid = spa_get_random(-1ULL); + do { + (void) random_get_pseudo_bytes((void *)&guid, + sizeof (guid)); + } while (guid == 0 || spa_guid_exists(spa_guid(spa), guid)); } else { - while (guid == 0 || spa_guid_exists(guid, 0)) - guid = spa_get_random(-1ULL); + do { + (void) random_get_pseudo_bytes((void *)&guid, + sizeof (guid)); + } while (guid == 0 || spa_guid_exists(guid, 0)); } return (guid); @@ -2888,7 +2877,6 @@ EXPORT_SYMBOL(spa_maxdnodesize); EXPORT_SYMBOL(spa_guid_exists); EXPORT_SYMBOL(spa_strdup); EXPORT_SYMBOL(spa_strfree); -EXPORT_SYMBOL(spa_get_random); EXPORT_SYMBOL(spa_generate_guid); EXPORT_SYMBOL(snprintf_blkptr); EXPORT_SYMBOL(spa_freeze); diff --git a/module/zfs/space_map.c b/module/zfs/space_map.c index 3db7d199199c..28feb74b7e88 100644 --- a/module/zfs/space_map.c +++ b/module/zfs/space_map.c @@ -726,7 +726,7 @@ space_map_write_impl(space_map_t *sm, range_tree_t *rt, maptype_t maptype, length > SM_RUN_MAX || vdev_id != SM_NO_VDEVID || (zfs_force_some_double_word_sm_entries && - spa_get_random(100) == 0))) + random_in_range(100) == 0))) words = 2; space_map_write_seg(sm, rs_get_start(rs, rt), rs_get_end(rs, diff --git a/module/zfs/vdev_indirect.c b/module/zfs/vdev_indirect.c index e539e9aa2d70..4a3346f3d841 100644 --- a/module/zfs/vdev_indirect.c +++ b/module/zfs/vdev_indirect.c @@ -1572,7 +1572,7 @@ vdev_indirect_splits_enumerate_randomly(indirect_vsd_t *iv, zio_t *zio) indirect_child_t *ic = list_head(&is->is_unique_child); int children = is->is_unique_children; - for (int i = spa_get_random(children); i > 0; i--) + for (int i = random_in_range(children); i > 0; i--) ic = list_next(&is->is_unique_child, ic); ASSERT3P(ic, !=, NULL); @@ -1736,7 +1736,7 @@ vdev_indirect_reconstruct_io_done(zio_t *zio) * Known_good will be TRUE when reconstruction is known to be possible. 
*/ if (zfs_reconstruct_indirect_damage_fraction != 0 && - spa_get_random(zfs_reconstruct_indirect_damage_fraction) == 0) + random_in_range(zfs_reconstruct_indirect_damage_fraction) == 0) known_good = (vdev_indirect_splits_damage(iv, zio) == 0); /* diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c index 106678a8708e..5eb331046953 100644 --- a/module/zfs/vdev_mirror.c +++ b/module/zfs/vdev_mirror.c @@ -496,7 +496,7 @@ vdev_mirror_preferred_child_randomize(zio_t *zio) int p; if (mm->mm_root) { - p = spa_get_random(mm->mm_preferred_cnt); + p = random_in_range(mm->mm_preferred_cnt); return (vdev_mirror_dva_select(zio, p)); } diff --git a/module/zfs/zil.c b/module/zfs/zil.c index d9c3042084e3..5443c2a1f3f1 100644 --- a/module/zfs/zil.c +++ b/module/zfs/zil.c @@ -205,8 +205,10 @@ zil_init_log_chain(zilog_t *zilog, blkptr_t *bp) { zio_cksum_t *zc = &bp->blk_cksum; - zc->zc_word[ZIL_ZC_GUID_0] = spa_get_random(-1ULL); - zc->zc_word[ZIL_ZC_GUID_1] = spa_get_random(-1ULL); + (void) random_get_pseudo_bytes((void *)&zc->zc_word[ZIL_ZC_GUID_0], + sizeof (zc->zc_word[ZIL_ZC_GUID_0])); + (void) random_get_pseudo_bytes((void *)&zc->zc_word[ZIL_ZC_GUID_1], + sizeof (zc->zc_word[ZIL_ZC_GUID_1])); zc->zc_word[ZIL_ZC_OBJSET] = dmu_objset_id(zilog->zl_os); zc->zc_word[ZIL_ZC_SEQ] = 1ULL; } diff --git a/module/zfs/zio_compress.c b/module/zfs/zio_compress.c index 2db3cec35d5d..33602bd471f3 100644 --- a/module/zfs/zio_compress.c +++ b/module/zfs/zio_compress.c @@ -201,7 +201,7 @@ zio_decompress_data(enum zio_compress c, abd_t *src, void *dst, * in non-ECC RAM), we handle this error (and test it). 
*/ if (zio_decompress_fail_fraction != 0 && - spa_get_random(zio_decompress_fail_fraction) == 0) + random_in_range(zio_decompress_fail_fraction) == 0) ret = SET_ERROR(EINVAL); return (ret); diff --git a/module/zfs/zio_inject.c b/module/zfs/zio_inject.c index e56ea88682ff..feaf41dc65e3 100644 --- a/module/zfs/zio_inject.c +++ b/module/zfs/zio_inject.c @@ -117,7 +117,7 @@ freq_triggered(uint32_t frequency) */ uint32_t maximum = (frequency <= 100) ? 100 : ZI_PERCENTAGE_MAX; - return (spa_get_random(maximum) < frequency); + return (random_in_range(maximum) < frequency); } /* @@ -347,12 +347,12 @@ zio_inject_bitflip_cb(void *data, size_t len, void *private) { zio_t *zio __maybe_unused = private; uint8_t *buffer = data; - uint_t byte = spa_get_random(len); + uint_t byte = random_in_range(len); ASSERT(zio->io_type == ZIO_TYPE_READ); /* flip a single random bit in an abd data buffer */ - buffer[byte] ^= 1 << spa_get_random(8); + buffer[byte] ^= 1 << random_in_range(8); return (1); /* stop after first flip */ } @@ -493,7 +493,7 @@ zio_handle_ignored_writes(zio_t *zio) } /* Have a "problem" writing 60% of the time */ - if (spa_get_random(100) < 60) + if (random_in_range(100) < 60) zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES; break; }