From f2dbafdf4f0966ed41ccf8ec12acce9462711712 Mon Sep 17 00:00:00 2001 From: Tino Reichardt Date: Mon, 1 Aug 2022 18:51:45 +0200 Subject: [PATCH] Skip checksum benchmarks on systems with slow cpu The checksum benchmarking on module load may take a really long time on embedded systems with a slow cpu. Avoid all benchmarks >= 1MiB on systems where EdonR is slower than 300 MiB/s. This limit is currently hardcoded via the define LIMIT_PERF_MBS. This is the new benchmark output of a slow Intel Atom: ``` implementation 1k 4k 16k 64k 256k 1m 4m 16m edonr-generic 209 257 268 259 262 0 0 0 skein-generic 129 150 151 150 150 0 0 0 sha256-generic 50 55 56 56 56 0 0 0 sha512-generic 76 86 88 89 88 0 0 0 blake3-generic 63 62 62 62 61 0 0 0 blake3-sse2 114 292 301 307 309 0 0 0 ``` Reviewed-by: Sebastian Gottschall Reviewed-by: Brian Behlendorf Signed-off-by: Tino Reichardt Closes #13695 --- module/zfs/zfs_chksum.c | 47 +++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/module/zfs/zfs_chksum.c b/module/zfs/zfs_chksum.c index f890b1103934..b9dc907afa8d 100644 --- a/module/zfs/zfs_chksum.c +++ b/module/zfs/zfs_chksum.c @@ -31,7 +31,8 @@ #include -static kstat_t *chksum_kstat = NULL; +/* limit benchmarking to max 256KiB, when EdonR is slower than this: */ +#define LIMIT_PERF_MBS 300 typedef struct { const char *name; @@ -50,8 +51,9 @@ typedef struct { zio_checksum_tmpl_free_t *(free); } chksum_stat_t; -static int chksum_stat_cnt = 0; static chksum_stat_t *chksum_stat_data = 0; +static int chksum_stat_cnt = 0; +static kstat_t *chksum_kstat = NULL; /* * i3-1005G1 test output: @@ -75,7 +77,7 @@ static chksum_stat_t *chksum_stat_data = 0; * blake3-avx512 473 2687 4905 5836 5844 5643 5374 */ static int -chksum_stat_kstat_headers(char *buf, size_t size) +chksum_kstat_headers(char *buf, size_t size) { ssize_t off = 0; @@ -93,7 +95,7 @@ chksum_stat_kstat_headers(char *buf, size_t size) } static int -chksum_stat_kstat_data(char *buf, 
size_t size, void *data) +chksum_kstat_data(char *buf, size_t size, void *data) { chksum_stat_t *cs; ssize_t off = 0; @@ -123,7 +125,7 @@ chksum_stat_kstat_data(char *buf, size_t size, void *data) } static void * -chksum_stat_kstat_addr(kstat_t *ksp, loff_t n) +chksum_kstat_addr(kstat_t *ksp, loff_t n) { if (n < chksum_stat_cnt) ksp->ks_private = (void *)(chksum_stat_data + n); @@ -176,17 +178,21 @@ chksum_run(chksum_stat_t *cs, abd_t *abd, void *ctx, int round, *result = run_bw/1024/1024; /* MiB/s */ } +#define LIMIT_INIT 0 +#define LIMIT_NEEDED 1 +#define LIMIT_NOLIMIT 2 + static void chksum_benchit(chksum_stat_t *cs) { abd_t *abd; void *ctx = 0; void *salt = &cs->salt.zcs_bytes; + static int chksum_stat_limit = LIMIT_INIT; memset(salt, 0, sizeof (cs->salt.zcs_bytes)); - if (cs->init) { + if (cs->init) ctx = cs->init(&cs->salt); - } /* allocate test memory via abd linear interface */ abd = abd_alloc_linear(1<<20, B_FALSE); @@ -195,6 +201,20 @@ chksum_benchit(chksum_stat_t *cs) chksum_run(cs, abd, ctx, 3, &cs->bs16k); chksum_run(cs, abd, ctx, 4, &cs->bs64k); chksum_run(cs, abd, ctx, 5, &cs->bs256k); + + /* check if we ran on a slow cpu */ + if (chksum_stat_limit == LIMIT_INIT) { + if (cs->bs1k < LIMIT_PERF_MBS) { + chksum_stat_limit = LIMIT_NEEDED; + } else { + chksum_stat_limit = LIMIT_NOLIMIT; + } + } + + /* skip benchmarks >= 1MiB when the CPU is too slow */ + if (chksum_stat_limit == LIMIT_NEEDED) + goto abort; + chksum_run(cs, abd, ctx, 6, &cs->bs1m); abd_free(abd); @@ -202,12 +222,13 @@ chksum_benchit(chksum_stat_t *cs) abd = abd_alloc(1<<24, B_FALSE); chksum_run(cs, abd, ctx, 7, &cs->bs4m); chksum_run(cs, abd, ctx, 8, &cs->bs16m); + +abort: abd_free(abd); /* free up temp memory */ - if (cs->free) { + if (cs->free) cs->free(ctx); - } } /* @@ -232,7 +253,7 @@ chksum_benchmark(void) chksum_stat_data = (chksum_stat_t *)kmem_zalloc( sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP); - /* edonr */ + /* edonr - needs to be the first one here (slow CPU check) */ 
cs = &chksum_stat_data[cbid++]; cs->init = abd_checksum_edonr_tmpl_init; cs->func = abd_checksum_edonr_native; @@ -303,9 +324,9 @@ chksum_init(void) chksum_kstat->ks_data = NULL; chksum_kstat->ks_ndata = UINT32_MAX; kstat_set_raw_ops(chksum_kstat, - chksum_stat_kstat_headers, - chksum_stat_kstat_data, - chksum_stat_kstat_addr); + chksum_kstat_headers, + chksum_kstat_data, + chksum_kstat_addr); kstat_install(chksum_kstat); }