From b122ea6ce7533c0de43ab8e61ae6fd7f8f12499a Mon Sep 17 00:00:00 2001 From: Tino Reichardt Date: Tue, 26 Jul 2022 10:24:53 +0200 Subject: [PATCH] Skip checksum benchmarks on systems with slow cpu The checksum benchmarking on module load may take a really long time on embedded systems with a slow cpu. Avoid all benchmarks >= 1MiB on systems where EdonR is slower than 300 MiB/s. This limit is currently hardcoded, but could also be made a tunable. When we switch to a tunable, what name should I use then? This is the new benchmark output of a slow Intel Atom: ``` implementation 1k 4k 16k 64k 256k edonr-generic 209 257 268 259 262 skein-generic 129 150 151 150 150 sha256-generic 50 55 56 56 56 sha512-generic 76 86 88 89 88 blake3-generic 63 62 62 62 61 blake3-sse2 114 292 301 307 309 ``` Signed-off-by: Tino Reichardt --- module/zfs/zfs_chksum.c | 68 +++++++++++++++++++++++++++++++++++------ 1 file changed, 58 insertions(+), 10 deletions(-) diff --git a/module/zfs/zfs_chksum.c b/module/zfs/zfs_chksum.c index f890b1103934..cd20ec69802b 100644 --- a/module/zfs/zfs_chksum.c +++ b/module/zfs/zfs_chksum.c @@ -50,9 +50,17 @@ typedef struct { zio_checksum_tmpl_free_t *(free); } chksum_stat_t; +#define LIMIT_INIT 0 +#define LIMIT_NEEDED 1 +#define LIMIT_NOLIMIT -1 + +static int limit = LIMIT_INIT; static int chksum_stat_cnt = 0; static chksum_stat_t *chksum_stat_data = 0; +typedef int (*kstat_headers_f)(char *buf, size_t size); +kstat_headers_f chksum_kstat_headers; + /* * i3-1005G1 test output: * @@ -75,7 +83,7 @@ static chksum_stat_t *chksum_stat_data = 0; * blake3-avx512 473 2687 4905 5836 5844 5643 5374 */ static int -chksum_stat_kstat_headers(char *buf, size_t size) +chksum_kstat_headers_full(char *buf, size_t size) { ssize_t off = 0; @@ -93,7 +101,22 @@ chksum_stat_kstat_headers(char *buf, size_t size) } static int -chksum_stat_kstat_data(char *buf, size_t size, void *data) +chksum_kstat_headers_mini(char *buf, size_t size) +{ + ssize_t off = 0; + + off += snprintf(buf + 
off, size, "%-23s", "implementation"); + off += snprintf(buf + off, size - off, "%8s", "1k"); + off += snprintf(buf + off, size - off, "%8s", "4k"); + off += snprintf(buf + off, size - off, "%8s", "16k"); + off += snprintf(buf + off, size - off, "%8s", "64k"); + (void) snprintf(buf + off, size - off, "%8s", "256k\n"); + + return (0); +} + +static int +chksum_kstat_data(char *buf, size_t size, void *data) { chksum_stat_t *cs; ssize_t off = 0; @@ -110,6 +133,14 @@ chksum_stat_kstat_data(char *buf, size_t size, void *data) (u_longlong_t)cs->bs16k); off += snprintf(buf + off, size - off, "%8llu", (u_longlong_t)cs->bs64k); + + /* no benchmark >= 1MiB will be done */ + if (cs->bs1m == 0) { + off += snprintf(buf + off, size - off, "%8llu\n", + (u_longlong_t)cs->bs256k); + return (0); + } + off += snprintf(buf + off, size - off, "%8llu", (u_longlong_t)cs->bs256k); off += snprintf(buf + off, size - off, "%8llu", @@ -123,7 +154,7 @@ chksum_stat_kstat_data(char *buf, size_t size, void *data) } static void * -chksum_stat_kstat_addr(kstat_t *ksp, loff_t n) +chksum_kstat_addr(kstat_t *ksp, loff_t n) { if (n < chksum_stat_cnt) ksp->ks_private = (void *)(chksum_stat_data + n); @@ -176,6 +207,7 @@ chksum_run(chksum_stat_t *cs, abd_t *abd, void *ctx, int round, *result = run_bw/1024/1024; /* MiB/s */ } + static void chksum_benchit(chksum_stat_t *cs) { @@ -184,9 +216,8 @@ chksum_benchit(chksum_stat_t *cs) void *salt = &cs->salt.zcs_bytes; memset(salt, 0, sizeof (cs->salt.zcs_bytes)); - if (cs->init) { + if (cs->init) ctx = cs->init(&cs->salt); - } /* allocate test memory via abd linear interface */ abd = abd_alloc_linear(1<<20, B_FALSE); @@ -195,6 +226,22 @@ chksum_benchit(chksum_stat_t *cs) chksum_run(cs, abd, ctx, 3, &cs->bs16k); chksum_run(cs, abd, ctx, 4, &cs->bs64k); chksum_run(cs, abd, ctx, 5, &cs->bs256k); + + /* check if we ran on a slow cpu */ + if (limit == LIMIT_INIT) { + if (cs->bs1k < 300) { + limit = LIMIT_NEEDED; + chksum_kstat_headers = chksum_kstat_headers_mini; + } 
else { + limit = LIMIT_NOLIMIT; + chksum_kstat_headers = chksum_kstat_headers_full; + } + } + + /* skip benchmarks >= 1MiB, when edonr is slower than 300 MiB/s */ + if (limit == LIMIT_NEEDED) + goto abort; + chksum_run(cs, abd, ctx, 6, &cs->bs1m); abd_free(abd); @@ -202,12 +249,13 @@ chksum_benchit(chksum_stat_t *cs) abd = abd_alloc(1<<24, B_FALSE); chksum_run(cs, abd, ctx, 7, &cs->bs4m); chksum_run(cs, abd, ctx, 8, &cs->bs16m); + +abort: abd_free(abd); /* free up temp memory */ - if (cs->free) { + if (cs->free) cs->free(ctx); - } } /* @@ -303,9 +351,9 @@ chksum_init(void) chksum_kstat->ks_data = NULL; chksum_kstat->ks_ndata = UINT32_MAX; kstat_set_raw_ops(chksum_kstat, - chksum_stat_kstat_headers, - chksum_stat_kstat_data, - chksum_stat_kstat_addr); + chksum_kstat_headers, + chksum_kstat_data, + chksum_kstat_addr); kstat_install(chksum_kstat); }