Skip to content

Commit

Permalink
Skip checksum benchmarks on systems with slow cpu
Browse files Browse the repository at this point in the history
The checksum benchmarking on module load may take a really long time
on embedded systems with a slow cpu. Avoid all benchmarks >= 1MiB on
systems where EdonR is slower than 300 MiB/s.

This limit is currently hardcoded, but could also be a tunable.
If we switch to a tunable, what name should it have?

This is the new benchmark output of a slow Intel Atom:

```
 implementation    1k    4k   16k   64k  256k
 edonr-generic    209   257   268   259   262
 skein-generic    129   150   151   150   150
 sha256-generic    50    55    56    56    56
 sha512-generic    76    86    88    89    88
 blake3-generic    63    62    62    62    61
 blake3-sse2      114   292   301   307   309
```

Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
  • Loading branch information
mcmilk committed Jul 26, 2022
1 parent 8792dd2 commit b122ea6
Showing 1 changed file with 58 additions and 10 deletions.
68 changes: 58 additions & 10 deletions module/zfs/zfs_chksum.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,17 @@ typedef struct {
zio_checksum_tmpl_free_t *(free);
} chksum_stat_t;

/*
 * States of the `limit` flag below.  chksum_benchit() resolves the flag the
 * first time it runs, based on the 1k result (reported in MiB/s) of the
 * checksum being benchmarked at that moment:
 *
 *   LIMIT_INIT     not decided yet (initial value)
 *   LIMIT_NEEDED   1k result was below 300: benchmarks from 1m upwards
 *                  are skipped
 *   LIMIT_NOLIMIT  1k result was 300 or more: every block size is run
 */
#define LIMIT_INIT 0
#define LIMIT_NEEDED 1
#define LIMIT_NOLIMIT -1

static int limit = LIMIT_INIT;
/*
 * chksum_kstat_addr() hands out chksum_stat_data + n for every
 * n < chksum_stat_cnt, i.e. these describe the table of per-implementation
 * results and its number of entries.
 */
static int chksum_stat_cnt = 0;
static chksum_stat_t *chksum_stat_data = 0;

/*
 * Header formatter for the kstat output.  chksum_benchit() points this at
 * either the "mini" (up to 256k) or the "full" header routine once `limit`
 * has been decided; until then it is NULL.  chksum_init() passes its current
 * value to kstat_set_raw_ops().
 * NOTE(review): not declared static - confirm whether anything outside this
 * file needs the symbol.
 */
typedef int (*kstat_headers_f)(char *buf, size_t size);
kstat_headers_f chksum_kstat_headers;

/*
* i3-1005G1 test output:
*
Expand All @@ -75,7 +83,7 @@ static chksum_stat_t *chksum_stat_data = 0;
* blake3-avx512 473 2687 4905 5836 5844 5643 5374
*/
static int
chksum_stat_kstat_headers(char *buf, size_t size)
chksum_kstat_headers_full(char *buf, size_t size)
{
ssize_t off = 0;

Expand All @@ -93,7 +101,22 @@ chksum_stat_kstat_headers(char *buf, size_t size)
}

/*
 * Write the header row of the reduced benchmark table (block sizes 1k up to
 * 256k) into `buf`.
 *
 * buf   destination buffer
 * size  capacity of `buf` in bytes, including the terminating NUL
 *
 * Always returns 0.  The row is a 23 character wide, left aligned
 * "implementation" column followed by five right aligned, 8 character wide
 * columns and a trailing newline, so that it lines up with data rows that
 * are printed with "%8llu".  If `buf` is too small the row is truncated
 * (and stays NUL terminated); nothing is ever written past `size` bytes.
 */
static int
chksum_kstat_headers_mini(char *buf, size_t size)
{
	static const char *const cols[] = { "1k", "4k", "16k", "64k", "256k" };
	const size_t ncols = sizeof (cols) / sizeof (cols[0]);
	size_t off, i;
	int n;

	if (buf == NULL || size == 0)
		return (0);
	buf[0] = '\0';

	n = snprintf(buf, size, "%-23s", "implementation");
	/* snprintf reports the untruncated length: stop once it hits size */
	if (n < 0 || (size_t)n >= size)
		return (0);
	off = (size_t)n;

	for (i = 0; i < ncols; i++) {
		/*
		 * The newline is emitted after the padded field, not as part
		 * of it, so the last column keeps its full width of 8.
		 */
		n = snprintf(buf + off, size - off, "%8s%s", cols[i],
		    (i + 1 == ncols) ? "\n" : "");
		if (n < 0 || (size_t)n >= size - off)
			return (0);
		off += (size_t)n;
	}

	return (0);
}

static int
chksum_kstat_data(char *buf, size_t size, void *data)
{
chksum_stat_t *cs;
ssize_t off = 0;
Expand All @@ -110,6 +133,14 @@ chksum_stat_kstat_data(char *buf, size_t size, void *data)
(u_longlong_t)cs->bs16k);
off += snprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs64k);

/* no benchmark >= 1MiB will be done */
if (cs->bs1m == 0) {
off += snprintf(buf + off, size - off, "%8llu\n",
(u_longlong_t)cs->bs256k);
return (0);
}

off += snprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs256k);
off += snprintf(buf + off, size - off, "%8llu",
Expand All @@ -123,7 +154,7 @@ chksum_stat_kstat_data(char *buf, size_t size, void *data)
}

static void *
chksum_stat_kstat_addr(kstat_t *ksp, loff_t n)
chksum_kstat_addr(kstat_t *ksp, loff_t n)
{
if (n < chksum_stat_cnt)
ksp->ks_private = (void *)(chksum_stat_data + n);
Expand Down Expand Up @@ -176,6 +207,7 @@ chksum_run(chksum_stat_t *cs, abd_t *abd, void *ctx, int round,
*result = run_bw/1024/1024; /* MiB/s */
}


static void
chksum_benchit(chksum_stat_t *cs)
{
Expand All @@ -184,9 +216,8 @@ chksum_benchit(chksum_stat_t *cs)
void *salt = &cs->salt.zcs_bytes;

memset(salt, 0, sizeof (cs->salt.zcs_bytes));
if (cs->init) {
if (cs->init)
ctx = cs->init(&cs->salt);
}

/* allocate test memory via abd linear interface */
abd = abd_alloc_linear(1<<20, B_FALSE);
Expand All @@ -195,19 +226,36 @@ chksum_benchit(chksum_stat_t *cs)
chksum_run(cs, abd, ctx, 3, &cs->bs16k);
chksum_run(cs, abd, ctx, 4, &cs->bs64k);
chksum_run(cs, abd, ctx, 5, &cs->bs256k);

/* check if we ran on a slow cpu */
if (limit == LIMIT_INIT) {
if (cs->bs1k < 300) {
limit = LIMIT_NEEDED;
chksum_kstat_headers = chksum_kstat_headers_mini;
} else {
limit = LIMIT_NOLIMIT;
chksum_kstat_headers = chksum_kstat_headers_full;
}
}

/* skip benchmarks >= 1MiB, when edonr is slower than 300 MiB/s */
if (limit == LIMIT_NEEDED)
goto abort;

chksum_run(cs, abd, ctx, 6, &cs->bs1m);
abd_free(abd);

/* allocate test memory via abd non linear interface */
abd = abd_alloc(1<<24, B_FALSE);
chksum_run(cs, abd, ctx, 7, &cs->bs4m);
chksum_run(cs, abd, ctx, 8, &cs->bs16m);

abort:
abd_free(abd);

/* free up temp memory */
if (cs->free) {
if (cs->free)
cs->free(ctx);
}
}

/*
Expand Down Expand Up @@ -303,9 +351,9 @@ chksum_init(void)
chksum_kstat->ks_data = NULL;
chksum_kstat->ks_ndata = UINT32_MAX;
kstat_set_raw_ops(chksum_kstat,
chksum_stat_kstat_headers,
chksum_stat_kstat_data,
chksum_stat_kstat_addr);
chksum_kstat_headers,
chksum_kstat_data,
chksum_kstat_addr);
kstat_install(chksum_kstat);
}

Expand Down

0 comments on commit b122ea6

Please sign in to comment.