Skip to content

Commit

Permalink
Skip checksum benchmarks on systems with slow cpu
Browse files Browse the repository at this point in the history
The checksum benchmarking on module load may take a really long time
on embedded systems with a slow CPU. Avoid all benchmarks >= 1MiB on
systems where EdonR is slower than 300 MiB/s.

This limit is currently hardcoded via the define LIMIT_PERF_MBS.

This is the new benchmark output of a slow Intel Atom:

```
 implementation    1k    4k   16k   64k  256k    1m    4m   16m
 edonr-generic    209   257   268   259   262     0     0     0
 skein-generic    129   150   151   150   150     0     0     0
 sha256-generic    50    55    56    56    56     0     0     0
 sha512-generic    76    86    88    89    88     0     0     0
 blake3-generic    63    62    62    62    61     0     0     0
 blake3-sse2      114   292   301   307   309     0     0     0
```

Reviewed-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de>
Closes openzfs#13695
  • Loading branch information
mcmilk authored and lundman committed Sep 13, 2022
1 parent ab41887 commit fd27e3a
Showing 1 changed file with 34 additions and 13 deletions.
47 changes: 34 additions & 13 deletions module/zfs/zfs_chksum.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@

#include <sys/blake3.h>

static kstat_t *chksum_kstat = NULL;
/* limit benchmarking to max 256KiB, when EdonR is slower than this: */
#define LIMIT_PERF_MBS 300

typedef struct {
const char *name;
Expand All @@ -50,8 +51,9 @@ typedef struct {
zio_checksum_tmpl_free_t *(free);
} chksum_stat_t;

static int chksum_stat_cnt = 0;
static chksum_stat_t *chksum_stat_data = 0;
static int chksum_stat_cnt = 0;
static kstat_t *chksum_kstat = NULL;

/*
* i3-1005G1 test output:
Expand All @@ -75,7 +77,7 @@ static chksum_stat_t *chksum_stat_data = 0;
* blake3-avx512 473 2687 4905 5836 5844 5643 5374
*/
static int
chksum_stat_kstat_headers(char *buf, size_t size)
chksum_kstat_headers(char *buf, size_t size)
{
ssize_t off = 0;

Expand All @@ -93,7 +95,7 @@ chksum_stat_kstat_headers(char *buf, size_t size)
}

static int
chksum_stat_kstat_data(char *buf, size_t size, void *data)
chksum_kstat_data(char *buf, size_t size, void *data)
{
chksum_stat_t *cs;
ssize_t off = 0;
Expand Down Expand Up @@ -123,7 +125,7 @@ chksum_stat_kstat_data(char *buf, size_t size, void *data)
}

static void *
chksum_stat_kstat_addr(kstat_t *ksp, loff_t n)
chksum_kstat_addr(kstat_t *ksp, loff_t n)
{
if (n < chksum_stat_cnt)
ksp->ks_private = (void *)(chksum_stat_data + n);
Expand Down Expand Up @@ -176,17 +178,21 @@ chksum_run(chksum_stat_t *cs, abd_t *abd, void *ctx, int round,
*result = run_bw/1024/1024; /* MiB/s */
}

#define LIMIT_INIT 0
#define LIMIT_NEEDED 1
#define LIMIT_NOLIMIT 2

static void
chksum_benchit(chksum_stat_t *cs)
{
abd_t *abd;
void *ctx = 0;
void *salt = &cs->salt.zcs_bytes;
static int chksum_stat_limit = LIMIT_INIT;

memset(salt, 0, sizeof (cs->salt.zcs_bytes));
if (cs->init) {
if (cs->init)
ctx = cs->init(&cs->salt);
}

/* allocate test memory via abd linear interface */
abd = abd_alloc_linear(1<<20, B_FALSE);
Expand All @@ -195,19 +201,34 @@ chksum_benchit(chksum_stat_t *cs)
chksum_run(cs, abd, ctx, 3, &cs->bs16k);
chksum_run(cs, abd, ctx, 4, &cs->bs64k);
chksum_run(cs, abd, ctx, 5, &cs->bs256k);

/* check if we ran on a slow cpu */
if (chksum_stat_limit == LIMIT_INIT) {
if (cs->bs1k < LIMIT_PERF_MBS) {
chksum_stat_limit = LIMIT_NEEDED;
} else {
chksum_stat_limit = LIMIT_NOLIMIT;
}
}

/* skip benchmarks >= 1MiB when the CPU is too slow */
if (chksum_stat_limit == LIMIT_NEEDED)
goto abort;

chksum_run(cs, abd, ctx, 6, &cs->bs1m);
abd_free(abd);

/* allocate test memory via abd non linear interface */
abd = abd_alloc(1<<24, B_FALSE);
chksum_run(cs, abd, ctx, 7, &cs->bs4m);
chksum_run(cs, abd, ctx, 8, &cs->bs16m);

abort:
abd_free(abd);

/* free up temp memory */
if (cs->free) {
if (cs->free)
cs->free(ctx);
}
}

/*
Expand All @@ -232,7 +253,7 @@ chksum_benchmark(void)
chksum_stat_data = (chksum_stat_t *)kmem_zalloc(
sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP);

/* edonr */
/* edonr - needs to be the first one here (slow CPU check) */
cs = &chksum_stat_data[cbid++];
cs->init = abd_checksum_edonr_tmpl_init;
cs->func = abd_checksum_edonr_native;
Expand Down Expand Up @@ -303,9 +324,9 @@ chksum_init(void)
chksum_kstat->ks_data = NULL;
chksum_kstat->ks_ndata = UINT32_MAX;
kstat_set_raw_ops(chksum_kstat,
chksum_stat_kstat_headers,
chksum_stat_kstat_data,
chksum_stat_kstat_addr);
chksum_kstat_headers,
chksum_kstat_data,
chksum_kstat_addr);
kstat_install(chksum_kstat);
}

Expand Down

0 comments on commit fd27e3a

Please sign in to comment.