Skip to content

Commit

Permalink
Benchmark reshuffled writes to the destination buffer
Browse files Browse the repository at this point in the history
This is expected to test the ability to do write combining for
scattered writes and detect any possible performance penalties.

Example reports:

== ARM Cortex A7 ==
 C fill                                               :   4011.5 MB/s
 C fill (shuffle within 16 byte blocks)               :   4112.2 MB/s (0.3%)
 C fill (shuffle within 32 byte blocks)               :    333.9 MB/s
 C fill (shuffle within 64 byte blocks)               :    336.6 MB/s

== ARM Cortex A15 ==
 C fill                                               :   6065.2 MB/s (0.4%)
 C fill (shuffle within 16 byte blocks)               :   2152.0 MB/s
 C fill (shuffle within 32 byte blocks)               :   2150.7 MB/s
 C fill (shuffle within 64 byte blocks)               :   2238.2 MB/s

== ARM Cortex A53 ==
 C fill                                               :   3080.8 MB/s (0.2%)
 C fill (shuffle within 16 byte blocks)               :   3080.7 MB/s
 C fill (shuffle within 32 byte blocks)               :   3079.2 MB/s
 C fill (shuffle within 64 byte blocks)               :   3080.4 MB/s

== Intel Atom N450 ==
 C fill                                               :   1554.9 MB/s
 C fill (shuffle within 16 byte blocks)               :   1554.5 MB/s
 C fill (shuffle within 32 byte blocks)               :   1553.9 MB/s
 C fill (shuffle within 64 byte blocks)               :   1554.4 MB/s

See #7
  • Loading branch information
ssvb committed Mar 29, 2016
1 parent 6fd9bae commit ada1db8
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 0 deletions.
12 changes: 12 additions & 0 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,18 @@ void bandwidth_bench(int64_t *dstbuf, int64_t *srcbuf, int64_t *tmpbuf,
indent_prefix, 0,
aligned_block_fill,
"C fill");
bandwidth_bench_helper(dstbuf, srcbuf, tmpbuf, size, blocksize,
indent_prefix, 0,
aligned_block_fill_shuffle16,
"C fill (shuffle within 16 byte blocks)");
bandwidth_bench_helper(dstbuf, srcbuf, tmpbuf, size, blocksize,
indent_prefix, 0,
aligned_block_fill_shuffle32,
"C fill (shuffle within 32 byte blocks)");
bandwidth_bench_helper(dstbuf, srcbuf, tmpbuf, size, blocksize,
indent_prefix, 0,
aligned_block_fill_shuffle64,
"C fill (shuffle within 64 byte blocks)");

printf("%s---\n", indent_prefix);
bandwidth_bench_helper(dstbuf, srcbuf, tmpbuf, size, blocksize,
Expand Down
66 changes: 66 additions & 0 deletions util.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,72 @@ void aligned_block_fill(int64_t * __restrict dst_,
}
}

/*
 * Fill the destination with reshuffled (non-sequential) write accesses,
 * a kind of "drunken master style" access pattern.
 *
 * This variant only permutes stores inside 16-byte pairs: within each
 * 64-byte chunk the 2nd and 3rd pairs are written high half first, the
 * 1st and 4th pairs are written in ascending order.
 *
 * dst_  - destination buffer, written in whole 64-byte chunks
 * src   - only the first 64-bit word is read and used as the fill value
 * size  - byte count; a trailing partial chunk (size % 64) is not written
 *         and nothing is written when size < 64
 *
 * See: https://github.com/ssvb/tinymembench/issues/7
 */
void aligned_block_fill_shuffle16(int64_t * __restrict dst_,
                                  int64_t * __restrict src,
                                  int size)
{
    const int64_t fill = *src;
    /* Stores go through a volatile-qualified pointer, in the order listed. */
    volatile int64_t *out = dst_;
    int chunks;

    for (chunks = size / 64; chunks > 0; chunks--)
    {
        /* bytes  0..15 */
        out[0] = fill;
        out[1] = fill;
        /* bytes 16..31, swapped */
        out[3] = fill;
        out[2] = fill;
        /* bytes 32..47, swapped */
        out[5] = fill;
        out[4] = fill;
        /* bytes 48..63 */
        out[6] = fill;
        out[7] = fill;

        out += 8;
    }
}

/*
 * Fill variant that permutes the stores inside each 32-byte half of a
 * 64-byte chunk: the four 64-bit words of a half are written in the
 * order 3, 0, 2, 1.
 *
 * dst_  - destination buffer, written in whole 64-byte chunks
 * src   - only the first 64-bit word is read and used as the fill value
 * size  - byte count; a trailing partial chunk (size % 64) is not written
 *         and nothing is written when size < 64
 */
void aligned_block_fill_shuffle32(int64_t * __restrict dst_,
                                  int64_t * __restrict src,
                                  int size)
{
    const int64_t fill = *src;
    /* Stores go through a volatile-qualified pointer, in the order listed. */
    volatile int64_t *out = dst_;
    int chunks;

    for (chunks = size / 64; chunks > 0; chunks--)
    {
        /* bytes  0..31 */
        out[3] = fill;
        out[0] = fill;
        out[2] = fill;
        out[1] = fill;
        /* bytes 32..63, same permutation */
        out[7] = fill;
        out[4] = fill;
        out[6] = fill;
        out[5] = fill;

        out += 8;
    }
}

/*
 * Fill variant that permutes the stores across a whole 64-byte chunk:
 * the eight 64-bit words are written in the order 5, 2, 7, 6, 1, 3, 0, 4.
 *
 * dst_  - destination buffer, written in whole 64-byte chunks
 * src   - only the first 64-bit word is read and used as the fill value
 * size  - byte count; a trailing partial chunk (size % 64) is not written
 *         and nothing is written when size < 64
 */
void aligned_block_fill_shuffle64(int64_t * __restrict dst_,
                                  int64_t * __restrict src,
                                  int size)
{
    const int64_t fill = *src;
    /* Stores go through a volatile-qualified pointer, in the order listed. */
    volatile int64_t *out = dst_;
    int chunks;

    for (chunks = size / 64; chunks > 0; chunks--)
    {
        out[5] = fill;
        out[2] = fill;
        out[7] = fill;
        out[6] = fill;
        out[1] = fill;
        out[3] = fill;
        out[0] = fill;
        out[4] = fill;

        out += 8;
    }
}

double gettime(void)
{
struct timeval tv;
Expand Down
9 changes: 9 additions & 0 deletions util.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,15 @@ void aligned_block_copy_pf64(int64_t * __restrict dst,
void aligned_block_fill(int64_t * __restrict dst,
int64_t * __restrict src,
int size);
/*
 * Fill dst with the first 64-bit word of src, 64 bytes per step, writing
 * the two words of the 2nd and 3rd 16-byte pairs of each step in reverse
 * order. size is in bytes; a trailing remainder below 64 is not written.
 */
void aligned_block_fill_shuffle16(int64_t * __restrict dst,
int64_t * __restrict src,
int size);
/*
 * Fill dst with the first 64-bit word of src, 64 bytes per step, writing
 * the four words of each 32-byte half in the order 3, 0, 2, 1.
 * size is in bytes; a trailing remainder below 64 is not written.
 */
void aligned_block_fill_shuffle32(int64_t * __restrict dst,
int64_t * __restrict src,
int size);
/*
 * Fill dst with the first 64-bit word of src, 64 bytes per step, writing
 * the eight words of each step in the order 5, 2, 7, 6, 1, 3, 0, 4.
 * size is in bytes; a trailing remainder below 64 is not written.
 */
void aligned_block_fill_shuffle64(int64_t * __restrict dst,
int64_t * __restrict src,
int size);

void *alloc_four_nonaliased_buffers(void **buf1, int size1,
void **buf2, int size2,
Expand Down

0 comments on commit ada1db8

Please sign in to comment.