Skip to content

Commit

Permalink
Only prefer POPCNT for 8-bit bit-counting on newer AMD CPUs
Browse files (browse the repository at this point in the history)
A table lookup (BitsSetTable256) is likely faster everywhere else
Ref #4
  • Loading branch information
animetosho committed Sep 25, 2017
1 parent eefc33c commit 7ae8661
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 6 deletions.
2 changes: 0 additions & 2 deletions src/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@
#endif


#ifndef __POPCNT__
// table from http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetTable
static const unsigned char BitsSetTable256[256] =
{
Expand All @@ -83,7 +82,6 @@ static const unsigned char BitsSetTable256[256] =
#undef B4
#undef B6
};
#endif



Expand Down
4 changes: 2 additions & 2 deletions src/decoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ size_t do_decode_sse(const unsigned char* src, unsigned char* dest, size_t len,
// all that's left is to 'compress' the data (skip over masked chars)
#ifdef __SSSE3__
if(use_ssse3) {
# ifdef __POPCNT__
# if defined(__POPCNT__) && (defined(__tune_znver1__) || defined(__tune_btver2__))
unsigned char skipped = _mm_popcnt_u32(mask & 0xff);
# else
unsigned char skipped = BitsSetTable256[mask & 0xff];
Expand All @@ -300,7 +300,7 @@ size_t do_decode_sse(const unsigned char* src, unsigned char* dest, size_t len,
STOREU_XMM(p, oData);

// increment output position
# ifdef __POPCNT__
# if defined(__POPCNT__) && !defined(__tune_btver1__)
p += XMM_SIZE - _mm_popcnt_u32(mask);
# else
p += XMM_SIZE - skipped - BitsSetTable256[mask >> 8];
Expand Down
4 changes: 2 additions & 2 deletions src/encoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ static size_t do_encode_fast(int line_size, int* colOffset, const unsigned char*
data = _mm_add_epi8(data, shufMixMA);
data2 = _mm_add_epi8(data2, shufMixMB);
// store out
#ifdef __POPCNT__
#if defined(__POPCNT__) && (defined(__tune_znver1__) || defined(__tune_btver2__))
unsigned char shufALen = _mm_popcnt_u32(m1) + 8;
unsigned char shufBLen = _mm_popcnt_u32(m2) + 8;
#else
Expand Down Expand Up @@ -652,7 +652,7 @@ size_t do_encode_fast2(int line_size, int* colOffset, const unsigned char* src,
data = _mm_add_epi8(data, shufMixMA);
data2 = _mm_add_epi8(data2, shufMixMB);
// store out
#ifdef __POPCNT__
#if defined(__POPCNT__) && (defined(__tune_znver1__) || defined(__tune_btver2__))
unsigned char shufALen = _mm_popcnt_u32(m1) + 8;
unsigned char shufBLen = _mm_popcnt_u32(m2) + 8;
#else
Expand Down

0 comments on commit 7ae8661

Please sign in to comment.