-
Notifications
You must be signed in to change notification settings - Fork 165
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add an AVX512 codec, based on the code in Wojciech Muła's `base64simd' project. Only the encoder is currently implemented with AVX512 instructions, because it is relatively simple. The decoder is stubbed by using the AVX2 decoder. A native AVX512 decoder is quite complex, and might be integrated at some later stage. Tested with the Intel SDE instruction set emulator running in Future mode. Resolves #102.
- Loading branch information
Showing
15 changed files
with
234 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#include <stdint.h> | ||
#include <stddef.h> | ||
#include <stdlib.h> | ||
|
||
#include "../../../include/libbase64.h" | ||
#include "../../tables/tables.h" | ||
#include "../../codecs.h" | ||
#include "config.h" | ||
#include "../../env.h" | ||
|
||
#if HAVE_AVX512 | ||
#include <immintrin.h> | ||
|
||
#include "../avx2/dec_reshuffle.c" | ||
#include "../avx2/dec_loop.c" | ||
#include "enc_reshuffle_translate.c" | ||
#include "enc_loop.c" | ||
|
||
#endif // HAVE_AVX512 | ||
|
||
BASE64_ENC_FUNCTION(avx512) | ||
{ | ||
#if HAVE_AVX512 | ||
#include "../generic/enc_head.c" | ||
enc_loop_avx512(&s, &slen, &o, &olen); | ||
#include "../generic/enc_tail.c" | ||
#else | ||
BASE64_ENC_STUB | ||
#endif | ||
} | ||
|
||
// Reuse AVX2 decoding. Not supporting AVX512 at present | ||
BASE64_DEC_FUNCTION(avx512) | ||
{ | ||
#if HAVE_AVX512 | ||
#include "../generic/dec_head.c" | ||
dec_loop_avx2(&s, &slen, &o, &olen); | ||
#include "../generic/dec_tail.c" | ||
#else | ||
BASE64_DEC_STUB | ||
#endif | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
static inline void | ||
enc_loop_avx512_inner (const uint8_t **s, uint8_t **o) | ||
{ | ||
// Load input. | ||
__m512i src = _mm512_loadu_si512((__m512i *) *s); | ||
|
||
// Reshuffle, translate, store. | ||
src = enc_reshuffle_translate(src); | ||
_mm512_storeu_si512((__m512i *) *o, src); | ||
|
||
*s += 48; | ||
*o += 64; | ||
} | ||
|
||
// Bulk encoder: runs as many 48-byte rounds as can be done safely and
// leaves the rest of the input untouched.
//
// s, o:  in/out cursors into the input and output buffers; advanced by 48
//        and 64 bytes respectively for every round performed.
// slen:  in/out count of input bytes; reduced by 48 per round.
// olen:  in/out count of output bytes; increased by 64 per round.
static inline void
enc_loop_avx512 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	// Nothing is done here for inputs smaller than one 64-byte vector load.
	if (*slen < 64) {
		return;
	}

	// A round consumes 48 bytes but loads 64. Holding back 24 bytes when
	// computing the round count leaves at least 24 input bytes unconsumed
	// after the last round, so that round's 64-byte load cannot run past
	// the end of the input buffer.
	size_t todo = (*slen - 24) / 48;

	// Settle the length bookkeeping up front; the code below only moves
	// the cursors.
	*slen -= todo * 48;	// 48 bytes consumed per round
	*olen += todo * 64;	// 64 bytes produced per round

	// Manually unrolled: groups of eight rounds for as long as possible...
	for (; todo >= 8; todo -= 8) {
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
	}

	// ...then at most one group of four, one of two, and one single round
	// to finish the remaining zero to seven rounds.
	if (todo >= 4) {
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		todo -= 4;
	}
	if (todo >= 2) {
		enc_loop_avx512_inner(s, o);
		enc_loop_avx512_inner(s, o);
		todo -= 2;
	}
	if (todo > 0) {
		enc_loop_avx512_inner(s, o);
	}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
// AVX512 algorithm is based on permutexvar and multishift. The code is based on | ||
// https://github.com/WojciechMula/base64simd which is under BSD-2 license. | ||
|
||
static inline __m512i | ||
enc_reshuffle_translate (const __m512i input) | ||
{ | ||
// 32-bit input | ||
// [ 0 0 0 0 0 0 0 0|c1 c0 d5 d4 d3 d2 d1 d0| | ||
// b3 b2 b1 b0 c5 c4 c3 c2|a5 a4 a3 a2 a1 a0 b5 b4] | ||
// output order [1, 2, 0, 1] | ||
// [b3 b2 b1 b0 c5 c4 c3 c2|c1 c0 d5 d4 d3 d2 d1 d0| | ||
// a5 a4 a3 a2 a1 a0 b5 b4|b3 b2 b1 b0 c3 c2 c1 c0] | ||
|
||
const __m512i shuffle_input = _mm512_setr_epi32(0x01020001, | ||
0x04050304, | ||
0x07080607, | ||
0x0a0b090a, | ||
0x0d0e0c0d, | ||
0x10110f10, | ||
0x13141213, | ||
0x16171516, | ||
0x191a1819, | ||
0x1c1d1b1c, | ||
0x1f201e1f, | ||
0x22232122, | ||
0x25262425, | ||
0x28292728, | ||
0x2b2c2a2b, | ||
0x2e2f2d2e); | ||
|
||
// Reorder bytes | ||
// [b3 b2 b1 b0 c5 c4 c3 c2|c1 c0 d5 d4 d3 d2 d1 d0| | ||
// a5 a4 a3 a2 a1 a0 b5 b4|b3 b2 b1 b0 c3 c2 c1 c0] | ||
const __m512i in = _mm512_permutexvar_epi8(shuffle_input, input); | ||
|
||
// After multishift a single 32-bit lane has following layout | ||
// [c1 c0 d5 d4 d3 d2 d1 d0|b1 b0 c5 c4 c3 c2 c1 c0| | ||
// a1 a0 b5 b4 b3 b2 b1 b0|d1 d0 a5 a4 a3 a2 a1 a0] | ||
// (a = [10:17], b = [4:11], c = [22:27], d = [16:21]) | ||
|
||
// 48, 54, 36, 42, 16, 22, 4, 10 | ||
const __m512i shifts = _mm512_set1_epi64(0x3036242a1016040alu); | ||
__m512i shuffled_in = _mm512_multishift_epi64_epi8(shifts, in); | ||
|
||
// Translate immediatedly after reshuffled. | ||
const __m512i lookup = _mm512_loadu_si512(base64_table_enc_6bit); | ||
|
||
// Translation 6-bit values to ASCII. | ||
return _mm512_permutexvar_epi8(shuffled_in, lookup); | ||
} |
Oops, something went wrong.