Skip to content

Commit

Permalink
deps: update simdutf to 5.3.0
Browse files Browse the repository at this point in the history
PR-URL: #53837
Reviewed-By: Marco Ippolito <marcoippolito54@gmail.com>
Reviewed-By: Antoine du Hamel <duhamelantoine1995@gmail.com>
nodejs-github-bot authored and aduh95 committed Nov 3, 2024

Verified

This commit was signed with the committer’s verified signature.
aduh95 Antoine du Hamel
1 parent c952fd8 commit 5fb8e1b
Showing 2 changed files with 88 additions and 44 deletions.
80 changes: 46 additions & 34 deletions deps/simdutf/simdutf.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* auto-generated on 2024-05-07 22:33:11 -0400. Do not edit! */
/* auto-generated on 2024-07-11 00:01:58 -0400. Do not edit! */
/* begin file src/simdutf.cpp */
#include "simdutf.h"
// We include base64_tables once.
@@ -1522,10 +1522,10 @@ template<>
struct simd16<bool>: base16<bool> {
static simdutf_really_inline simd16<bool> splat(bool _value) { return vmovq_n_u16(uint16_t(-(!!_value))); }

simdutf_really_inline simd16<bool>() : base16() {}
simdutf_really_inline simd16<bool>(const uint16x8_t _value) : base16<bool>(_value) {}
simdutf_really_inline simd16() : base16() {}
simdutf_really_inline simd16(const uint16x8_t _value) : base16<bool>(_value) {}
// Splat constructor
simdutf_really_inline simd16<bool>(bool _value) : base16<bool>(splat(_value)) {}
simdutf_really_inline simd16(bool _value) : base16<bool>(splat(_value)) {}

};

@@ -2832,10 +2832,10 @@ template<>
struct simd16<bool>: base16<bool> {
static simdutf_really_inline simd16<bool> splat(bool _value) { return _mm256_set1_epi16(uint16_t(-(!!_value))); }

simdutf_really_inline simd16<bool>() : base16() {}
simdutf_really_inline simd16<bool>(const __m256i _value) : base16<bool>(_value) {}
simdutf_really_inline simd16() : base16() {}
simdutf_really_inline simd16(const __m256i _value) : base16<bool>(_value) {}
// Splat constructor
simdutf_really_inline simd16<bool>(bool _value) : base16<bool>(splat(_value)) {}
simdutf_really_inline simd16(bool _value) : base16<bool>(splat(_value)) {}

simdutf_really_inline bitmask_type to_bitmask() const { return _mm256_movemask_epi8(*this); }
simdutf_really_inline bool any() const { return !_mm256_testz_si256(*this, *this); }
@@ -3803,10 +3803,10 @@ template<>
struct simd16<bool>: base16<bool> {
static simdutf_really_inline simd16<bool> splat(bool _value) { return _mm_set1_epi16(uint16_t(-(!!_value))); }

simdutf_really_inline simd16<bool>() : base16() {}
simdutf_really_inline simd16<bool>(const __m128i _value) : base16<bool>(_value) {}
simdutf_really_inline simd16() : base16() {}
simdutf_really_inline simd16(const __m128i _value) : base16<bool>(_value) {}
// Splat constructor
simdutf_really_inline simd16<bool>(bool _value) : base16<bool>(splat(_value)) {}
simdutf_really_inline simd16(bool _value) : base16<bool>(splat(_value)) {}

simdutf_really_inline int to_bitmask() const { return _mm_movemask_epi8(*this); }
simdutf_really_inline bool any() const { return !_mm_testz_si128(*this, *this); }
@@ -5807,6 +5807,13 @@ result base64_tail_decode_safe(char *dst, size_t& outlen, const char_type *src,
// Returns the number of bytes written. The destination buffer must be large
// enough. It will add padding (=) if needed.
size_t tail_encode_base64(char *dst, const char *src, size_t srclen, base64_options options) {
// By default, we use padding if we are not using the URL variant.
// This is checked with ((options & base64_url) == 0), which is true if we are not using the URL variant.
// However, we also allow 'inversion' of the convention with the base64_reverse_padding option.
// If the base64_reverse_padding option is set, we use padding if we are using the URL variant,
// and we omit it if we are not using the URL variant. This is checked with
// ((options & base64_reverse_padding) == base64_reverse_padding).
bool use_padding = ((options & base64_url) == 0) ^ ((options & base64_reverse_padding) == base64_reverse_padding);
// This looks like 3 branches, but we expect the compiler to resolve this to a single branch:
const char *e0 = (options & base64_url) ? tables::base64::base64_url::e0 : tables::base64::base64_default::e0;
const char *e1 = (options & base64_url) ? tables::base64::base64_url::e1 : tables::base64::base64_default::e1;
@@ -5830,7 +5837,7 @@ size_t tail_encode_base64(char *dst, const char *src, size_t srclen, base64_opti
t1 = uint8_t(src[i]);
*out++ = e0[t1];
*out++ = e1[(t1 & 0x03) << 4];
if((options & base64_url) == 0) {
if(use_padding) {
*out++ = '=';
*out++ = '=';
}
@@ -5841,7 +5848,7 @@ size_t tail_encode_base64(char *dst, const char *src, size_t srclen, base64_opti
*out++ = e0[t1];
*out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
*out++ = e2[(t2 & 0x0F) << 2];
if((options & base64_url) == 0) {
if(use_padding) {
*out++ = '=';
}
}
@@ -5869,7 +5876,14 @@ simdutf_warn_unused size_t maximal_binary_length_from_base64(const char_type * i
}

simdutf_warn_unused size_t base64_length_from_binary(size_t length, base64_options options) noexcept {
if(options & base64_url) {
// By default, we use padding if we are not using the URL variant.
// This is checked with ((options & base64_url) == 0), which is true if we are not using the URL variant.
// However, we also allow 'inversion' of the convention with the base64_reverse_padding option.
// If the base64_reverse_padding option is set, we use padding if we are using the URL variant,
// and we omit it if we are not using the URL variant. This is checked with
// ((options & base64_reverse_padding) == base64_reverse_padding).
bool use_padding = ((options & base64_url) == 0) ^ ((options & base64_reverse_padding) == base64_reverse_padding);
if(!use_padding) {
return length/3 * 4 + ((length % 3) ? (length % 3) + 1 : 0);
}
return (length + 2)/3 * 4; // We use padding to make the length a multiple of 4.
@@ -17055,8 +17069,6 @@ result compress_decode_base64(char *dst, const char_type *src, size_t srclen,
// can avoid the call to compress_block and decode directly.
copy_block(&b, bufferptr);
bufferptr += 64;
// base64_decode_block(dst, &b);
// dst += 48;
}
if (bufferptr >= (block_size - 1) * 64 + buffer) {
for (size_t i = 0; i < (block_size - 1); i++) {
@@ -27138,8 +27150,8 @@ simdutf_really_inline __m256i lookup_pshufb_improved(const __m256i input) {
return _mm256_add_epi8(result, input);
}

template <base64_options options>
size_t encode_base64(char *dst, const char *src, size_t srclen) {
template <bool isbase64url>
size_t encode_base64(char *dst, const char *src, size_t srclen, base64_options options) {
// credit: Wojciech Muła
const uint8_t *input = (const uint8_t *)src;

@@ -27206,18 +27218,18 @@ size_t encode_base64(char *dst, const char *src, size_t srclen) {
const __m256i input3 = _mm256_or_si256(t1_3, t3_3);

_mm256_storeu_si256(reinterpret_cast<__m256i *>(out),
lookup_pshufb_improved<options == base64_url>(input0));
lookup_pshufb_improved<isbase64url>(input0));
out += 32;

_mm256_storeu_si256(reinterpret_cast<__m256i *>(out),
lookup_pshufb_improved<options == base64_url>(input1));
lookup_pshufb_improved<isbase64url>(input1));
out += 32;

_mm256_storeu_si256(reinterpret_cast<__m256i *>(out),
lookup_pshufb_improved<options == base64_url>(input2));
lookup_pshufb_improved<isbase64url>(input2));
out += 32;
_mm256_storeu_si256(reinterpret_cast<__m256i *>(out),
lookup_pshufb_improved<options == base64_url>(input3));
lookup_pshufb_improved<isbase64url>(input3));
out += 32;
}
for (; i + 28 <= srclen; i += 24) {
@@ -27241,7 +27253,7 @@ size_t encode_base64(char *dst, const char *src, size_t srclen) {
const __m256i indices = _mm256_or_si256(t1, t3);

_mm256_storeu_si256(reinterpret_cast<__m256i *>(out),
lookup_pshufb_improved<options == base64_url>(indices));
lookup_pshufb_improved<isbase64url>(indices));
out += 32;
}
return i / 3 * 4 + scalar::base64::tail_encode_base64((char *)out, src + i,
@@ -30012,9 +30024,9 @@ simdutf_warn_unused size_t implementation::base64_length_from_binary(size_t leng

size_t implementation::binary_to_base64(const char * input, size_t length, char* output, base64_options options) const noexcept {
if(options & base64_url) {
return encode_base64<base64_url>(output, input, length);
return encode_base64<true>(output, input, length, options);
} else {
return encode_base64<base64_default>(output, input, length);
return encode_base64<false>(output, input, length, options);
}
}
} // namespace haswell
@@ -35675,8 +35687,8 @@ template <bool base64_url> __m128i lookup_pshufb_improved(const __m128i input) {
return _mm_add_epi8(result, input);
}

template <base64_options options>
size_t encode_base64(char *dst, const char *src, size_t srclen) {
template <bool isbase64url>
size_t encode_base64(char *dst, const char *src, size_t srclen, base64_options options) {
// credit: Wojciech Muła
// SSE (lookup: pshufb improved unrolled)
const uint8_t *input = (const uint8_t *)src;
@@ -35727,19 +35739,19 @@ size_t encode_base64(char *dst, const char *src, size_t srclen) {
const __m128i input3 = _mm_or_si128(t1_3, t3_3);

_mm_storeu_si128(reinterpret_cast<__m128i *>(out),
lookup_pshufb_improved<options & base64_url>(input0));
lookup_pshufb_improved<isbase64url>(input0));
out += 16;

_mm_storeu_si128(reinterpret_cast<__m128i *>(out),
lookup_pshufb_improved<options & base64_url>(input1));
lookup_pshufb_improved<isbase64url>(input1));
out += 16;

_mm_storeu_si128(reinterpret_cast<__m128i *>(out),
lookup_pshufb_improved<options & base64_url>(input2));
lookup_pshufb_improved<isbase64url>(input2));
out += 16;

_mm_storeu_si128(reinterpret_cast<__m128i *>(out),
lookup_pshufb_improved<options & base64_url>(input3));
lookup_pshufb_improved<isbase64url>(input3));
out += 16;
}
for (; i + 16 <= srclen; i += 12) {
@@ -35779,7 +35791,7 @@ size_t encode_base64(char *dst, const char *src, size_t srclen) {
const __m128i indices = _mm_or_si128(t1, t3);

_mm_storeu_si128(reinterpret_cast<__m128i *>(out),
lookup_pshufb_improved<options & base64_url>(indices));
lookup_pshufb_improved<isbase64url>(indices));
out += 16;
}

@@ -38555,10 +38567,10 @@ simdutf_warn_unused size_t implementation::base64_length_from_binary(size_t leng
}

size_t implementation::binary_to_base64(const char * input, size_t length, char* output, base64_options options) const noexcept {
if(options == base64_url) {
return encode_base64<base64_url>(output, input, length);
if(options & base64_url) {
return encode_base64<true>(output, input, length, options);
} else {
return encode_base64<base64_default>(output, input, length);
return encode_base64<false>(output, input, length, options);
}
}
} // namespace westmere
52 changes: 42 additions & 10 deletions deps/simdutf/simdutf.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* auto-generated on 2024-05-07 22:33:11 -0400. Do not edit! */
/* auto-generated on 2024-07-11 00:01:58 -0400. Do not edit! */
/* begin file include/simdutf.h */
#ifndef SIMDUTF_H
#define SIMDUTF_H
@@ -594,7 +594,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS
#define SIMDUTF_SIMDUTF_VERSION_H

/** The version of simdutf being used (major.minor.revision) */
#define SIMDUTF_VERSION "5.2.8"
#define SIMDUTF_VERSION "5.3.0"

namespace simdutf {
enum {
@@ -605,11 +605,11 @@ enum {
/**
* The minor version (major.MINOR.revision) of simdutf being used.
*/
SIMDUTF_VERSION_MINOR = 2,
SIMDUTF_VERSION_MINOR = 3,
/**
* The revision (major.minor.REVISION) of simdutf being used.
*/
SIMDUTF_VERSION_REVISION = 8
SIMDUTF_VERSION_REVISION = 0
};
} // namespace simdutf

@@ -2300,9 +2300,13 @@ simdutf_warn_unused size_t trim_partial_utf16(const char16_t* input, size_t leng

// base64_options are used to specify the base64 encoding options.
using base64_options = uint64_t;
using base64_options = uint64_t;
enum : base64_options {
base64_default = 0, /* standard base64 format */
base64_url = 1 /* base64url format*/
base64_default = 0, /* standard base64 format (with padding) */
base64_url = 1, /* base64url format (no padding) */
base64_reverse_padding = 2, /* modifier for base64_default and base64_url */
base64_default_no_padding = base64_default | base64_reverse_padding, /* standard base64 format without padding */
base64_url_with_padding = base64_url | base64_reverse_padding, /* base64url with padding */
};

/**
@@ -2345,6 +2349,12 @@ simdutf_warn_unused size_t maximal_binary_length_from_base64(const char16_t * in
* where the invalid character was found. When the error is BASE64_INPUT_REMAINDER, then
* r.count contains the number of bytes decoded.
*
* The default option (simdutf::base64_default) expects the characters `+` and `/` as part of its alphabet.
* The URL option (simdutf::base64_url) expects the characters `-` and `_` as part of its alphabet.
*
* The padding (`=`) is validated if present. There may be at most two padding characters at the end of the input.
* If there are any padding characters, the total number of characters (excluding spaces but including padding characters) must be divisible by four.
*
* You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long.
* If you fail to provide that much space, the function may cause a buffer overflow.
*
@@ -2365,8 +2375,13 @@ simdutf_warn_unused result base64_to_binary(const char * input, size_t length, c
simdutf_warn_unused size_t base64_length_from_binary(size_t length, base64_options options = base64_default) noexcept;

/**
* Convert a binary input to a base64 ouput. The output is always padded with equal signs so that it is
* a multiple of 4 bytes long.
* Convert a binary input to a base64 output.
*
* The default option (simdutf::base64_default) uses the characters `+` and `/` as part of its alphabet.
* Further, it adds padding (`=`) at the end of the output to ensure that the output length is a multiple of four.
*
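* The URL option (simdutf::base64_url) uses the characters `-` and `_` as part of its alphabet. No padding
* is added at the end of the output.
*
* The padding convention can be inverted with simdutf::base64_reverse_padding: simdutf::base64_default_no_padding
* omits the padding, and simdutf::base64_url_with_padding adds it.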
*
* This function always succeeds.
*
@@ -2396,6 +2411,12 @@ size_t binary_to_base64(const char * input, size_t length, char* output, base64_
* where the invalid character was found. When the error is BASE64_INPUT_REMAINDER, then
* r.count contains the number of bytes decoded.
*
* The default option (simdutf::base64_default) expects the characters `+` and `/` as part of its alphabet.
* The URL option (simdutf::base64_url) expects the characters `-` and `_` as part of its alphabet.
*
* The padding (`=`) is validated if present. There may be at most two padding characters at the end of the input.
* If there are any padding characters, the total number of characters (excluding spaces but including padding characters) must be divisible by four.
*
* You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long.
* If you fail to provide that much space, the function may cause a buffer overflow.
*
@@ -2429,6 +2450,12 @@ simdutf_warn_unused result base64_to_binary(const char16_t * input, size_t lengt
* where the invalid character was found. When the error is BASE64_INPUT_REMAINDER, then
* r.count contains the number of bytes decoded.
*
* The default option (simdutf::base64_default) expects the characters `+` and `/` as part of its alphabet.
* The URL option (simdutf::base64_url) expects the characters `-` and `_` as part of its alphabet.
*
* The padding (`=`) is validated if present. There may be at most two padding characters at the end of the input.
* If there are any padding characters, the total number of characters (excluding spaces but including padding characters) must be divisible by four.
*
* The INVALID_BASE64_CHARACTER cases are considered fatal and you are expected to discard
* the output.
*
@@ -3590,8 +3617,13 @@ class implementation {
simdutf_warn_unused virtual size_t base64_length_from_binary(size_t length, base64_options options = base64_default) const noexcept = 0;

/**
* Convert a binary input to a base64 ouput. The output is always padded with equal signs so that it is
* a multiple of 4 bytes long.
* Convert a binary input to a base64 output.
*
* The default option (simdutf::base64_default) uses the characters `+` and `/` as part of its alphabet.
* Further, it adds padding (`=`) at the end of the output to ensure that the output length is a multiple of four.
*
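* The URL option (simdutf::base64_url) uses the characters `-` and `_` as part of its alphabet. No padding
* is added at the end of the output.
*
* The padding convention can be inverted with simdutf::base64_reverse_padding: simdutf::base64_default_no_padding
* omits the padding, and simdutf::base64_url_with_padding adds it.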
*
* This function always succeeds.
*

0 comments on commit 5fb8e1b

Please sign in to comment.