Skip to content

Commit

Permalink
buffer: add SIMD Neon optimization for byteLength
Browse files Browse the repository at this point in the history
Co-authored-by: Keyhan Vakil <kvakil@sylph.kvakil.me>
Co-authored-by: Daniel Lemire <daniel@lemire.me>
  • Loading branch information
3 people committed May 17, 2023
1 parent c9ec72d commit 8e4c7dd
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 8 deletions.
1 change: 1 addition & 0 deletions node.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@
'src/node_report_utils.cc',
'src/node_sea.cc',
'src/node_serdes.cc',
'src/node_simd.cc',
'src/node_shadow_realm.cc',
'src/node_snapshotable.cc',
'src/node_sockaddr.cc',
Expand Down
11 changes: 3 additions & 8 deletions src/node_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "node_external_reference.h"
#include "node_i18n.h"
#include "node_internals.h"
#include "node_simd.h"

#include "env-inl.h"
#include "simdutf.h"
Expand Down Expand Up @@ -743,14 +744,8 @@ void SlowByteLengthUtf8(const FunctionCallbackInfo<Value>& args) {

// Fast-path byteLength for one-byte strings: returns the number of bytes the
// string occupies once encoded as UTF-8.
//
// receiver: unused; present only to satisfy the callback signature.
// source:   one-byte string view; `source.data` / `source.length` describe
//           the raw code units.
//
// The counting itself is delegated to node::simd::utf8_byte_length so that
// the NEON implementation is used where available. The previous inline
// scalar loop is removed: it returned before the delegation could run,
// leaving the SIMD call unreachable.
uint32_t FastByteLengthUtf8(Local<Value> receiver,
                            const v8::FastOneByteString& source) {
  return node::simd::utf8_byte_length(
      reinterpret_cast<const uint8_t*>(source.data), source.length);
}

static v8::CFunction fast_byte_length_utf8(
Expand Down
56 changes: 56 additions & 0 deletions src/node_simd.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#include "node_simd.h"

#if NODE_HAS_SIMD_NEON
#include <arm_neon.h>
#endif

namespace node {
namespace simd {

#if NODE_HAS_SIMD_NEON
// NEON implementation: returns `length` plus the number of bytes in
// [data, data + length) whose high bit is set (each such byte needs one
// extra byte when a one-byte string is encoded as UTF-8).
//
// data:   pointer to the input bytes; only dereferenced when length > 0.
// length: number of input bytes.
// Returns the sum truncated to 32 bits, matching the scalar fallback.
uint32_t utf8_byte_length(const uint8_t* data, size_t length) {
  constexpr size_t kLanes = sizeof(uint8x16_t);
  const size_t tail = length % kLanes;
  const uint8_t* const simd_end = data + (length - tail);
  const uint8x16_t threshold = vdupq_n_u8(0x80);

  // The vector loop accumulates 4x the real count (see below). Keep that
  // intermediate in 64 bits: a 32-bit accumulator wraps once 2^30 high-bit
  // bytes have been seen, which would silently corrupt the result.
  uint64_t quadrupled = 0;

  for (; data < simd_end; data += kLanes) {
    // Load 16 bytes.
    const uint8x16_t input = vld1q_u8(data);

    // Lanes with a value >= 0x80 become 0xFF, all others 0x00.
    const uint8x16_t with_highbit = vcgeq_u8(input, threshold);

    // Shift right and narrow: every 16-bit pair of lanes collapses to one
    // byte that keeps 4 bits from each of the two original lanes.
    const uint8x8_t highbits =
        vshrn_n_u16(vreinterpretq_u16_u8(with_highbit), 4);

    // Each narrowed byte now has 0, 4 or 8 bits set.
    const uint8x8_t bitsperbyte = vcnt_u8(highbits);

    // Horizontal sum of the 8 per-byte popcounts (at most 64).
    quadrupled += vaddlv_u8(bitsperbyte);
  }

  // Every high-bit input byte contributed exactly 4 set bits.
  uint64_t extra = quadrupled / 4;

  // Scalar tail for the final (length % 16) bytes.
  for (size_t j = 0; j < tail; ++j) {
    extra += simd_end[j] >> 7;
  }

  return static_cast<uint32_t>(extra + length);
}
#else
// Portable fallback: returns `length` plus the number of bytes in
// [data, data + length) whose high bit is set (each such byte needs one
// extra byte when a one-byte string is encoded as UTF-8).
//
// data:   pointer to the input bytes; only dereferenced when length > 0.
// length: number of input bytes.
// Returns the sum truncated to 32 bits.
uint32_t utf8_byte_length(const uint8_t* data, size_t length) {
  uint32_t high_bit_count = 0;
  // Index with size_t: a 32-bit index compared against a size_t length
  // wraps around and never terminates once length exceeds UINT32_MAX.
  for (size_t i = 0; i < length; ++i) {
    high_bit_count += data[i] >> 7;
  }
  return static_cast<uint32_t>(high_bit_count + length);
}
#endif

} // namespace simd
} // namespace node
22 changes: 22 additions & 0 deletions src/node_simd.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#ifndef SRC_NODE_SIMD_H_
#define SRC_NODE_SIMD_H_

#if defined(__aarch64__) || defined(_M_ARM64)
#define NODE_HAS_SIMD_NEON 1
#endif

#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

namespace node {
namespace simd {

// Returns `length` plus the number of bytes in [input, input + length) that
// have their high bit set (value >= 0x80), as a 32-bit value.
// When NODE_HAS_SIMD_NEON is defined the implementation uses NEON
// intrinsics; otherwise a plain byte-by-byte loop is compiled instead.
uint32_t utf8_byte_length(const uint8_t* input, size_t length);

} // namespace simd
} // namespace node

#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

#endif // SRC_NODE_SIMD_H_

0 comments on commit 8e4c7dd

Please sign in to comment.