Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

buffer: add SIMD Neon optimization for byteLength #48009

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions node.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@
'src/node_report_utils.cc',
'src/node_sea.cc',
'src/node_serdes.cc',
'src/node_simd.cc',
'src/node_shadow_realm.cc',
'src/node_snapshotable.cc',
'src/node_sockaddr.cc',
Expand Down
11 changes: 3 additions & 8 deletions src/node_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "node_external_reference.h"
#include "node_i18n.h"
#include "node_internals.h"
#include "node_simd.h"

#include "env-inl.h"
#include "simdutf.h"
Expand Down Expand Up @@ -743,14 +744,8 @@ void SlowByteLengthUtf8(const FunctionCallbackInfo<Value>& args) {

// Computes a UTF-8 byte length for the one-byte string `source`; `receiver`
// is not used in the body.
//
// NOTE(review): this span is a rendered diff with the +/- markers stripped
// (the file header reports "3 additions & 8 deletions"). The scalar loop
// below, through `return result;`, appears to be the removed implementation,
// and the final `return node::simd::utf8_byte_length(...)` the added
// replacement. Read literally as one function, the second return is
// unreachable.
uint32_t FastByteLengthUtf8(Local<Value> receiver,
const v8::FastOneByteString& source) {
// Scalar version: every byte contributes 1, plus 1 more when its top bit is
// set (`data[i] >> 7` is 1 for bytes >= 0x80 and 0 otherwise).
uint32_t result = 0;
uint32_t length = source.length;
const uint8_t* data = reinterpret_cast<const uint8_t*>(source.data);
for (uint32_t i = 0; i < length; ++i) {
result += (data[i] >> 7);
}
result += length;
return result;
// Delegating version: hands the same byte range to the shared helper
// declared in node_simd.h.
return node::simd::utf8_byte_length(
reinterpret_cast<const uint8_t*>(source.data), source.length);
}

static v8::CFunction fast_byte_length_utf8(
Expand Down
60 changes: 60 additions & 0 deletions src/node_simd.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#include "node_simd.h"

#include <string_view>

#if NODE_HAS_SIMD_NEON
#include <arm_neon.h>
#endif

namespace node {
namespace simd {

#if NODE_HAS_SIMD_NEON
// Returns `length` plus the number of bytes in [data, data + length) whose
// most significant bit is set, truncated to 32 bits. FastByteLengthUtf8 uses
// this as the UTF-8 byte length of a one-byte string: every byte counts once,
// and bytes >= 0x80 count twice.
//
// NEON implementation: processes 16 bytes per vector, then finishes the
// remaining (< 16) bytes with a scalar loop.
uint32_t utf8_byte_length(const uint8_t* data, size_t length) {
  constexpr size_t kLanes = sizeof(uint8x16_t);
  // Number of vectors folded into one accumulator before it is reduced.
  // Each step adds at most 1 to every 8-bit lane, so lanes stay <= 16 and
  // cannot wrap.
  constexpr size_t kStepsPerBlock = 16;
  constexpr size_t kBlockBytes = kLanes * kStepsPerBlock;

  uint64_t high_bit_count = 0;

  // Main loop: full 256-byte blocks with a constant inner trip count.
  for (size_t blocks = length / kBlockBytes; blocks != 0; --blocks) {
    uint8x16_t acc = vdupq_n_u8(0);
    for (size_t step = 0; step < kStepsPerBlock; ++step, data += kLanes) {
      // vsra: acc += (chunk >> 7), i.e. add each byte's top bit to its lane.
      acc = vsraq_n_u8(acc, vld1q_u8(data), 7);
    }
    // The 16 lanes can sum to 16 * 16 = 256, which does not fit in the
    // uint8_t returned by vaddvq_u8 (it would wrap to 0). vaddlvq_u8 widens
    // the horizontal sum to uint16_t, so the full range is preserved.
    high_bit_count += vaddlvq_u8(acc);
  }

  // Remaining whole vectors (fewer than kStepsPerBlock of them).
  uint8x16_t acc = vdupq_n_u8(0);
  for (size_t vectors = (length % kBlockBytes) / kLanes; vectors != 0;
       --vectors, data += kLanes) {
    acc = vsraq_n_u8(acc, vld1q_u8(data), 7);
  }
  high_bit_count += vaddlvq_u8(acc);

  // Scalar tail: the last `length % kLanes` bytes.
  for (size_t rest = length % kLanes; rest != 0; --rest, ++data) {
    high_bit_count += *data >> 7;
  }

  return static_cast<uint32_t>(high_bit_count + length);
}
#else
// Returns `length` plus the number of bytes in [data, data + length) whose
// most significant bit is set, truncated to 32 bits. FastByteLengthUtf8 uses
// this as the UTF-8 byte length of a one-byte string: every byte counts once,
// and bytes >= 0x80 count twice.
//
// Portable scalar implementation.
uint32_t utf8_byte_length(const uint8_t* data, size_t length) {
  uint32_t result = static_cast<uint32_t>(length);
  // Index with size_t: a 32-bit index could never reach a `length` larger
  // than UINT32_MAX and the loop would not terminate.
  for (size_t i = 0; i < length; ++i) {
    result += data[i] >> 7;
  }
  return result;
}
#endif

} // namespace simd
} // namespace node
22 changes: 22 additions & 0 deletions src/node_simd.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#ifndef SRC_NODE_SIMD_H_
#define SRC_NODE_SIMD_H_

#if defined(__aarch64__) || defined(_M_ARM64)
#define NODE_HAS_SIMD_NEON 1
#endif

#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

#include <string_view>

namespace node {
namespace simd {

// Returns `length` plus the number of bytes in [input, input + length) that
// have their most significant bit set. Defined in node_simd.cc, which selects
// a NEON implementation when NODE_HAS_SIMD_NEON is set and a scalar loop
// otherwise.
uint32_t utf8_byte_length(const uint8_t* input, size_t length);

} // namespace simd
} // namespace node

#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

#endif // SRC_NODE_SIMD_H_