From f1d3a0eb43d99462618fcc5c022f922631d7a587 Mon Sep 17 00:00:00 2001
From: Nikita Skovoroda
Date: Sat, 20 Dec 2025 03:27:28 +0400
Subject: [PATCH] src: improve StringBytes::Encode perf on UTF8

---
 src/encoding_binding.cc |  2 ++
 src/string_bytes.cc     | 10 ++++++++++
 2 files changed, 12 insertions(+)

diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc
index 1506442f704ee1..7683d205aa6a3e 100644
--- a/src/encoding_binding.cc
+++ b/src/encoding_binding.cc
@@ -379,6 +379,8 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
       return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
           env->isolate(), "The encoded data was not valid for encoding utf-8");
     }
+
+    // TODO(chalker): save on utf8 validity recheck in StringBytes::Encode()
   }
 
   if (length == 0) return args.GetReturnValue().SetEmptyString();
diff --git a/src/string_bytes.cc b/src/string_bytes.cc
index 8f6bedd63e25b3..bbf0595a2d848c 100644
--- a/src/string_bytes.cc
+++ b/src/string_bytes.cc
@@ -539,6 +539,16 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
         return ExternOneByteString::NewFromCopy(isolate, buf, buflen);
       }
 
+      if (simdutf::validate_utf8(buf, buflen)) {
+        // We know that we are non-ASCII (and are unlikely Latin1), use 2-byte
+        // In the most likely case of valid UTF-8, we can use this fast impl
+        size_t u16size = simdutf::utf16_length_from_utf8(buf, buflen);
+        uint16_t* dst = node::UncheckedMalloc<uint16_t>(u16size);
+        size_t utf16len = simdutf::convert_valid_utf8_to_utf16(
+            buf, buflen, reinterpret_cast<char16_t*>(dst));
+        return ExternTwoByteString::New(isolate, dst, utf16len);
+      }
+
       val =
           String::NewFromUtf8(isolate, buf, v8::NewStringType::kNormal, buflen);
       Local<String> str;