From dcba3a06735075b17acf7158d098217b6f5ead26 Mon Sep 17 00:00:00 2001 From: Joyee Cheung Date: Sat, 11 Feb 2023 18:54:35 +0100 Subject: [PATCH] src: move encoding bindings to a new binding Move the bindings used by TextEncoder to a new binding for more self-contained code. PR-URL: https://github.com/nodejs/node/pull/46658 Reviewed-By: Darshan Sen --- lib/internal/encoding.js | 2 +- node.gyp | 2 + src/base_object_types.h | 1 + src/encoding_binding.cc | 202 ++++++++++++++++++++++++ src/encoding_binding.h | 46 ++++++ src/node_binding.cc | 1 + src/node_buffer.cc | 128 --------------- src/node_external_reference.h | 1 + src/node_snapshotable.cc | 1 + test/parallel/test-bootstrap-modules.js | 1 + 10 files changed, 256 insertions(+), 129 deletions(-) create mode 100644 src/encoding_binding.cc create mode 100644 src/encoding_binding.h diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js index 5feec0552870be..2ee569c737d18c 100644 --- a/lib/internal/encoding.js +++ b/lib/internal/encoding.js @@ -54,7 +54,7 @@ const { encodeInto, encodeUtf8String, decodeUTF8, -} = internalBinding('buffer'); +} = internalBinding('encoding_binding'); const { Buffer } = require('buffer'); diff --git a/node.gyp b/node.gyp index 0b9b600e92fa57..2e41a7bb2272b7 100644 --- a/node.gyp +++ b/node.gyp @@ -479,6 +479,7 @@ 'src/connection_wrap.cc', 'src/dataqueue/queue.cc', 'src/debug_utils.cc', + 'src/encoding_binding.cc', 'src/env.cc', 'src/fs_event_wrap.cc', 'src/handle_wrap.cc', @@ -585,6 +586,7 @@ 'src/dataqueue/queue.h', 'src/debug_utils.h', 'src/debug_utils-inl.h', + 'src/encoding_binding.h', 'src/env_properties.h', 'src/env.h', 'src/env-inl.h', diff --git a/src/base_object_types.h b/src/base_object_types.h index db5b5e2f5e9ba9..3745c00970ee84 100644 --- a/src/base_object_types.h +++ b/src/base_object_types.h @@ -10,6 +10,7 @@ namespace node { // what the class passes to SET_BINDING_ID(), the second argument should match // the C++ class name. #define SERIALIZABLE_BINDING_TYPES(V) \ + V(encoding_binding_data, encoding_binding::BindingData) \ V(fs_binding_data, fs::BindingData) \ V(v8_binding_data, v8_utils::BindingData) \ V(blob_binding_data, BlobBindingData) \ diff --git a/src/encoding_binding.cc b/src/encoding_binding.cc new file mode 100644 index 00000000000000..0f54990907549b --- /dev/null +++ b/src/encoding_binding.cc @@ -0,0 +1,202 @@ +#include "encoding_binding.h" +#include "env-inl.h" +#include "node_errors.h" +#include "node_external_reference.h" +#include "simdutf.h" +#include "string_bytes.h" +#include "v8.h" + +#include + +namespace node { +namespace encoding_binding { + +using v8::ArrayBuffer; +using v8::BackingStore; +using v8::Context; +using v8::FunctionCallbackInfo; +using v8::Isolate; +using v8::Local; +using v8::MaybeLocal; +using v8::Object; +using v8::String; +using v8::Uint8Array; +using v8::Uint32Array; +using v8::Value; + +BindingData::BindingData(Environment* env, Local object) + : SnapshotableObject(env, object, type_int) {} + +bool BindingData::PrepareForSerialization(Local context, + v8::SnapshotCreator* creator) { + // Return true because we need to maintain the reference to the binding from + // JS land. + return true; +} + +InternalFieldInfoBase* BindingData::Serialize(int index) { + DCHECK_EQ(index, BaseObject::kEmbedderType); + InternalFieldInfo* info = + InternalFieldInfoBase::New(type()); + return info; +} + +void BindingData::Deserialize(Local context, + Local holder, + int index, + InternalFieldInfoBase* info) { + DCHECK_EQ(index, BaseObject::kEmbedderType); + v8::HandleScope scope(context->GetIsolate()); + Environment* env = Environment::GetCurrent(context); + // Recreate the buffer in the constructor. + BindingData* binding = env->AddBindingData(context, holder); + CHECK_NOT_NULL(binding); +} + +void BindingData::EncodeInto(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + Isolate* isolate = env->isolate(); + CHECK_GE(args.Length(), 3); + CHECK(args[0]->IsString()); + CHECK(args[1]->IsUint8Array()); + CHECK(args[2]->IsUint32Array()); + + Local source = args[0].As(); + + Local dest = args[1].As(); + Local buf = dest->Buffer(); + char* write_result = static_cast(buf->Data()) + dest->ByteOffset(); + size_t dest_length = dest->ByteLength(); + + // results = [ read, written ] + Local result_arr = args[2].As(); + uint32_t* results = reinterpret_cast( + static_cast(result_arr->Buffer()->Data()) + + result_arr->ByteOffset()); + + int nchars; + int written = source->WriteUtf8( + isolate, + write_result, + dest_length, + &nchars, + String::NO_NULL_TERMINATION | String::REPLACE_INVALID_UTF8); + results[0] = nchars; + results[1] = written; +} + +// Encode a single string to a UTF-8 Uint8Array (not Buffer). +// Used in TextEncoder.prototype.encode. +void BindingData::EncodeUtf8String(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + Isolate* isolate = env->isolate(); + CHECK_GE(args.Length(), 1); + CHECK(args[0]->IsString()); + + Local str = args[0].As(); + size_t length = str->Utf8Length(isolate); + + Local ab; + { + NoArrayBufferZeroFillScope no_zero_fill_scope(env->isolate_data()); + std::unique_ptr bs = + ArrayBuffer::NewBackingStore(isolate, length); + + CHECK(bs); + + str->WriteUtf8(isolate, + static_cast(bs->Data()), + -1, // We are certain that `data` is sufficiently large + nullptr, + String::NO_NULL_TERMINATION | String::REPLACE_INVALID_UTF8); + + ab = ArrayBuffer::New(isolate, std::move(bs)); + } + + auto array = Uint8Array::New(ab, 0, length); + args.GetReturnValue().Set(array); +} + +// Convert the input into an encoded string +void BindingData::DecodeUTF8(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); // list, flags + + CHECK_GE(args.Length(), 1); + + if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() || + args[0]->IsArrayBufferView())) { + return node::THROW_ERR_INVALID_ARG_TYPE( + env->isolate(), + "The \"list\" argument must be an instance of SharedArrayBuffer, " + "ArrayBuffer or ArrayBufferView."); + } + + ArrayBufferViewContents buffer(args[0]); + + bool ignore_bom = args[1]->IsTrue(); + bool has_fatal = args[2]->IsTrue(); + + const char* data = buffer.data(); + size_t length = buffer.length(); + + if (has_fatal) { + auto result = simdutf::validate_utf8_with_errors(data, length); + + if (result.error) { + return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA( + env->isolate(), "The encoded data was not valid for encoding utf-8"); + } + } + + if (!ignore_bom && length >= 3) { + if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) { + data += 3; + length -= 3; + } + } + + if (length == 0) return args.GetReturnValue().SetEmptyString(); + + Local error; + MaybeLocal maybe_ret = + StringBytes::Encode(env->isolate(), data, length, UTF8, &error); + Local ret; + + if (!maybe_ret.ToLocal(&ret)) { + CHECK(!error.IsEmpty()); + env->isolate()->ThrowException(error); + return; + } + + args.GetReturnValue().Set(ret); +} + +void BindingData::Initialize(Local target, + Local unused, + Local context, + void* priv) { + Environment* env = Environment::GetCurrent(context); + BindingData* const binding_data = + env->AddBindingData(context, target); + if (binding_data == nullptr) return; + + SetMethod(context, target, "encodeInto", EncodeInto); + SetMethodNoSideEffect(context, target, "encodeUtf8String", EncodeUtf8String); + SetMethodNoSideEffect(context, target, "decodeUTF8", DecodeUTF8); +} + +void BindingData::RegisterTimerExternalReferences( + ExternalReferenceRegistry* registry) { + registry->Register(EncodeInto); + registry->Register(EncodeUtf8String); + registry->Register(DecodeUTF8); +} + +} // namespace encoding_binding +} // namespace node + +NODE_BINDING_CONTEXT_AWARE_INTERNAL( + encoding_binding, node::encoding_binding::BindingData::Initialize) +NODE_BINDING_EXTERNAL_REFERENCE( + encoding_binding, + node::encoding_binding::BindingData::RegisterTimerExternalReferences) diff --git a/src/encoding_binding.h b/src/encoding_binding.h new file mode 100644 index 00000000000000..472b6bb4ad03ad --- /dev/null +++ b/src/encoding_binding.h @@ -0,0 +1,46 @@ +#ifndef SRC_ENCODING_BINDING_H_ +#define SRC_ENCODING_BINDING_H_ + +#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS + +#include +#include "aliased_buffer.h" +#include "node_snapshotable.h" +#include "v8-fast-api-calls.h" + +namespace node { +class ExternalReferenceRegistry; + +namespace encoding_binding { +class BindingData : public SnapshotableObject { + public: + BindingData(Environment* env, v8::Local obj); + + using InternalFieldInfo = InternalFieldInfoBase; + + SERIALIZABLE_OBJECT_METHODS() + SET_BINDING_ID(encoding_binding_data) + + SET_NO_MEMORY_INFO() + SET_SELF_SIZE(BindingData) + SET_MEMORY_INFO_NAME(BindingData) + + static void EncodeInto(const v8::FunctionCallbackInfo& args); + static void EncodeUtf8String(const v8::FunctionCallbackInfo& args); + static void DecodeUTF8(const v8::FunctionCallbackInfo& args); + + static void Initialize(v8::Local target, + v8::Local unused, + v8::Local context, + void* priv); + static void RegisterTimerExternalReferences( + ExternalReferenceRegistry* registry); +}; + +} // namespace encoding_binding + +} // namespace node + +#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS + +#endif // SRC_ENCODING_BINDING_H_ diff --git a/src/node_binding.cc b/src/node_binding.cc index db607ea298edf5..f9c3af892da864 100644 --- a/src/node_binding.cc +++ b/src/node_binding.cc @@ -42,6 +42,7 @@ V(config) \ V(contextify) \ V(credentials) \ + V(encoding_binding) \ V(errors) \ V(fs) \ V(fs_dir) \ diff --git a/src/node_buffer.cc b/src/node_buffer.cc index fae5b8431926ee..07f040ecea3750 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -568,60 +568,6 @@ void StringSlice(const FunctionCallbackInfo& args) { args.GetReturnValue().Set(ret); } -// Convert the input into an encoded string -void DecodeUTF8(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); // list, flags - - CHECK_GE(args.Length(), 1); - - if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() || - args[0]->IsArrayBufferView())) { - return node::THROW_ERR_INVALID_ARG_TYPE( - env->isolate(), - "The \"list\" argument must be an instance of SharedArrayBuffer, " - "ArrayBuffer or ArrayBufferView."); - } - - ArrayBufferViewContents buffer(args[0]); - - bool ignore_bom = args[1]->IsTrue(); - bool has_fatal = args[2]->IsTrue(); - - const char* data = buffer.data(); - size_t length = buffer.length(); - - if (has_fatal) { - auto result = simdutf::validate_utf8_with_errors(data, length); - - if (result.error) { - return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA( - env->isolate(), "The encoded data was not valid for encoding utf-8"); - } - } - - if (!ignore_bom && length >= 3) { - if (memcmp(data, "\xEF\xBB\xBF", 3) == 0) { - data += 3; - length -= 3; - } - } - - if (length == 0) return args.GetReturnValue().SetEmptyString(); - - Local error; - MaybeLocal maybe_ret = - StringBytes::Encode(env->isolate(), data, length, UTF8, &error); - Local ret; - - if (!maybe_ret.ToLocal(&ret)) { - CHECK(!error.IsEmpty()); - env->isolate()->ThrowException(error); - return; - } - - args.GetReturnValue().Set(ret); -} - // bytesCopied = copy(buffer, target[, targetStart][, sourceStart][, sourceEnd]) void Copy(const FunctionCallbackInfo &args) { Environment* env = Environment::GetCurrent(args); @@ -1173,72 +1119,6 @@ void Swap64(const FunctionCallbackInfo& args) { args.GetReturnValue().Set(args[0]); } - -// Encode a single string to a UTF-8 Uint8Array (not Buffer). -// Used in TextEncoder.prototype.encode. -static void EncodeUtf8String(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - Isolate* isolate = env->isolate(); - CHECK_GE(args.Length(), 1); - CHECK(args[0]->IsString()); - - Local str = args[0].As(); - size_t length = str->Utf8Length(isolate); - - Local ab; - { - NoArrayBufferZeroFillScope no_zero_fill_scope(env->isolate_data()); - std::unique_ptr bs = - ArrayBuffer::NewBackingStore(isolate, length); - - CHECK(bs); - - str->WriteUtf8(isolate, - static_cast(bs->Data()), - -1, // We are certain that `data` is sufficiently large - nullptr, - String::NO_NULL_TERMINATION | String::REPLACE_INVALID_UTF8); - - ab = ArrayBuffer::New(isolate, std::move(bs)); - } - - auto array = Uint8Array::New(ab, 0, length); - args.GetReturnValue().Set(array); -} - - -static void EncodeInto(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - Isolate* isolate = env->isolate(); - CHECK_GE(args.Length(), 3); - CHECK(args[0]->IsString()); - CHECK(args[1]->IsUint8Array()); - CHECK(args[2]->IsUint32Array()); - - Local source = args[0].As(); - - Local dest = args[1].As(); - Local buf = dest->Buffer(); - char* write_result = static_cast(buf->Data()) + dest->ByteOffset(); - size_t dest_length = dest->ByteLength(); - - // results = [ read, written ] - Local result_arr = args[2].As(); - uint32_t* results = reinterpret_cast( - static_cast(result_arr->Buffer()->Data()) + - result_arr->ByteOffset()); - - int nchars; - int written = source->WriteUtf8( - isolate, - write_result, - dest_length, - &nchars, - String::NO_NULL_TERMINATION | String::REPLACE_INVALID_UTF8); - results[0] = nchars; - results[1] = written; -} - static void IsUtf8(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); CHECK_EQ(args.Length(), 1); @@ -1382,7 +1262,6 @@ void Initialize(Local target, SetMethod(context, target, "setBufferPrototype", SetBufferPrototype); SetMethodNoSideEffect(context, target, "createFromString", CreateFromString); - SetMethodNoSideEffect(context, target, "decodeUTF8", DecodeUTF8); SetFastMethodNoSideEffect(context, target, @@ -1404,9 +1283,6 @@ void Initialize(Local target, SetMethod(context, target, "swap32", Swap32); SetMethod(context, target, "swap64", Swap64); - SetMethod(context, target, "encodeInto", EncodeInto); - SetMethodNoSideEffect(context, target, "encodeUtf8String", EncodeUtf8String); - SetMethodNoSideEffect(context, target, "isUtf8", IsUtf8); SetMethodNoSideEffect(context, target, "isAscii", IsAscii); @@ -1447,7 +1323,6 @@ void Initialize(Local target, void RegisterExternalReferences(ExternalReferenceRegistry* registry) { registry->Register(SetBufferPrototype); registry->Register(CreateFromString); - registry->Register(DecodeUTF8); registry->Register(SlowByteLengthUtf8); registry->Register(fast_byte_length_utf8.GetTypeInfo()); @@ -1464,9 +1339,6 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) { registry->Register(Swap32); registry->Register(Swap64); - registry->Register(EncodeInto); - registry->Register(EncodeUtf8String); - registry->Register(IsUtf8); registry->Register(IsAscii); diff --git a/src/node_external_reference.h b/src/node_external_reference.h index 0bbcb53315981a..f64a296a5dd191 100644 --- a/src/node_external_reference.h +++ b/src/node_external_reference.h @@ -77,6 +77,7 @@ class ExternalReferenceRegistry { V(cares_wrap) \ V(contextify) \ V(credentials) \ + V(encoding_binding) \ V(env_var) \ V(errors) \ V(fs) \ diff --git a/src/node_snapshotable.cc b/src/node_snapshotable.cc index bae1c3e74e7fa0..693fad8dda40fd 100644 --- a/src/node_snapshotable.cc +++ b/src/node_snapshotable.cc @@ -5,6 +5,7 @@ #include #include "base_object-inl.h" #include "debug_utils-inl.h" +#include "encoding_binding.h" #include "env-inl.h" #include "node_blob.h" #include "node_builtins.h" diff --git a/test/parallel/test-bootstrap-modules.js b/test/parallel/test-bootstrap-modules.js index c0fd99055de445..c2879ee76fc179 100644 --- a/test/parallel/test-bootstrap-modules.js +++ b/test/parallel/test-bootstrap-modules.js @@ -10,6 +10,7 @@ const assert = require('assert'); const expectedModules = new Set([ 'Internal Binding builtins', + 'Internal Binding encoding_binding', 'Internal Binding errors', 'Internal Binding util', 'NativeModule internal/errors',