Skip to content

Commit 5b1d172

Browse files
anonrig and addaleax
committed
util: add fast path for utf8 encoding
Co-authored-by: Anna Henningsen <anna@addaleax.net>
1 parent d55a7c3 commit 5b1d172

File tree

2 files changed

+81
-5
lines changed

2 files changed

+81
-5
lines changed

lib/internal/encoding.js

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
// https://encoding.spec.whatwg.org
55

66
const {
7+
Boolean,
78
ObjectCreate,
89
ObjectDefineProperties,
910
ObjectGetOwnPropertyDescriptors,
@@ -29,6 +30,8 @@ const kFlags = Symbol('flags');
2930
const kEncoding = Symbol('encoding');
3031
const kDecoder = Symbol('decoder');
3132
const kEncoder = Symbol('encoder');
33+
const kUTF8FastPath = Symbol('kUTF8FastPath');
34+
const kIgnoreBOM = Symbol('kIgnoreBOM');
3235

3336
const {
3437
getConstructorOf,
@@ -50,7 +53,8 @@ const {
5053

5154
const {
5255
encodeInto,
53-
encodeUtf8String
56+
encodeUtf8String,
57+
decodeUTF8,
5458
} = internalBinding('buffer');
5559

5660
let Buffer;
@@ -398,19 +402,40 @@ function makeTextDecoderICU() {
398402
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
399403
}
400404

401-
const handle = getConverter(enc, flags);
402-
if (handle === undefined)
403-
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
405+
// Only support fast path for UTF-8 without FATAL flag
406+
const fastPathAvailable = enc === 'utf-8' && !(options?.fatal);
404407

405408
this[kDecoder] = true;
406-
this[kHandle] = handle;
407409
this[kFlags] = flags;
408410
this[kEncoding] = enc;
411+
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
412+
this[kUTF8FastPath] = fastPathAvailable;
413+
this[kHandle] = undefined;
414+
415+
if (!fastPathAvailable) {
416+
this.#prepareConverter();
417+
}
409418
}
410419

420+
#prepareConverter() {
421+
if (this[kHandle] !== undefined) return;
422+
const handle = getConverter(this[kEncoding], this[kFlags]);
423+
if (handle === undefined)
424+
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
425+
this[kHandle] = handle;
426+
}
411427

412428
decode(input = empty, options = kEmptyObject) {
413429
validateDecoder(this);
430+
431+
this[kUTF8FastPath] &&= !(options?.stream);
432+
433+
if (this[kUTF8FastPath]) {
434+
return decodeUTF8(input, this[kIgnoreBOM]);
435+
}
436+
437+
this.#prepareConverter();
438+
414439
if (!isAnyArrayBuffer(input) && !isArrayBufferView(input)) {
415440
throw new ERR_INVALID_ARG_TYPE('input',
416441
['ArrayBuffer', 'ArrayBufferView'],

src/node_buffer.cc

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "node_blob.h"
2525
#include "node_errors.h"
2626
#include "node_external_reference.h"
27+
#include "node_i18n.h"
2728
#include "node_internals.h"
2829

2930
#include "env-inl.h"
@@ -565,6 +566,54 @@ void StringSlice(const FunctionCallbackInfo<Value>& args) {
565566
args.GetReturnValue().Set(ret);
566567
}
567568

569+
// Convert the input into an encoded string
570+
void DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
571+
Environment* env = Environment::GetCurrent(args); // list, flags
572+
573+
if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
574+
args[0]->IsArrayBufferView())) {
575+
return node::THROW_ERR_INVALID_ARG_TYPE(
576+
env->isolate(),
577+
"The \"list\" argument must be an instance of SharedArrayBuffer, "
578+
"ArrayBuffer or ArrayBufferView.");
579+
}
580+
581+
ArrayBufferViewContents<char> buffer(args[0]);
582+
583+
CHECK(args[1]->IsBoolean());
584+
bool ignore_bom = args[1]->BooleanValue(env->isolate());
585+
586+
const char* data = buffer.data();
587+
auto beginning = 0;
588+
auto length = buffer.length();
589+
590+
if (buffer.length() == 0) return args.GetReturnValue().SetEmptyString();
591+
592+
if (!ignore_bom) {
593+
char bom[] = "\xEF\xBB\xBF";
594+
595+
if (strncmp(data, bom, 0) == 0 && strncmp(data, bom, 1) == 0 &&
596+
strncmp(data, bom, 2) == 0) {
597+
beginning += 3;
598+
length -= 3;
599+
}
600+
}
601+
602+
auto output = data + beginning;
603+
604+
Local<Value> error;
605+
MaybeLocal<Value> maybe_ret =
606+
StringBytes::Encode(env->isolate(), output, length, UTF8, &error);
607+
Local<Value> ret;
608+
609+
if (!maybe_ret.ToLocal(&ret)) {
610+
CHECK(!error.IsEmpty());
611+
env->isolate()->ThrowException(error);
612+
return;
613+
}
614+
615+
args.GetReturnValue().Set(ret);
616+
}
568617

569618
// bytesCopied = copy(buffer, target[, targetStart][, sourceStart][, sourceEnd])
570619
void Copy(const FunctionCallbackInfo<Value> &args) {
@@ -1282,6 +1331,7 @@ void Initialize(Local<Object> target,
12821331

12831332
SetMethod(context, target, "setBufferPrototype", SetBufferPrototype);
12841333
SetMethodNoSideEffect(context, target, "createFromString", CreateFromString);
1334+
SetMethodNoSideEffect(context, target, "decodeUTF8", DecodeUTF8);
12851335

12861336
SetMethodNoSideEffect(context, target, "byteLengthUtf8", ByteLengthUtf8);
12871337
SetMethod(context, target, "copy", Copy);
@@ -1339,6 +1389,7 @@ void Initialize(Local<Object> target,
13391389
void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
13401390
registry->Register(SetBufferPrototype);
13411391
registry->Register(CreateFromString);
1392+
registry->Register(DecodeUTF8);
13421393

13431394
registry->Register(ByteLengthUtf8);
13441395
registry->Register(Copy);

0 commit comments

Comments
 (0)