Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

buffer: improve fill & normalizeEncoding performance #18790

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions benchmark/buffers/buffer-fill.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
'use strict';
const common = require('../common.js');

const bench = common.createBenchmark(main, {
type: [
'fill(0)',
'fill("")',
'fill(100)',
'fill(400)',
'fill("t")',
'fill("test")',
'fill("t", "utf8")',
'fill("t", 0, "utf8")',
'fill("t", 0)',
'fill(Buffer.alloc(1), 0)'
],
size: [2 ** 8, 2 ** 13, 2 ** 16],
n: [2e4]
});

function main({ n, type, size }) {
const buffer = Buffer.allocUnsafe(size);
const testFunction = new Function('b', `
for (var i = 0; i < ${n}; i++) {
b.${type || 'fill(0)'};
}
`);
bench.start();
testFunction(buffer);
bench.end(n);
}
43 changes: 43 additions & 0 deletions benchmark/buffers/buffer-normalize-encoding.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
'use strict';

const common = require('../common.js');

const bench = common.createBenchmark(main, {
encoding: [
'ascii',
'ASCII',
'base64',
'BASE64',
'binary',
'BINARY',
'hex',
'HEX',
'latin1',
'LATIN1',
'ucs-2',
'UCS-2',
'ucs2',
'UCS2',
'utf-16le',
'UTF-16LE',
'utf-8',
'UTF-8',
'utf16le',
'UTF16LE',
'utf8',
'UTF8'
],
n: [1e6]
}, {
flags: ['--expose-internals']
});

function main({ encoding, n }) {
const { normalizeEncoding } = require('internal/util');

bench.start();
for (var i = 0; i < n; i++) {
normalizeEncoding(encoding);
}
bench.end(n);
}
File renamed without changes.
3 changes: 3 additions & 0 deletions doc/api/buffer.md
Original file line number Diff line number Diff line change
Expand Up @@ -1205,6 +1205,9 @@ console.log(buf1.equals(buf3));
<!-- YAML
added: v0.5.0
changes:
- version: REPLACEME
pr-url: https://github.com/nodejs/node/pull/REPLACEME
description: Negative `end` values throw an `ERR_INDEX_OUT_OF_RANGE` error.
- version: REPLACEME
pr-url: https://github.com/nodejs/node/pull/18129
description: Attempting to fill a non-zero length buffer with a zero length
Expand Down
94 changes: 49 additions & 45 deletions lib/buffer.js
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ function assertSize(size) {
err = new errors.RangeError('ERR_INVALID_OPT_VALUE', 'size', size);
}

if (err) {
if (err !== null) {
Error.captureStackTrace(err, assertSize);
throw err;
}
Expand All @@ -254,16 +254,8 @@ function assertSize(size) {
**/
Buffer.alloc = function alloc(size, fill, encoding) {
assertSize(size);
if (size > 0 && fill !== undefined) {
// Since we are filling anyway, don't zero fill initially.
// Only pay attention to encoding if it's a string. This
// prevents accidentally sending in a number that would
// be interpreted as a start offset.
if (typeof encoding !== 'string')
encoding = undefined;
const ret = createUnsafeBuffer(size);
if (fill_(ret, fill, encoding) > 0)
return ret;
if (fill !== undefined && size > 0) {
return _fill(createUnsafeBuffer(size), fill, encoding);
}
return new FastBuffer(size);
};
Expand Down Expand Up @@ -435,8 +427,8 @@ Buffer.compare = function compare(a, b) {


Buffer.isEncoding = function isEncoding(encoding) {
return typeof encoding === 'string' &&
typeof normalizeEncoding(encoding) === 'string';
return typeof encoding === 'string' && encoding.length !== 0 &&
normalizeEncoding(encoding) !== undefined;
};
Buffer[kIsEncodingSymbol] = Buffer.isEncoding;

Expand Down Expand Up @@ -834,14 +826,12 @@ Buffer.prototype.includes = function includes(val, byteOffset, encoding) {
// buffer.fill(buffer[, offset[, end]])
// buffer.fill(string[, offset[, end]][, encoding])
Buffer.prototype.fill = function fill(val, start, end, encoding) {
fill_(this, val, start, end, encoding);
return this;
return _fill(this, val, start, end, encoding);
};

function fill_(buf, val, start, end, encoding) {
// Handle string cases:
function _fill(buf, val, start, end, encoding) {
if (typeof val === 'string') {
if (typeof start === 'string') {
if (start === undefined || typeof start === 'string') {
encoding = start;
start = 0;
end = buf.length;
Expand All @@ -850,46 +840,60 @@ function fill_(buf, val, start, end, encoding) {
end = buf.length;
}

if (encoding !== undefined && typeof encoding !== 'string') {
throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'encoding',
'string', encoding);
}
var normalizedEncoding = normalizeEncoding(encoding);
const normalizedEncoding = normalizeEncoding(encoding);
if (normalizedEncoding === undefined) {
if (typeof encoding !== 'string') {
throw new errors.TypeError('ERR_INVALID_ARG_TYPE', 'encoding', 'string',
encoding);
}
throw new errors.TypeError('ERR_UNKNOWN_ENCODING', encoding);
}

if (val.length === 0) {
// Previously, if val === '', the Buffer would not fill,
// which is rather surprising.
// If val === '' default to zero.
val = 0;
} else if (val.length === 1) {
var code = val.charCodeAt(0);
if ((normalizedEncoding === 'utf8' && code < 128) ||
normalizedEncoding === 'latin1') {
// Fast path: If `val` fits into a single byte, use that numeric value.
val = code;
// Fast path: If `val` fits into a single byte, use that numeric value.
if (normalizedEncoding === 'utf8') {
const code = val.charCodeAt(0);
if (code < 128) {
val = code;
}
} else if (normalizedEncoding === 'latin1') {
val = val.charCodeAt(0);
}
}
} else if (typeof val === 'number') {
val = val & 255;
} else {
encoding = undefined;
}

// Invalid ranges are not set to a default, so can range check early.
if (start < 0 || end > buf.length)
throw new errors.RangeError('ERR_INDEX_OUT_OF_RANGE');

if (end <= start)
return 0;

start = start >>> 0;
end = end === undefined ? buf.length : end >>> 0;
const fillLength = bindingFill(buf, val, start, end, encoding);
if (start === undefined) {
start = 0;
end = buf.length;
} else {
// Invalid ranges are not set to a default, so can range check early.
if (end === undefined) {
if (start < 0)
throw new errors.RangeError('ERR_INDEX_OUT_OF_RANGE');
end = buf.length;
} else {
if (start < 0 || end > buf.length || end < 0)
throw new errors.RangeError('ERR_INDEX_OUT_OF_RANGE');
end = end >>> 0;
}
start = start >>> 0;
if (start >= end)
return buf;
}

if (fillLength === -1)
throw new errors.TypeError('ERR_INVALID_ARG_VALUE', 'value', val);
const res = bindingFill(buf, val, start, end, encoding);
if (res < 0) {
if (res === -1)
throw new errors.TypeError('ERR_INVALID_ARG_VALUE', 'value', val);
throw new errors.RangeError('ERR_INDEX_OUT_OF_RANGE');
}

return fillLength;
return buf;
}


Expand Down
77 changes: 50 additions & 27 deletions lib/internal/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -96,36 +96,59 @@ function assertCrypto() {
throw new errors.Error('ERR_NO_CRYPTO');
}

// The loop should only run at most twice, retrying with lowercased enc
// if there is no match in the first pass.
// We use a loop instead of branching to retry with a helper
// function in order to avoid the performance hit.
// Return undefined if there is no match.
// Move the "slow cases" to a separate function to make sure this function gets
// inlined properly. That prioritizes the common case.
function normalizeEncoding(enc) {
if (!enc) return 'utf8';
let retried;
while (true) {
switch (enc) {
case 'utf8':
case 'utf-8':
return 'utf8';
case 'ucs2':
case 'ucs-2':
case 'utf16le':
case 'utf-16le':
if (enc == null || enc === 'utf8' || enc === 'utf-8') return 'utf8';
return slowCases(enc);
}

function slowCases(enc) {
switch (enc.length) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm surprised this improves performance noticeably.

case 4:
if (enc === 'UTF8') return 'utf8';
if (enc === 'ucs2' || enc === 'UCS2') return 'utf16le';
enc = `${enc}`.toLowerCase();
if (enc === 'utf8') return 'utf8';
if (enc === 'ucs2' || enc === 'UCS2') return 'utf16le';
break;
case 3:
if (enc === 'hex' || enc === 'HEX' || `${enc}`.toLowerCase() === 'hex')
return 'hex';
break;
case 5:
if (enc === 'ascii') return 'ascii';
if (enc === 'ucs-2') return 'utf16le';
if (enc === 'UTF-8') return 'utf8';
if (enc === 'ASCII') return 'ascii';
if (enc === 'UCS-2') return 'utf16le';
enc = `${enc}`.toLowerCase();
if (enc === 'utf-8') return 'utf8';
if (enc === 'ascii') return 'ascii';
if (enc === 'usc-2') return 'utf16le';
break;
case 6:
if (enc === 'base64') return 'base64';
if (enc === 'latin1' || enc === 'binary') return 'latin1';
if (enc === 'BASE64') return 'base64';
if (enc === 'LATIN1' || enc === 'BINARY') return 'latin1';
enc = `${enc}`.toLowerCase();
if (enc === 'base64') return 'base64';
if (enc === 'latin1' || enc === 'binary') return 'latin1';
break;
case 7:
if (enc === 'utf16le' || enc === 'UTF16LE' ||
`${enc}`.toLowerCase() === 'utf16le')
return 'utf16le';
case 'latin1':
case 'binary':
return 'latin1';
case 'base64':
case 'ascii':
case 'hex':
return enc;
default:
if (retried) return; // undefined
enc = ('' + enc).toLowerCase();
retried = true;
}
break;
case 8:
if (enc === 'utf-16le' || enc === 'UTF-16LE' ||
`${enc}`.toLowerCase() === 'utf-16le')
return 'utf16le';
break;
default:
if (enc === '') return 'utf8';
}
}

Expand Down
10 changes: 6 additions & 4 deletions lib/string_decoder.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,12 @@ const kNativeDecoder = Symbol('kNativeDecoder');
// modules monkey-patch it to support additional encodings
function normalizeEncoding(enc) {
const nenc = internalUtil.normalizeEncoding(enc);
if (typeof nenc !== 'string' &&
(Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc)))
throw new errors.TypeError('ERR_UNKNOWN_ENCODING', enc);
return nenc || enc;
if (nenc === undefined) {
if (Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc))
throw new errors.TypeError('ERR_UNKNOWN_ENCODING', enc);
return enc;
}
return nenc;
}

const encodingsMap = {};
Expand Down
17 changes: 6 additions & 11 deletions src/node_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -571,10 +571,10 @@ void Fill(const FunctionCallbackInfo<Value>& args) {
Local<String> str_obj;
size_t str_length;
enum encoding enc;
THROW_AND_RETURN_IF_OOB(start <= end);
THROW_AND_RETURN_IF_OOB(fill_length + start <= ts_obj_length);

args.GetReturnValue().Set(static_cast<double>(fill_length));
// OOB Check. Throw the error in JS.
if (start > end || fill_length + start > ts_obj_length)
return args.GetReturnValue().Set(-2);

// First check if Buffer has been passed.
if (Buffer::HasInstance(args[1])) {
Expand All @@ -593,29 +593,24 @@ void Fill(const FunctionCallbackInfo<Value>& args) {

str_obj = args[1]->ToString(env->context()).ToLocalChecked();
enc = ParseEncoding(env->isolate(), args[4], UTF8);
str_length =
enc == UTF8 ? str_obj->Utf8Length() :
enc == UCS2 ? str_obj->Length() * sizeof(uint16_t) : str_obj->Length();

if (str_length == 0) {
args.GetReturnValue().Set(0);
return;
}

// Can't use StringBytes::Write() in all cases. For example if attempting
// to write a two byte character into a one byte Buffer.
if (enc == UTF8) {
str_length = str_obj->Utf8Length();
node::Utf8Value str(env->isolate(), args[1]);
memcpy(ts_obj_data + start, *str, MIN(str_length, fill_length));

} else if (enc == UCS2) {
str_length = str_obj->Length() * sizeof(uint16_t);
node::TwoByteValue str(env->isolate(), args[1]);
if (IsBigEndian())
SwapBytes16(reinterpret_cast<char*>(&str[0]), str_length);

memcpy(ts_obj_data + start, *str, MIN(str_length, fill_length));

} else {
str_length = str_obj->Length();
// Write initial String to Buffer, then use that memory to copy remainder
// of string. Correct the string length for cases like HEX where less than
// the total string length is written.
Expand Down
Loading