Patch summary (free text ahead of the first "diff --git" header):
  * benchmark/buffers/buffer-normalize-encoding.js: new benchmark that calls
    normalizeEncoding() from internal/util `n` times per encoding spelling.
  * lib/buffer.js: assertSize() tests `err !== null`; Buffer.isEncoding()
    compares the normalizeEncoding() result against undefined.
  * lib/internal/util.js: normalizeEncoding() keeps a short fast path and
    defers everything else to slowCases(), which dispatches on enc.length.
  * lib/string_decoder.js: normalizeEncoding() tests for undefined explicitly.
Review corrections applied to the lib/internal/util.js hunk:
  * case 5 compared the lowercased value with 'usc-2' (typo for 'ucs-2'), so
    mixed-case spellings such as 'Ucs-2' were reported as unknown.
  * case 4 re-tested 'UCS2' after lowercasing, which can never match.
  * The post-image blob id on that file's index line was computed for the
    uncorrected text.

diff --git a/benchmark/buffers/buffer-normalize-encoding.js b/benchmark/buffers/buffer-normalize-encoding.js
new file mode 100644
index 00000000000000..7a820465bd5d6b
--- /dev/null
+++ b/benchmark/buffers/buffer-normalize-encoding.js
@@ -0,0 +1,43 @@
+'use strict';
+
+const common = require('../common.js');
+
+const bench = common.createBenchmark(main, {
+  encoding: [
+    'ascii',
+    'ASCII',
+    'base64',
+    'BASE64',
+    'binary',
+    'BINARY',
+    'hex',
+    'HEX',
+    'latin1',
+    'LATIN1',
+    'ucs-2',
+    'UCS-2',
+    'ucs2',
+    'UCS2',
+    'utf-16le',
+    'UTF-16LE',
+    'utf-8',
+    'UTF-8',
+    'utf16le',
+    'UTF16LE',
+    'utf8',
+    'UTF8'
+  ],
+  n: [1e6]
+}, {
+  flags: ['--expose-internals']
+});
+
+function main({ encoding, n }) {
+  const { normalizeEncoding } = require('internal/util');
+
+  bench.start();
+  for (var i = 0; i < n; i++) {
+    normalizeEncoding(encoding);
+  }
+  bench.end(n);
+}
diff --git a/lib/buffer.js b/lib/buffer.js
index 07bd63c0ae5b97..68cebedcc97ef4 100644
--- a/lib/buffer.js
+++ b/lib/buffer.js
@@ -242,7 +242,7 @@ function assertSize(size) {
     err = new errors.RangeError('ERR_INVALID_OPT_VALUE', 'size', size);
   }
 
-  if (err) {
+  if (err !== null) {
     Error.captureStackTrace(err, assertSize);
     throw err;
   }
@@ -428,7 +428,7 @@ Buffer.compare = function compare(a, b) {
 
 Buffer.isEncoding = function isEncoding(encoding) {
   return typeof encoding === 'string' &&
-         typeof normalizeEncoding(encoding) === 'string';
+         normalizeEncoding(encoding) !== undefined;
 };
 Buffer[kIsEncodingSymbol] = Buffer.isEncoding;
 
diff --git a/lib/internal/util.js b/lib/internal/util.js
index 2516b84f342cea..b144063ee50100 100644
--- a/lib/internal/util.js
+++ b/lib/internal/util.js
@@ -96,36 +96,63 @@ function assertCrypto() {
     throw new errors.Error('ERR_NO_CRYPTO');
 }
 
-// The loop should only run at most twice, retrying with lowercased enc
-// if there is no match in the first pass.
-// We use a loop instead of branching to retry with a helper
-// function in order to avoid the performance hit.
 // Return undefined if there is no match.
+// Move the "slow cases" to a separate function to make sure this function gets
+// inlined properly. That prioritizes the common case.
 function normalizeEncoding(enc) {
-  if (enc == null || enc === '') return 'utf8';
-  let retried;
-  while (true) {
-    switch (enc) {
-      case 'utf8':
-      case 'utf-8':
-        return 'utf8';
-      case 'ucs2':
-      case 'ucs-2':
-      case 'utf16le':
-      case 'utf-16le':
+  if (enc == null || enc === 'utf8' || enc === 'utf-8') return 'utf8';
+  return slowCases(enc);
+}
+
+// Slow path for normalizeEncoding(): bucket by `enc.length`, try the exact
+// lowercase and uppercase spellings first, then compare a lowercased copy so
+// that mixed-case spellings match as well.
+// Falls off the end of the switch (returns undefined) if nothing matches.
+function slowCases(enc) {
+  switch (enc.length) {
+    case 4:
+      if (enc === 'UTF8') return 'utf8';
+      if (enc === 'ucs2' || enc === 'UCS2') return 'utf16le';
+      enc = `${enc}`.toLowerCase();
+      if (enc === 'utf8') return 'utf8';
+      if (enc === 'ucs2') return 'utf16le';
+      break;
+    case 3:
+      if (enc === 'hex' || enc === 'HEX' || `${enc}`.toLowerCase() === 'hex')
+        return 'hex';
+      break;
+    case 5:
+      if (enc === 'ascii') return 'ascii';
+      if (enc === 'ucs-2') return 'utf16le';
+      if (enc === 'UTF-8') return 'utf8';
+      if (enc === 'ASCII') return 'ascii';
+      if (enc === 'UCS-2') return 'utf16le';
+      enc = `${enc}`.toLowerCase();
+      if (enc === 'utf-8') return 'utf8';
+      if (enc === 'ascii') return 'ascii';
+      if (enc === 'ucs-2') return 'utf16le';
+      break;
+    case 6:
+      if (enc === 'base64') return 'base64';
+      if (enc === 'latin1' || enc === 'binary') return 'latin1';
+      if (enc === 'BASE64') return 'base64';
+      if (enc === 'LATIN1' || enc === 'BINARY') return 'latin1';
+      enc = `${enc}`.toLowerCase();
+      if (enc === 'base64') return 'base64';
+      if (enc === 'latin1' || enc === 'binary') return 'latin1';
+      break;
+    case 7:
+      if (enc === 'utf16le' || enc === 'UTF16LE' ||
+          `${enc}`.toLowerCase() === 'utf16le')
         return 'utf16le';
-      case 'latin1':
-      case 'binary':
-        return 'latin1';
-      case 'base64':
-      case 'ascii':
-      case 'hex':
-        return enc;
-      default:
-        if (retried) return; // undefined
-        enc = ('' + enc).toLowerCase();
-        retried = true;
-    }
+      break;
+    case 8:
+      if (enc === 'utf-16le' || enc === 'UTF-16LE' ||
+          `${enc}`.toLowerCase() === 'utf-16le')
+        return 'utf16le';
+      break;
+    default:
+      if (enc === '') return 'utf8';
   }
 }
 
diff --git a/lib/string_decoder.js b/lib/string_decoder.js
index 04d31b2607c63e..18097be0e6dd08 100644
--- a/lib/string_decoder.js
+++ b/lib/string_decoder.js
@@ -43,10 +43,12 @@ const kNativeDecoder = Symbol('kNativeDecoder');
 // modules monkey-patch it to support additional encodings
 function normalizeEncoding(enc) {
   const nenc = internalUtil.normalizeEncoding(enc);
-  if (typeof nenc !== 'string' &&
-      (Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc)))
-    throw new errors.TypeError('ERR_UNKNOWN_ENCODING', enc);
-  return nenc || enc;
+  if (nenc === undefined) {
+    if (Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc))
+      throw new errors.TypeError('ERR_UNKNOWN_ENCODING', enc);
+    return enc;
+  }
+  return nenc;
 }
 
 const encodingsMap = {};