From 8fd75fb9b56576d0f9f9cbaee19f86794427c38f Mon Sep 17 00:00:00 2001 From: James M Snell Date: Mon, 2 Oct 2017 14:42:55 -0700 Subject: [PATCH] util: graduate TextEncoder/TextDecoder, tests Add tests ported from Web Platform Tests. Graduate TextEncoder / TextDecoder from experimental PR-URL: https://github.com/nodejs/node/pull/15743 Reviewed-By: Colin Ihrig Reviewed-By: Refael Ackermann Reviewed-By: Anna Henningsen Reviewed-By: Joyee Cheung Reviewed-By: Timothy Gu --- doc/api/util.md | 4 - lib/internal/encoding.js | 20 ---- .../test-whatwg-encoding-fatal-streaming.js | 76 +++++++++++++ .../test-whatwg-encoding-internals.js | 1 + .../test-whatwg-encoding-surrogates-utf8.js | 56 ++++++++++ .../test-whatwg-encoding-textdecoder-fatal.js | 93 ++++++++++++++++ ...t-whatwg-encoding-textdecoder-ignorebom.js | 50 +++++++++ ...t-whatwg-encoding-textdecoder-streaming.js | 49 +++++++++ ...g-encoding-textdecoder-utf16-surrogates.js | 63 +++++++++++ .../test-whatwg-encoding-textdecoder.js | 104 ++++++++++++++++++ ...g-encoding-textencoder-utf16-surrogates.js | 52 +++++++++ .../test-whatwg-encoding-textencoder.js | 23 +++- 12 files changed, 563 insertions(+), 28 deletions(-) create mode 100644 test/parallel/test-whatwg-encoding-fatal-streaming.js create mode 100644 test/parallel/test-whatwg-encoding-surrogates-utf8.js create mode 100644 test/parallel/test-whatwg-encoding-textdecoder-fatal.js create mode 100644 test/parallel/test-whatwg-encoding-textdecoder-ignorebom.js create mode 100644 test/parallel/test-whatwg-encoding-textdecoder-streaming.js create mode 100644 test/parallel/test-whatwg-encoding-textdecoder-utf16-surrogates.js create mode 100644 test/parallel/test-whatwg-encoding-textencoder-utf16-surrogates.js diff --git a/doc/api/util.md b/doc/api/util.md index c0ca4b930d9992..6619ee2ad60062 100644 --- a/doc/api/util.md +++ b/doc/api/util.md @@ -551,8 +551,6 @@ see [Custom promisified functions][]. added: v8.3.0 --> -> Stability: 1 - Experimental - An implementation of the [WHATWG Encoding Standard][] `TextDecoder` API. ```js @@ -690,8 +688,6 @@ mark. added: v8.3.0 --> -> Stability: 1 - Experimental - An implementation of the [WHATWG Encoding Standard][] `TextEncoder` API. All instances of `TextEncoder` only support UTF-8 encoding. diff --git a/lib/internal/encoding.js b/lib/internal/encoding.js index 5c2e2072587d08..09242be8dcff6f 100644 --- a/lib/internal/encoding.js +++ b/lib/internal/encoding.js @@ -10,11 +10,6 @@ const kEncoding = Symbol('encoding'); const kDecoder = Symbol('decoder'); const kEncoder = Symbol('encoder'); -let warned = false; -const experimental = - 'The WHATWG Encoding Standard implementation is an experimental API. It ' + - 'should not yet be used in production applications.'; - const { getConstructorOf, customInspectSymbol: inspect @@ -289,11 +284,6 @@ function getEncodingFromLabel(label) { class TextEncoder { constructor() { - if (!warned) { - warned = true; - process.emitWarning(experimental, 'ExperimentalWarning'); - } - this[kEncoder] = true; } @@ -353,11 +343,6 @@ function makeTextDecoderICU() { class TextDecoder { constructor(encoding = 'utf-8', options = {}) { - if (!warned) { - warned = true; - process.emitWarning(experimental, 'ExperimentalWarning'); - } - encoding = `${encoding}`; if (typeof options !== 'object') throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object'); @@ -430,11 +415,6 @@ function makeTextDecoderJS() { class TextDecoder { constructor(encoding = 'utf-8', options = {}) { - if (!warned) { - warned = true; - process.emitWarning(experimental, 'ExperimentalWarning'); - } - encoding = `${encoding}`; if (typeof options !== 'object') throw new errors.Error('ERR_INVALID_ARG_TYPE', 'options', 'object'); diff --git a/test/parallel/test-whatwg-encoding-fatal-streaming.js b/test/parallel/test-whatwg-encoding-fatal-streaming.js new file mode 100644 index 00000000000000..0b510126affe3a --- /dev/null +++ b/test/parallel/test-whatwg-encoding-fatal-streaming.js @@ -0,0 +1,76 @@ +'use strict'; + +// From: https://github.com/w3c/web-platform-tests/blob/d74324b53c/encoding/textdecoder-fatal-streaming.html + +const common = require('../common'); + +if (!common.hasIntl) + common.skip('missing Intl'); + +const assert = require('assert'); +const { + TextDecoder +} = require('util'); + + +{ + [ + { encoding: 'utf-8', sequence: [0xC0] }, + { encoding: 'utf-16le', sequence: [0x00] }, + { encoding: 'utf-16be', sequence: [0x00] } + ].forEach((testCase) => { + const data = new Uint8Array([testCase.sequence]); + common.expectsError( + () => { + const decoder = new TextDecoder(testCase.encoding, { fatal: true }); + decoder.decode(data); + }, { + code: 'ERR_ENCODING_INVALID_ENCODED_DATA', + type: TypeError, + message: + `The encoded data was not valid for encoding ${testCase.encoding}` + } + ); + + assert.strictEqual( + new TextDecoder(testCase.encoding).decode(data), + '\uFFFD' + ); + }); +} + +{ + const decoder = new TextDecoder('utf-16le', { fatal: true }); + const odd = new Uint8Array([0x00]); + const even = new Uint8Array([0x00, 0x00]); + + assert.strictEqual(decoder.decode(odd, { stream: true }), ''); + assert.strictEqual(decoder.decode(odd), '\u0000'); + + common.expectsError( + () => { + decoder.decode(even, { stream: true }); + decoder.decode(odd); + }, { + code: 'ERR_ENCODING_INVALID_ENCODED_DATA', + type: TypeError, + message: + 'The encoded data was not valid for encoding utf-16le' + } + ); + + common.expectsError( + () => { + decoder.decode(odd, { stream: true }); + decoder.decode(even); + }, { + code: 'ERR_ENCODING_INVALID_ENCODED_DATA', + type: TypeError, + message: + 'The encoded data was not valid for encoding utf-16le' + } + ); + + assert.strictEqual(decoder.decode(even, { stream: true }), '\u0000'); + assert.strictEqual(decoder.decode(even), '\u0000'); +} diff --git a/test/parallel/test-whatwg-encoding-internals.js b/test/parallel/test-whatwg-encoding-internals.js index d5bf07acb31803..d025642365ff73 100644 --- a/test/parallel/test-whatwg-encoding-internals.js +++ b/test/parallel/test-whatwg-encoding-internals.js @@ -2,6 +2,7 @@ 'use strict'; require('../common'); + const assert = require('assert'); const { getEncodingFromLabel } = require('internal/encoding'); diff --git a/test/parallel/test-whatwg-encoding-surrogates-utf8.js b/test/parallel/test-whatwg-encoding-surrogates-utf8.js new file mode 100644 index 00000000000000..5fbdd0d83b944d --- /dev/null +++ b/test/parallel/test-whatwg-encoding-surrogates-utf8.js @@ -0,0 +1,56 @@ +'use strict'; + +// From: https://github.com/w3c/web-platform-tests/blob/fa9436d12c/encoding/api-surrogates-utf8.html + +require('../common'); + +const assert = require('assert'); +const { + TextDecoder, + TextEncoder +} = require('util'); + +const badStrings = [ + { + input: 'abc123', + expected: [0x61, 0x62, 0x63, 0x31, 0x32, 0x33], + decoded: 'abc123', + name: 'Sanity check' + }, + { + input: '\uD800', + expected: [0xef, 0xbf, 0xbd], + decoded: '\uFFFD', + name: 'Surrogate half (low)' + }, + { + input: '\uDC00', + expected: [0xef, 0xbf, 0xbd], + decoded: '\uFFFD', + name: 'Surrogate half (high)' + }, + { + input: 'abc\uD800123', + expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33], + decoded: 'abc\uFFFD123', + name: 'Surrogate half (low), in a string' + }, + { + input: 'abc\uDC00123', + expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33], + decoded: 'abc\uFFFD123', + name: 'Surrogate half (high), in a string' + }, + { + input: '\uDC00\uD800', + expected: [0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd], + decoded: '\uFFFD\uFFFD', + name: 'Wrong order' + } +]; + +badStrings.forEach((t) => { + const encoded = new TextEncoder().encode(t.input); + assert.deepStrictEqual([].slice.call(encoded), t.expected); + assert.strictEqual(new TextDecoder('utf-8').decode(encoded), t.decoded); +}); diff --git a/test/parallel/test-whatwg-encoding-textdecoder-fatal.js b/test/parallel/test-whatwg-encoding-textdecoder-fatal.js new file mode 100644 index 00000000000000..cfb595e78e6b40 --- /dev/null +++ b/test/parallel/test-whatwg-encoding-textdecoder-fatal.js @@ -0,0 +1,93 @@ +'use strict'; + +// From: https://github.com/w3c/web-platform-tests/blob/39a67e2fff/encoding/textdecoder-fatal.html + +const common = require('../common'); + +if (!common.hasIntl) + common.skip('missing Intl'); + +const assert = require('assert'); +const { + TextDecoder +} = require('util'); + +const bad = [ + { encoding: 'utf-8', input: [0xFF], name: 'invalid code' }, + { encoding: 'utf-8', input: [0xC0], name: 'ends early' }, + { encoding: 'utf-8', input: [0xE0], name: 'ends early 2' }, + { encoding: 'utf-8', input: [0xC0, 0x00], name: 'invalid trail' }, + { encoding: 'utf-8', input: [0xC0, 0xC0], name: 'invalid trail 2' }, + { encoding: 'utf-8', input: [0xE0, 0x00], name: 'invalid trail 3' }, + { encoding: 'utf-8', input: [0xE0, 0xC0], name: 'invalid trail 4' }, + { encoding: 'utf-8', input: [0xE0, 0x80, 0x00], name: 'invalid trail 5' }, + { encoding: 'utf-8', input: [0xE0, 0x80, 0xC0], name: 'invalid trail 6' }, + { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], + name: '> 0x10FFFF' }, + { encoding: 'utf-8', input: [0xFE, 0x80, 0x80, 0x80, 0x80, 0x80], + name: 'obsolete lead byte' }, + // Overlong encodings + { encoding: 'utf-8', input: [0xC0, 0x80], name: 'overlong U+0000 - 2 bytes' }, + { encoding: 'utf-8', input: [0xE0, 0x80, 0x80], + name: 'overlong U+0000 - 3 bytes' }, + { encoding: 'utf-8', input: [0xF0, 0x80, 0x80, 0x80], + name: 'overlong U+0000 - 4 bytes' }, + { encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x80, 0x80], + name: 'overlong U+0000 - 5 bytes' }, + { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], + name: 'overlong U+0000 - 6 bytes' }, + { encoding: 'utf-8', input: [0xC1, 0xBF], name: 'overlong U+007F - 2 bytes' }, + { encoding: 'utf-8', input: [0xE0, 0x81, 0xBF], + name: 'overlong U+007F - 3 bytes' }, + { encoding: 'utf-8', input: [0xF0, 0x80, 0x81, 0xBF], + name: 'overlong U+007F - 4 bytes' }, + { encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x81, 0xBF], + name: 'overlong U+007F - 5 bytes' }, + { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF], + name: 'overlong U+007F - 6 bytes' }, + { encoding: 'utf-8', input: [0xE0, 0x9F, 0xBF], + name: 'overlong U+07FF - 3 bytes' }, + { encoding: 'utf-8', input: [0xF0, 0x80, 0x9F, 0xBF], + name: 'overlong U+07FF - 4 bytes' }, + { encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x9F, 0xBF], + name: 'overlong U+07FF - 5 bytes' }, + { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF], + name: 'overlong U+07FF - 6 bytes' }, + { encoding: 'utf-8', input: [0xF0, 0x8F, 0xBF, 0xBF], + name: 'overlong U+FFFF - 4 bytes' }, + { encoding: 'utf-8', input: [0xF8, 0x80, 0x8F, 0xBF, 0xBF], + name: 'overlong U+FFFF - 5 bytes' }, + { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF], + name: 'overlong U+FFFF - 6 bytes' }, + { encoding: 'utf-8', input: [0xF8, 0x84, 0x8F, 0xBF, 0xBF], + name: 'overlong U+10FFFF - 5 bytes' }, + { encoding: 'utf-8', input: [0xFC, 0x80, 0x84, 0x8F, 0xBF, 0xBF], + name: 'overlong U+10FFFF - 6 bytes' }, + // UTF-16 surrogates encoded as code points in UTF-8 + { encoding: 'utf-8', input: [0xED, 0xA0, 0x80], name: 'lead surrogate' }, + { encoding: 'utf-8', input: [0xED, 0xB0, 0x80], name: 'trail surrogate' }, + { encoding: 'utf-8', input: [0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80], + name: 'surrogate pair' }, + { encoding: 'utf-16le', input: [0x00], name: 'truncated code unit' }, + // Mismatched UTF-16 surrogates are exercised in utf16-surrogates.html + // FIXME: Add legacy encoding cases +]; + +bad.forEach((t) => { + common.expectsError( + () => { + new TextDecoder(t.encoding, { fatal: true }) + .decode(new Uint8Array(t.input)); + }, { + code: 'ERR_ENCODING_INVALID_ENCODED_DATA', + type: TypeError + } + ); +}); + +{ + assert('fatal' in new TextDecoder()); + assert.strictEqual(typeof new TextDecoder().fatal, 'boolean'); + assert(!new TextDecoder().fatal); + assert(new TextDecoder('utf-8', { fatal: true }).fatal); +} diff --git a/test/parallel/test-whatwg-encoding-textdecoder-ignorebom.js b/test/parallel/test-whatwg-encoding-textdecoder-ignorebom.js new file mode 100644 index 00000000000000..0e3cd3025d040a --- /dev/null +++ b/test/parallel/test-whatwg-encoding-textdecoder-ignorebom.js @@ -0,0 +1,50 @@ +'use strict'; + +// From: https://github.com/w3c/web-platform-tests/blob/7f567fa29c/encoding/textdecoder-ignorebom.html + +const common = require('../common'); + +const assert = require('assert'); +const { + TextDecoder +} = require('util'); + +const cases = [ + { + encoding: 'utf-8', + bytes: [0xEF, 0xBB, 0xBF, 0x61, 0x62, 0x63], + skipNoIntl: false + }, + { + encoding: 'utf-16le', + bytes: [0xFF, 0xFE, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00], + skipNoIntl: false + }, + { + encoding: 'utf-16be', + bytes: [0xFE, 0xFF, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63], + skipNoIntl: true + } +]; + +cases.forEach((testCase) => { + if (testCase.skipNoIntl && !common.hasIntl) { + console.log(`skipping ${testCase.encoding} because missing Intl`); + return; // skipping + } + const BOM = '\uFEFF'; + let decoder = new TextDecoder(testCase.encoding, { ignoreBOM: true }); + const bytes = new Uint8Array(testCase.bytes); + assert.strictEqual(decoder.decode(bytes), `${BOM}abc`); + decoder = new TextDecoder(testCase.encoding, { ignoreBOM: false }); + assert.strictEqual(decoder.decode(bytes), 'abc'); + decoder = new TextDecoder(testCase.encoding); + assert.strictEqual(decoder.decode(bytes), 'abc'); +}); + +{ + assert('ignoreBOM' in new TextDecoder()); + assert.strictEqual(typeof new TextDecoder().ignoreBOM, 'boolean'); + assert(!new TextDecoder().ignoreBOM); + assert(new TextDecoder('utf-8', { ignoreBOM: true }).ignoreBOM); +} diff --git a/test/parallel/test-whatwg-encoding-textdecoder-streaming.js b/test/parallel/test-whatwg-encoding-textdecoder-streaming.js new file mode 100644 index 00000000000000..e446d56ffd2abb --- /dev/null +++ b/test/parallel/test-whatwg-encoding-textdecoder-streaming.js @@ -0,0 +1,49 @@ +'use strict'; + +// From: https://github.com/w3c/web-platform-tests/blob/fa9436d12c/encoding/textdecoder-streaming.html + +const common = require('../common'); + +const assert = require('assert'); +const { + TextDecoder +} = require('util'); + +const string = + '\x00123ABCabc\x80\xFF\u0100\u1000\uFFFD\uD800\uDC00\uDBFF\uDFFF'; +const octets = { + 'utf-8': [ + 0x00, 0x31, 0x32, 0x33, 0x41, 0x42, 0x43, 0x61, 0x62, 0x63, 0xc2, 0x80, + 0xc3, 0xbf, 0xc4, 0x80, 0xe1, 0x80, 0x80, 0xef, 0xbf, 0xbd, 0xf0, 0x90, + 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf], + 'utf-16le': [ + 0x00, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x41, 0x00, 0x42, 0x00, + 0x43, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x80, 0x00, 0xFF, 0x00, + 0x00, 0x01, 0x00, 0x10, 0xFD, 0xFF, 0x00, 0xD8, 0x00, 0xDC, 0xFF, 0xDB, + 0xFF, 0xDF], + 'utf-16be': [ + 0x00, 0x00, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x41, 0x00, 0x42, + 0x00, 0x43, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x80, 0x00, 0xFF, + 0x01, 0x00, 0x10, 0x00, 0xFF, 0xFD, 0xD8, 0x00, 0xDC, 0x00, 0xDB, 0xFF, + 0xDF, 0xFF] +}; + +Object.keys(octets).forEach((encoding) => { + if (encoding === 'utf-16be' && !common.hasIntl) { + console.log('skipping utf-16be because missing Intl'); + return; + } + for (let len = 1; len <= 5; ++len) { + const encoded = octets[encoding]; + const decoder = new TextDecoder(encoding); + let out = ''; + for (let i = 0; i < encoded.length; i += len) { + const sub = []; + for (let j = i; j < encoded.length && j < i + len; ++j) + sub.push(encoded[j]); + out += decoder.decode(new Uint8Array(sub), { stream: true }); + } + out += decoder.decode(); + assert.strictEqual(out, string); + } +}); diff --git a/test/parallel/test-whatwg-encoding-textdecoder-utf16-surrogates.js b/test/parallel/test-whatwg-encoding-textdecoder-utf16-surrogates.js new file mode 100644 index 00000000000000..fcf6a82e90fd85 --- /dev/null +++ b/test/parallel/test-whatwg-encoding-textdecoder-utf16-surrogates.js @@ -0,0 +1,63 @@ +'use strict'; + +// From: https://github.com/w3c/web-platform-tests/blob/39a67e2fff/encoding/textdecoder-utf16-surrogates.html + +const common = require('../common'); + +if (!common.hasIntl) + common.skip('missing Intl'); + +const assert = require('assert'); +const { + TextDecoder +} = require('util'); + +const bad = [ + { + encoding: 'utf-16le', + input: [0x00, 0xd8], + expected: '\uFFFD', + name: 'lone surrogate lead' + }, + { + encoding: 'utf-16le', + input: [0x00, 0xdc], + expected: '\uFFFD', + name: 'lone surrogate trail' + }, + { + encoding: 'utf-16le', + input: [0x00, 0xd8, 0x00, 0x00], + expected: '\uFFFD\u0000', + name: 'unmatched surrogate lead' + }, + { + encoding: 'utf-16le', + input: [0x00, 0xdc, 0x00, 0x00], + expected: '\uFFFD\u0000', + name: 'unmatched surrogate trail' + }, + { + encoding: 'utf-16le', + input: [0x00, 0xdc, 0x00, 0xd8], + expected: '\uFFFD\uFFFD', + name: 'swapped surrogate pair' + } +]; + +bad.forEach((t) => { + + assert.strictEqual( + new TextDecoder(t.encoding).decode(new Uint8Array(t.input)), + t.expected); + + common.expectsError( + () => { + new TextDecoder(t.encoding, { fatal: true }) + .decode(new Uint8Array(t.input)); + }, { + code: 'ERR_ENCODING_INVALID_ENCODED_DATA', + type: TypeError + } + ); +}); diff --git a/test/parallel/test-whatwg-encoding-textdecoder.js b/test/parallel/test-whatwg-encoding-textdecoder.js index 440ccc38124ec6..55c601364d0add 100644 --- a/test/parallel/test-whatwg-encoding-textdecoder.js +++ b/test/parallel/test-whatwg-encoding-textdecoder.js @@ -2,6 +2,7 @@ 'use strict'; const common = require('../common'); + const assert = require('assert'); const { TextDecoder, TextEncoder } = require('util'); const { customInspectSymbol: inspect } = require('internal/util'); @@ -16,6 +17,7 @@ assert(TextDecoder); { ['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => { const dec = new TextDecoder(i); + assert.strictEqual(dec.encoding, 'utf-8'); const res = dec.decode(buf); assert.strictEqual(res, 'test€'); }); @@ -102,3 +104,105 @@ if (common.hasIntl) { })); }); } + +// From: https://github.com/w3c/web-platform-tests/blob/master/encoding/api-basics.html +function testDecodeSample(encoding, string, bytes) { + assert.strictEqual( + new TextDecoder(encoding).decode(new Uint8Array(bytes)), + string); + assert.strictEqual( + new TextDecoder(encoding).decode(new Uint8Array(bytes).buffer), + string); +} + +// z (ASCII U+007A), cent (Latin-1 U+00A2), CJK water (BMP U+6C34), +// G-Clef (non-BMP U+1D11E), PUA (BMP U+F8FF), PUA (non-BMP U+10FFFD) +// byte-swapped BOM (non-character U+FFFE) +const sample = 'z\xA2\u6C34\uD834\uDD1E\uF8FF\uDBFF\uDFFD\uFFFE'; + +{ + const encoding = 'utf-8'; + const string = sample; + const bytes = [ + 0x7A, 0xC2, 0xA2, 0xE6, 0xB0, 0xB4, + 0xF0, 0x9D, 0x84, 0x9E, 0xEF, 0xA3, + 0xBF, 0xF4, 0x8F, 0xBF, 0xBD, 0xEF, + 0xBF, 0xBE + ]; + const encoded = new TextEncoder().encode(string); + assert.deepStrictEqual([].slice.call(encoded), bytes); + assert.strictEqual( + new TextDecoder(encoding).decode(new Uint8Array(bytes)), + string); + assert.strictEqual( + new TextDecoder(encoding).decode(new Uint8Array(bytes).buffer), + string); +} + +testDecodeSample( + 'utf-16le', + sample, + [ + 0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, + 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xF8, + 0xFF, 0xDB, 0xFD, 0xDF, 0xFE, 0xFF + ] +); + +if (common.hasIntl) { + testDecodeSample( + 'utf-16be', + sample, + [ + 0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34, + 0xD8, 0x34, 0xDD, 0x1E, 0xF8, 0xFF, + 0xDB, 0xFF, 0xDF, 0xFD, 0xFF, 0xFE + ] + ); +} + +testDecodeSample( + 'utf-16', + sample, + [ + 0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, + 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xF8, + 0xFF, 0xDB, 0xFD, 0xDF, 0xFE, 0xFF + ] +); + +// From: https://github.com/w3c/web-platform-tests/blob/master/encoding/api-invalid-label.html +[ + 'utf-8', + 'unicode-1-1-utf-8', + 'utf8', + 'utf-16be', + 'utf-16le', + 'utf-16' +].forEach((i) => { + ['\u0000', '\u000b', '\u00a0', '\u2028', '\u2029'].forEach((ws) => { + common.expectsError( + () => new TextDecoder(`${ws}${i}`), + { + code: 'ERR_ENCODING_NOT_SUPPORTED', + type: RangeError + } + ); + + common.expectsError( + () => new TextDecoder(`${i}${ws}`), + { + code: 'ERR_ENCODING_NOT_SUPPORTED', + type: RangeError + } + ); + + common.expectsError( + () => new TextDecoder(`${ws}${i}${ws}`), + { + code: 'ERR_ENCODING_NOT_SUPPORTED', + type: RangeError + } + ); + }); +}); diff --git a/test/parallel/test-whatwg-encoding-textencoder-utf16-surrogates.js b/test/parallel/test-whatwg-encoding-textencoder-utf16-surrogates.js new file mode 100644 index 00000000000000..9ef3c0c2360b67 --- /dev/null +++ b/test/parallel/test-whatwg-encoding-textencoder-utf16-surrogates.js @@ -0,0 +1,52 @@ +'use strict'; + +// From: https://github.com/w3c/web-platform-tests/blob/fa9436d12c/encoding/textencoder-utf16-surrogates.html + +require('../common'); + +const assert = require('assert'); +const { + TextDecoder, + TextEncoder +} = require('util'); + +const bad = [ + { + input: '\uD800', + expected: '\uFFFD', + name: 'lone surrogate lead' + }, + { + input: '\uDC00', + expected: '\uFFFD', + name: 'lone surrogate trail' + }, + { + input: '\uD800\u0000', + expected: '\uFFFD\u0000', + name: 'unmatched surrogate lead' + }, + { + input: '\uDC00\u0000', + expected: '\uFFFD\u0000', + name: 'unmatched surrogate trail' + }, + { + input: '\uDC00\uD800', + expected: '\uFFFD\uFFFD', + name: 'swapped surrogate pair' + }, + { + input: '\uD834\uDD1E', + expected: '\uD834\uDD1E', + name: 'properly encoded MUSICAL SYMBOL G CLEF (U+1D11E)' + } +]; + +bad.forEach((t) => { + const encoded = new TextEncoder().encode(t.input); + const decoded = new TextDecoder().decode(encoded); + assert.strictEqual(decoded, t.expected); +}); + +assert.strictEqual(new TextEncoder().encode().length, 0); diff --git a/test/parallel/test-whatwg-encoding-textencoder.js b/test/parallel/test-whatwg-encoding-textencoder.js index cf2769bb0ce577..2e8ca9e9abafd1 100644 --- a/test/parallel/test-whatwg-encoding-textencoder.js +++ b/test/parallel/test-whatwg-encoding-textencoder.js @@ -2,6 +2,7 @@ 'use strict'; const common = require('../common'); + const assert = require('assert'); const { TextDecoder, TextEncoder } = require('util'); const { customInspectSymbol: inspect } = require('internal/util'); @@ -13,11 +14,25 @@ const encoded = Buffer.from([0xef, 0xbb, 0xbf, 0x74, 0x65, assert(TextEncoder); // Test TextEncoder -const enc = new TextEncoder(); -assert(enc); -const buf = enc.encode('\ufefftest€'); +{ + const enc = new TextEncoder(); + assert.strictEqual(enc.encoding, 'utf-8'); + assert(enc); + const buf = enc.encode('\ufefftest€'); + assert.strictEqual(Buffer.compare(buf, encoded), 0); +} + +{ + const enc = new TextEncoder(); + const buf = enc.encode(); + assert.strictEqual(buf.length, 0); +} -assert.strictEqual(Buffer.compare(buf, encoded), 0); +{ + const enc = new TextEncoder(); + const buf = enc.encode(undefined); + assert.strictEqual(buf.length, 0); +} { const fn = TextEncoder.prototype[inspect];