nodejs · joyeecheung · Dec 4, 2018 · Jan 2, 2019 · Dec 4, 2018 · Jan 2, 2019
diff --git a/test/common/wpt.js b/test/common/wpt.js
@@ -1,7 +1,7 @@
+/* eslint-disable node-core/required-modules */
 'use strict';
 
 const assert = require('assert');
-const common = require('../common');
 const fixtures = require('../common/fixtures');
 const fs = require('fs');
 const fsPromises = fs.promises;
@@ -160,12 +160,49 @@ class WPTTest {
   getContent() {
     return fs.readFileSync(this.getAbsolutePath(), 'utf8');
   }
+}
+
+const kIntlRequirement = {
+  none: 0,
+  small: 1,
+  full: 2,
+  // TODO(joyeecheung): we may need to deal with --with-intl=system-icu
+};
+
+class IntlRequirement {
+  constructor() {
+    this.currentIntl = kIntlRequirement.none;
+    if (process.config.variables.v8_enable_i18n_support === 0) {
+      this.currentIntl = kIntlRequirement.none;
+      return;
+    }
+    // i18n enabled: icu_small selects the small level, anything else is full
+    if (process.config.variables.icu_small) {
+      this.currentIntl = kIntlRequirement.small;
+    } else {
+      this.currentIntl = kIntlRequirement.full;
+    }
+  }
 
-  requireIntl() {
-    return this.requires.has('intl');
+  /**
+   * @param {Set} requires Requirement names to check: 'full-icu', 'small-icu'
+   * @returns {string|false} The config that the build is lacking, or false
+   */
+  isLacking(requires) {
+    const current = this.currentIntl;
+    if (requires.has('full-icu') && current !== kIntlRequirement.full) {
+      return 'full-icu';
+    }
+    if (requires.has('small-icu') && current < kIntlRequirement.small) {
+      return 'small-icu';
+    }
+    return false;
   }
 }
 
+const intlRequirements = new IntlRequirement();
+
+
 class StatusLoader {
   constructor(path) {
     this.path = path;
@@ -498,8 +535,9 @@ class WPTRunner {
         continue;
       }
 
-      if (!common.hasIntl && test.requireIntl()) {
-        this.skip(filename, [ 'missing Intl' ]);
+      const lackingIntl = intlRequirements.isLacking(test.requires);
+      if (lackingIntl) {
+        this.skip(filename, [ `requires ${lackingIntl}` ]);
         continue;
       }
 

diff --git a/test/fixtures/wpt/README.md b/test/fixtures/wpt/README.md
@@ -10,10 +10,11 @@ See [test/wpt](../../wpt/README.md) for information on how these tests are run.
 
 Last update:
 
-- resources: https://github.com/web-platform-tests/wpt/tree/679a364421/resources
-- interfaces: https://github.com/web-platform-tests/wpt/tree/db7f86289e/interfaces
 - console: https://github.com/web-platform-tests/wpt/tree/9786a4b131/console
+- encoding: https://github.com/web-platform-tests/wpt/tree/a093a659ed/encoding
 - url: https://github.com/web-platform-tests/wpt/tree/75b0f336c5/url
+- resources: https://github.com/web-platform-tests/wpt/tree/679a364421/resources
+- interfaces: https://github.com/web-platform-tests/wpt/tree/712c9f275e/interfaces
 
 [Web Platform Tests]: https://github.com/web-platform-tests/wpt
 [`git node wpt`]: https://github.com/nodejs/node-core-utils/blob/master/docs/git-node.md#git-node-wpt
diff --git a/test/fixtures/wpt/encoding/META.yml b/test/fixtures/wpt/encoding/META.yml
@@ -0,0 +1,4 @@
+spec: https://encoding.spec.whatwg.org/
+suggested_reviewers:
+  - inexorabletash
+  - annevk
diff --git a/test/fixtures/wpt/encoding/api-basics.any.js b/test/fixtures/wpt/encoding/api-basics.any.js
@@ -0,0 +1,52 @@
+// META: title=Encoding API: Basics
+
+test(function() {
+    assert_equals((new TextEncoder).encoding, 'utf-8', 'default encoding is utf-8');
+    assert_equals((new TextDecoder).encoding, 'utf-8', 'default encoding is utf-8');
+}, 'Default encodings');
+
+test(function() {
+    assert_array_equals(new TextEncoder().encode(), [], 'input default should be empty string')
+    assert_array_equals(new TextEncoder().encode(undefined), [], 'input default should be empty string')
+}, 'Default inputs');
+
+
+function testDecodeSample(encoding, string, bytes) {
+  test(function() {
+    assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes)), string);
+    assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes).buffer), string);
+  }, 'Decode sample: ' + encoding);
+}
+
+// z (ASCII U+007A), cent (Latin-1 U+00A2), CJK water (BMP U+6C34),
+// G-Clef (non-BMP U+1D11E), PUA (BMP U+F8FF), PUA (non-BMP U+10FFFD)
+// byte-swapped BOM (non-character U+FFFE)
+var sample = 'z\xA2\u6C34\uD834\uDD1E\uF8FF\uDBFF\uDFFD\uFFFE';
+
+test(function() {
+  var encoding = 'utf-8';
+  var string = sample;
+  var bytes = [0x7A, 0xC2, 0xA2, 0xE6, 0xB0, 0xB4, 0xF0, 0x9D, 0x84, 0x9E, 0xEF, 0xA3, 0xBF, 0xF4, 0x8F, 0xBF, 0xBD, 0xEF, 0xBF, 0xBE];
+  var encoded = new TextEncoder().encode(string);
+  assert_array_equals([].slice.call(encoded), bytes);
+  assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes)), string);
+  assert_equals(new TextDecoder(encoding).decode(new Uint8Array(bytes).buffer), string);
+}, 'Encode/decode round trip: utf-8');
+
+testDecodeSample(
+  'utf-16le',
+  sample,
+  [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xF8, 0xFF, 0xDB, 0xFD, 0xDF, 0xFE, 0xFF]
+);
+
+testDecodeSample(
+  'utf-16be',
+  sample,
+  [0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34, 0xD8, 0x34, 0xDD, 0x1E, 0xF8, 0xFF, 0xDB, 0xFF, 0xDF, 0xFD, 0xFF, 0xFE]
+);
+
+testDecodeSample(
+  'utf-16',
+  sample,
+  [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xF8, 0xFF, 0xDB, 0xFD, 0xDF, 0xFE, 0xFF]
+);
diff --git a/test/fixtures/wpt/encoding/api-invalid-label.any.js b/test/fixtures/wpt/encoding/api-invalid-label.any.js
@@ -0,0 +1,24 @@
+// META: title=Encoding API: invalid label
+// META: timeout=long
+// META: script=resources/encodings.js
+
+var tests = ["invalid-invalidLabel"];
+setup(function() {
+  encodings_table.forEach(function(section) {
+    section.encodings.forEach(function(encoding) {
+      encoding.labels.forEach(function(label) {
+        ["\u0000", "\u000b", "\u00a0", "\u2028", "\u2029"].forEach(function(ws) {
+          tests.push(ws + label);
+          tests.push(label + ws);
+          tests.push(ws + label + ws);
+        });
+      });
+    });
+  });
+});
+
+tests.forEach(function(input) {
+  test(function() {
+    assert_throws(new RangeError(), function() { new TextDecoder(input); });
+  }, 'Invalid label ' + format_value(input) + ' should be rejected by TextDecoder.');
+});
diff --git a/test/fixtures/wpt/encoding/api-replacement-encodings.any.js b/test/fixtures/wpt/encoding/api-replacement-encodings.any.js
@@ -0,0 +1,15 @@
+// META: title=Encoding API: replacement encoding
+// META: script=resources/encodings.js
+
+encodings_table.forEach(function(section) {
+    section.encodings.filter(function(encoding) {
+        return encoding.name === 'replacement';
+    }).forEach(function(encoding) {
+        encoding.labels.forEach(function(label) {
+            test(function() {
+                assert_throws(new RangeError(), function() { new TextDecoder(label); });
+            }, 'Label for "replacement" should be rejected by API: ' + label);
+        });
+    });
+});
+
diff --git a/test/fixtures/wpt/encoding/api-surrogates-utf8.any.js b/test/fixtures/wpt/encoding/api-surrogates-utf8.any.js
@@ -0,0 +1,48 @@
+// META: title=Encoding API: Invalid UTF-16 surrogates with UTF-8 encoding
+
+var badStrings = [
+    {
+        input: 'abc123',
+        expected: [0x61, 0x62, 0x63, 0x31, 0x32, 0x33],
+        decoded: 'abc123',
+        name: 'Sanity check'
+    },
+    {
+        input: '\uD800',
+        expected: [0xef, 0xbf, 0xbd],
+        decoded: '\uFFFD',
+        name: 'Surrogate half (low)'
+    },
+    {
+        input: '\uDC00',
+        expected: [0xef, 0xbf, 0xbd],
+        decoded: '\uFFFD',
+        name: 'Surrogate half (high)'
+    },
+    {
+        input: 'abc\uD800123',
+        expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
+        decoded: 'abc\uFFFD123',
+        name: 'Surrogate half (low), in a string'
+    },
+    {
+        input: 'abc\uDC00123',
+        expected: [0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33],
+        decoded: 'abc\uFFFD123',
+        name: 'Surrogate half (high), in a string'
+    },
+    {
+        input: '\uDC00\uD800',
+        expected: [0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd],
+        decoded: '\uFFFD\uFFFD',
+        name: 'Wrong order'
+    }
+];
+
+badStrings.forEach(function(t) {
+    test(function() {
+        var encoded = new TextEncoder().encode(t.input);
+        assert_array_equals([].slice.call(encoded), t.expected);
+        assert_equals(new TextDecoder('utf-8').decode(encoded), t.decoded);
+    }, 'Invalid surrogates encoded into UTF-8: ' + t.name);
+});
diff --git a/test/fixtures/wpt/encoding/big5-encoder.html b/test/fixtures/wpt/encoding/big5-encoder.html
@@ -0,0 +1,33 @@
+<!doctype html>
+<meta charset=big5> <!-- test breaks if the server overrides this -->
+<script src=/resources/testharness.js></script>
+<script src=/resources/testharnessreport.js></script>
+<div id=log></div>
+<script>
+ function encode(input, output, desc) {
+   test(function() {
+     var a = document.createElement("a"); // <a> uses document encoding for URL's query
+     // Append and prepend X to test for off-by-one errors
+     a.href = "https://example.com/?X" + input + "X";
+     assert_equals(a.search.substr(1), "X" + output + "X"); // remove leading "?"
+   }, "big5 encoder: " + desc);
+ }
+
+ encode("ab", "ab", "very basic")
+ // edge cases
+ encode("\u9EA6", "%26%2340614%3B", "Highest-pointer BMP character excluded from encoder");
+ encode("\uD858\uDE6B", "%26%23156267%3B", "Highest-pointer character excluded from encoder");
+ encode("\u3000", "%A1@", "Lowest-pointer character included in encoder");
+ encode("\u20AC", "%A3%E1", "Euro; the highest-pointer character before a range of 30 unmapped pointers");
+ encode("\u4E00", "%A4@", "The lowest-pointer character after the range of 30 unmapped pointers");
+ encode("\uD85D\uDE07", "%C8%A4", "The highest-pointer character before a range of 41 unmapped pointers");
+ encode("\uFFE2", "%C8%CD", "The lowest-pointer character after the range of 41 unmapped pointers");
+ encode("\u79D4", "%FE%FE", "The last character in the index");
+ // not in index
+ encode("\u2603", "%26%239731%3B", "The canonical BMP test character that is not in the index");
+ encode("\uD83D\uDCA9", "%26%23128169%3B", "The canonical astral test character that is not in the index");
+ // duplicate low bits
+ encode("\uD840\uDFB5", "%FDj", "A Plane 2 character whose low 16 bits match a BMP character that has a lower pointer");
+ // prefer last
+ encode("\u2550", "%F9%F9", "A duplicate-mapped code point that prefers the highest pointer in the encoder");
+</script>
diff --git a/test/fixtures/wpt/encoding/eof-shift_jis-ref.html b/test/fixtures/wpt/encoding/eof-shift_jis-ref.html
@@ -0,0 +1,4 @@
+<!doctype html>
+<meta charset=shift_jis>
+<title>Shift_JIS file ending with a truncated sequence</title>
+One-byte truncated sequence:&#xFFFD;
diff --git a/test/fixtures/wpt/encoding/eof-shift_jis.html b/test/fixtures/wpt/encoding/eof-shift_jis.html
@@ -0,0 +1,5 @@
+<!doctype html>
+<meta charset=shift_jis>
+<title>Shift_JIS file ending with a truncated sequence</title>
+<link rel=match href=/encoding/eof-shift_jis-ref.html>
+One-byte truncated sequence:�
diff --git a/test/fixtures/wpt/encoding/eof-utf-8-one-ref.html b/test/fixtures/wpt/encoding/eof-utf-8-one-ref.html
@@ -0,0 +1,4 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>UTF-8 file ending with a one-byte truncated sequence</title>
+One-byte truncated sequence:&#xFFFD;
diff --git a/test/fixtures/wpt/encoding/eof-utf-8-one.html b/test/fixtures/wpt/encoding/eof-utf-8-one.html
@@ -0,0 +1,5 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>UTF-8 file ending with a one-byte truncated sequence</title>
+<link rel=match href="eof-utf-8-one-ref.html">
+One-byte truncated sequence:�
diff --git a/test/fixtures/wpt/encoding/eof-utf-8-three-ref.html b/test/fixtures/wpt/encoding/eof-utf-8-three-ref.html
@@ -0,0 +1,4 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>UTF-8 file ending with a three-byte truncated sequence</title>
+Three-byte truncated sequence:&#xFFFD;
diff --git a/test/fixtures/wpt/encoding/eof-utf-8-three.html b/test/fixtures/wpt/encoding/eof-utf-8-three.html
@@ -0,0 +1,5 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>UTF-8 file ending with a three-byte truncated sequence</title>
+<link rel=match href="eof-utf-8-three-ref.html">
+Three-byte truncated sequence:�
diff --git a/test/fixtures/wpt/encoding/eof-utf-8-two-ref.html b/test/fixtures/wpt/encoding/eof-utf-8-two-ref.html
@@ -0,0 +1,4 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>UTF-8 file ending with a two-byte truncated sequence</title>
+Two-byte truncated sequence:&#xFFFD;
diff --git a/test/fixtures/wpt/encoding/eof-utf-8-two.html b/test/fixtures/wpt/encoding/eof-utf-8-two.html
@@ -0,0 +1,5 @@
+<!doctype html>
+<meta charset=utf-8>
+<title>UTF-8 file ending with a two-byte truncated sequence</title>
+<link rel=match href="eof-utf-8-two-ref.html">
+Two-byte truncated sequence:�
diff --git a/test/fixtures/wpt/encoding/gb18030-encoder.html b/test/fixtures/wpt/encoding/gb18030-encoder.html
@@ -0,0 +1,21 @@
+<!doctype html>
+<meta charset=gb18030> <!-- if the server overrides this, it is stupid, as this is a testsuite -->
+<script src=/resources/testharness.js></script>
+<script src=/resources/testharnessreport.js></script>
+<div id=log></div>
+<script>
+ function encode(input, output, desc) {
+   test(function() {
+     var a = document.createElement("a") // <a> uses document encoding for URL's query
+     a.href = "https://example.com/?" + input
+     assert_equals(a.search.substr(1), output) // remove leading "?"
+   }, "gb18030 encoder: " + desc)
+ }
+
+ encode("s", "s", "very basic")
+ encode("\u20AC", "%A2%E3", "Euro")
+ encode("\u4E02", "%81@", "character")
+ encode("\uE4C6", "%A1@", "PUA")
+ encode("\uE4C5", "%FE%FE", "PUA #2")
+ encode("\ud83d\udca9", "%949%DA3", "poo")
+</script>
diff --git a/test/fixtures/wpt/encoding/gbk-encoder.html b/test/fixtures/wpt/encoding/gbk-encoder.html
@@ -0,0 +1,21 @@
+<!doctype html>
+<meta charset=gbk> <!-- if the server overrides this, it is stupid, as this is a testsuite -->
+<script src=/resources/testharness.js></script>
+<script src=/resources/testharnessreport.js></script>
+<div id=log></div>
+<script>
+ function encode(input, output, desc) {
+   test(function() {
+     var a = document.createElement("a") // <a> uses document encoding for URL's query
+     a.href = "https://example.com/?" + input
+     assert_equals(a.search.substr(1), output) // remove leading "?"
+   }, "gbk encoder: " + desc)
+ }
+
+ encode("s", "s", "very basic")
+ encode("\u20AC", "%80", "Euro")
+ encode("\u4E02", "%81@", "character")
+ encode("\uE4C6", "%A1@", "PUA")
+ encode("\uE4C5", "%FE%FE", "PUA #2")
+ encode("\ud83d\udca9", "%26%23128169%3B", "poo")
+</script>
diff --git a/test/fixtures/wpt/encoding/idlharness.any.js b/test/fixtures/wpt/encoding/idlharness.any.js
@@ -0,0 +1,14 @@
+// META: global=window,worker
+// META: script=/resources/WebIDLParser.js
+// META: script=/resources/idlharness.js
+
+idl_test(
+  ['encoding'],
+  [], // No deps
+  idl_array => {
+    idl_array.add_objects({
+      TextEncoder: ['new TextEncoder()'],
+      TextDecoder: ['new TextDecoder()']
+    });
+  }
+);