From 71d3f94e695c458a68ce545212df0c8ca20ca171 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Sat, 28 Jan 2017 10:37:45 -0800 Subject: [PATCH 01/21] url: extend URLSearchParams constructor PR-URL: https://github.com/nodejs/node/pull/12507 Fixes: https://github.com/nodejs/node/issues/10635 Reviewed-By: James M Snell --- lib/internal/url.js | 50 ++++++++++- ...est-whatwg-url-searchparams-constructor.js | 87 +++++++++++-------- 2 files changed, 96 insertions(+), 41 deletions(-) diff --git a/lib/internal/url.js b/lib/internal/url.js index a474ed30b4d6e0..c646a757024f34 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -614,11 +614,53 @@ function defineIDLClass(proto, classStr, obj) { } class URLSearchParams { - constructor(init = '') { - if (init instanceof URLSearchParams) { - const childParams = init[searchParams]; - this[searchParams] = childParams.slice(); + // URL Standard says the default value is '', but as undefined and '' have + // the same result, undefined is used to prevent unnecessary parsing. + // Default parameter is necessary to keep URLSearchParams.length === 0 in + // accordance with Web IDL spec. + constructor(init = undefined) { + if (init === null || init === undefined) { + this[searchParams] = []; + } else if (typeof init === 'object') { + const method = init[Symbol.iterator]; + if (method === this[Symbol.iterator]) { + // While the spec does not have this branch, we can use it as a + // shortcut to avoid having to go through the costly generic iterator. 
+ const childParams = init[searchParams]; + this[searchParams] = childParams.slice(); + } else if (method !== null && method !== undefined) { + if (typeof method !== 'function') { + throw new TypeError('Query pairs must be iterable'); + } + + // sequence<sequence<USVString>> + // Note: per spec we have to first exhaust the lists then process them + const pairs = []; + for (const pair of init) { + if (typeof pair !== 'object' || + typeof pair[Symbol.iterator] !== 'function') { + throw new TypeError('Each query pair must be iterable'); + } + pairs.push(Array.from(pair)); + } + + this[searchParams] = []; + for (const pair of pairs) { + if (pair.length !== 2) { + throw new TypeError('Each query pair must be a name/value tuple'); + } + this[searchParams].push(String(pair[0]), String(pair[1])); + } + } else { + // record<USVString, USVString> + this[searchParams] = []; + for (const key of Object.keys(init)) { + const value = String(init[key]); + this[searchParams].push(key, value); + } + } } else { + // USVString init = String(init); if (init[0] === '?') init = init.slice(1); initSearchParams(this, init); diff --git a/test/parallel/test-whatwg-url-searchparams-constructor.js b/test/parallel/test-whatwg-url-searchparams-constructor.js index 4e177ce59ccf20..d57373e727ac51 100644 --- a/test/parallel/test-whatwg-url-searchparams-constructor.js +++ b/test/parallel/test-whatwg-url-searchparams-constructor.js @@ -4,7 +4,8 @@ const common = require('../common'); const assert = require('assert'); const URLSearchParams = require('url').URLSearchParams; const { - test, assert_equals, assert_true, assert_false + test, assert_equals, assert_true, + assert_false, assert_throws, assert_array_equals } = common.WPT; /* eslint-disable */ @@ -40,10 +41,10 @@ test(() => { assert_equals(params.__proto__, URLSearchParams.prototype, 'expected URLSearchParams.prototype as prototype.'); }, "URLSearchParams constructor, empty string as argument") -// test(() => { -// params = new URLSearchParams({}); -// assert_equals(params + '', ""); 
-// }, 'URLSearchParams constructor, {} as argument'); +test(() => { + params = new URLSearchParams({}); + assert_equals(params + '', ""); +}, 'URLSearchParams constructor, {} as argument'); test(function() { var params = new URLSearchParams('a=b'); @@ -142,39 +143,39 @@ test(function() { assert_equals(params.get('a\uD83D\uDCA9b'), 'c'); }, 'Parse %f0%9f%92%a9'); // Unicode Character 'PILE OF POO' (U+1F4A9) -// test(function() { -// var params = new URLSearchParams([]); -// assert_true(params != null, 'constructor returned non-null value.'); -// params = new URLSearchParams([['a', 'b'], ['c', 'd']]); -// assert_equals(params.get("a"), "b"); -// assert_equals(params.get("c"), "d"); -// assert_throws(new TypeError(), function() { new URLSearchParams([[1]]); }); -// assert_throws(new TypeError(), function() { new URLSearchParams([[1,2,3]]); }); -// }, "Constructor with sequence of sequences of strings"); - -// [ +test(function() { + var params = new URLSearchParams([]); + assert_true(params != null, 'constructor returned non-null value.'); + params = new URLSearchParams([['a', 'b'], ['c', 'd']]); + assert_equals(params.get("a"), "b"); + assert_equals(params.get("c"), "d"); + assert_throws(new TypeError(), function() { new URLSearchParams([[1]]); }); + assert_throws(new TypeError(), function() { new URLSearchParams([[1,2,3]]); }); +}, "Constructor with sequence of sequences of strings"); + +[ // { "input": {"+": "%C2"}, "output": [[" ", "\uFFFD"]], "name": "object with +" }, -// { "input": {c: "x", a: "?"}, "output": [["c", "x"], ["a", "?"]], "name": "object with two keys" }, -// { "input": [["c", "x"], ["a", "?"]], "output": [["c", "x"], ["a", "?"]], "name": "array with two keys" } -// ].forEach((val) => { -// test(() => { -// let params = new URLSearchParams(val.input), -// i = 0 -// for (let param of params) { -// assert_array_equals(param, val.output[i]) -// i++ -// } -// }, "Construct with " + val.name) -// }) + { "input": {c: "x", a: "?"}, "output": [["c", "x"], 
["a", "?"]], "name": "object with two keys" }, + { "input": [["c", "x"], ["a", "?"]], "output": [["c", "x"], ["a", "?"]], "name": "array with two keys" } +].forEach((val) => { + test(() => { + let params = new URLSearchParams(val.input), + i = 0 + for (let param of params) { + assert_array_equals(param, val.output[i]) + i++ + } + }, "Construct with " + val.name) +}) -// test(() => { -// params = new URLSearchParams() -// params[Symbol.iterator] = function *() { -// yield ["a", "b"] -// } -// let params2 = new URLSearchParams(params) -// assert_equals(params2.get("a"), "b") -// }, "Custom [Symbol.iterator]") +test(() => { + params = new URLSearchParams() + params[Symbol.iterator] = function *() { + yield ["a", "b"] + } + let params2 = new URLSearchParams(params) + assert_equals(params2.get("a"), "b") +}, "Custom [Symbol.iterator]") /* eslint-enable */ // Tests below are not from WPT. @@ -192,5 +193,17 @@ test(function() { params = new URLSearchParams(undefined); assert.strictEqual(params.toString(), ''); params = new URLSearchParams(null); - assert.strictEqual(params.toString(), 'null='); + assert.strictEqual(params.toString(), ''); + assert.throws(() => new URLSearchParams([[1]]), + /^TypeError: Each query pair must be a name\/value tuple$/); + assert.throws(() => new URLSearchParams([[1, 2, 3]]), + /^TypeError: Each query pair must be a name\/value tuple$/); + assert.throws(() => new URLSearchParams({ [Symbol.iterator]: 42 }), + /^TypeError: Query pairs must be iterable$/); + assert.throws(() => new URLSearchParams([{}]), + /^TypeError: Each query pair must be iterable$/); + assert.throws(() => new URLSearchParams(['a']), + /^TypeError: Each query pair must be iterable$/); + assert.throws(() => new URLSearchParams([{ [Symbol.iterator]: 42 }]), + /^TypeError: Each query pair must be iterable$/); } From c40a45fd3bbfd9613ea98bf8b083973d2e24f3a3 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Sat, 28 Jan 2017 12:02:35 -0800 Subject: [PATCH 02/21] doc: document 
URLSearchParams constructor PR-URL: https://github.com/nodejs/node/pull/12507 Ref: https://github.com/whatwg/url/pull/175 Reviewed-By: James M Snell --- doc/api/url.md | 126 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 122 insertions(+), 4 deletions(-) diff --git a/doc/api/url.md b/doc/api/url.md index aa33b554042ff6..088ea95d23f22b 100644 --- a/doc/api/url.md +++ b/doc/api/url.md @@ -693,7 +693,8 @@ console.log(JSON.stringify(myURLs)); ### Class: URLSearchParams The `URLSearchParams` API provides read and write access to the query of a -`URL`. +`URL`. The `URLSearchParams` class can also be used standalone with one of the +four following constructors. The WHATWG `URLSearchParams` interface and the [`querystring`][] module have similar purpose, but the purpose of the [`querystring`][] module is more @@ -701,7 +702,8 @@ general, as it allows the customization of delimiter characters (`&` and `=`). On the other hand, this API is designed purely for URL query strings. ```js -const URL = require('url').URL; +const { URL, URLSearchParams } = require('url'); + const myURL = new URL('https://example.org/?abc=123'); console.log(myURL.searchParams.get('abc')); // Prints 123 @@ -714,11 +716,125 @@ myURL.searchParams.delete('abc'); myURL.searchParams.set('a', 'b'); console.log(myURL.href); // Prints https://example.org/?a=b + +const newSearchParams = new URLSearchParams(myURL.searchParams); +// The above is equivalent to +// const newSearchParams = new URLSearchParams(myURL.search); + +newSearchParams.append('a', 'c'); +console.log(myURL.href); + // Prints https://example.org/?a=b +console.log(newSearchParams.toString()); + // Prints a=b&a=c + +// newSearchParams.toString() is implicitly called +myURL.search = newSearchParams; +console.log(myURL.href); + // Prints https://example.org/?a=b&a=c +newSearchParams.delete('a'); +console.log(myURL.href); + // Prints https://example.org/?a=b&a=c ``` -#### Constructor: new URLSearchParams([init]) +#### 
Constructor: new URLSearchParams() + +Instantiate a new empty `URLSearchParams` object. + +#### Constructor: new URLSearchParams(string) + +* `string` {string} A query string + +Parse the `string` as a query string, and use it to instantiate a new +`URLSearchParams` object. A leading `'?'`, if present, is ignored. + +```js +const { URLSearchParams } = require('url'); +let params; + +params = new URLSearchParams('user=abc&query=xyz'); +console.log(params.get('user')); + // Prints 'abc' +console.log(params.toString()); + // Prints 'user=abc&query=xyz' + +params = new URLSearchParams('?user=abc&query=xyz'); +console.log(params.toString()); + // Prints 'user=abc&query=xyz' +``` + +#### Constructor: new URLSearchParams(obj) + +* `obj` {Object} An object representing a collection of key-value pairs -* `init` {String} The URL query +Instantiate a new `URLSearchParams` object with a query hash map. The key and +value of each property of `obj` are always coerced to strings. + +*Note*: Unlike [`querystring`][] module, duplicate keys in the form of array +values are not allowed. Arrays are stringified using [`array.toString()`][], +which simply joins all array elements with commas. + +```js +const { URLSearchParams } = require('url'); +const params = new URLSearchParams({ + user: 'abc', + query: ['first', 'second'] +}); +console.log(params.getAll('query')); + // Prints ['first,second'] +console.log(params.toString()); + // Prints 'user=abc&query=first%2Csecond' +``` + +#### Constructor: new URLSearchParams(iterable) + +* `iterable` {Iterable} An iterable object whose elements are key-value pairs + +Instantiate a new `URLSearchParams` object with an iterable map in a way that +is similar to [`Map`][]'s constructor. `iterable` can be an Array or any +iterable object. That means `iterable` can be another `URLSearchParams`, in +which case the constructor will simply create a clone of the provided +`URLSearchParams`. 
Elements of `iterable` are key-value pairs, and can +themselves be any iterable object. + +Duplicate keys are allowed. + +```js +const { URLSearchParams } = require('url'); +let params; + +// Using an array +params = new URLSearchParams([ + ['user', 'abc'], + ['query', 'first'], + ['query', 'second'] +]); +console.log(params.toString()); + // Prints 'user=abc&query=first&query=second' + +// Using a Map object +const map = new Map(); +map.set('user', 'abc'); +map.set('query', 'xyz'); +params = new URLSearchParams(map); +console.log(params.toString()); + // Prints 'user=abc&query=xyz' + +// Using a generator function +function* getQueryPairs() { + yield ['user', 'abc']; + yield ['query', 'first']; + yield ['query', 'second']; +} +params = new URLSearchParams(getQueryPairs()); +console.log(params.toString()); + // Prints 'user=abc&query=first&query=second' + +// Each key-value pair must have exactly two elements +new URLSearchParams([ + ['user', 'abc', 'error'] +]); + // Throws TypeError: Each query pair must be a name/value tuple +``` #### urlSearchParams.append(name, value) @@ -975,6 +1091,8 @@ console.log(myURL.origin); [`require('url').format()`]: #url_url_format_url_options [`url.toString()`]: #url_url_tostring [Punycode]: https://tools.ietf.org/html/rfc5891#section-4.4 +[`Map`]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Map +[`array.toString()`]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/toString [WHATWG URL]: #url_the_whatwg_url_api [`new URL()`]: #url_constructor_new_url_input_base [`url.href`]: #url_url_href From b0fecbe9807d2470d73cc5954f1f8f35cd9a9e78 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Fri, 3 Feb 2017 21:53:27 -0800 Subject: [PATCH 03/21] url: enforce valid UTF-8 in WHATWG parser This commit implements the Web IDL USVString conversion, which mandates all unpaired Unicode surrogates be turned into U+FFFD REPLACEMENT CHARACTER. 
It also disallows Symbols to be used as USVString per spec. Certain functions call into C++ methods in the binding that use the Utf8Value class to access string arguments. Utf8Value already does the normalization using V8's String::Write, so in those cases, instead of doing the full USVString normalization, only a symbol check is done (`'' + val`, which uses ES's ToString, versus `String()` which has special provisions for symbols). PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- lib/internal/url.js | 98 +++++--- src/node_url.cc | 53 ++++ test/fixtures/url-setter-tests-additional.js | 237 ++++++++++++++++++ test/fixtures/url-tests-additional.js | 30 +++ .../test-whatwg-url-searchparams-append.js | 9 + ...est-whatwg-url-searchparams-constructor.js | 16 ++ .../test-whatwg-url-searchparams-delete.js | 6 + .../test-whatwg-url-searchparams-get.js | 6 + .../test-whatwg-url-searchparams-getall.js | 6 + .../test-whatwg-url-searchparams-has.js | 6 + .../test-whatwg-url-searchparams-set.js | 9 + test/parallel/test-whatwg-url-searchparams.js | 32 ++- test/parallel/test-whatwg-url-setters.js | 45 ++++ 13 files changed, 509 insertions(+), 44 deletions(-) create mode 100644 test/fixtures/url-setter-tests-additional.js diff --git a/lib/internal/url.js b/lib/internal/url.js index c646a757024f34..a2c255f765cdc6 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -23,6 +23,18 @@ const IteratorPrototype = Object.getPrototypeOf( Object.getPrototypeOf([][Symbol.iterator]()) ); +const unpairedSurrogateRe = + /([^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])/; +function toUSVString(val) { + const str = '' + val; + // As of V8 5.5, `str.search()` (and `unpairedSurrogateRe[@@search]()`) are + // slower than `unpairedSurrogateRe.exec()`. 
+ const match = unpairedSurrogateRe.exec(str); + if (!match) + return str; + return binding.toUSVString(str, match.index); +} + class OpaqueOrigin { toString() { return 'null'; @@ -108,7 +120,6 @@ function onParseError(flags, input) { // Reused by URL constructor and URL#href setter. function parse(url, input, base) { - input = String(input); const base_context = base ? base[context] : undefined; url[context] = new StorageObject(); binding.parse(input.trim(), -1, @@ -203,8 +214,10 @@ function onParseHashComplete(flags, protocol, username, password, class URL { constructor(input, base) { + // toUSVString is not needed. + input = '' + input; if (base !== undefined && !(base instanceof URL)) - base = new URL(String(base)); + base = new URL(base); parse(this, input, base); } @@ -312,6 +325,8 @@ Object.defineProperties(URL.prototype, { return this[kFormat]({}); }, set(input) { + // toUSVString is not needed. + input = '' + input; parse(this, input); } }, @@ -329,7 +344,8 @@ Object.defineProperties(URL.prototype, { return this[context].scheme; }, set(scheme) { - scheme = String(scheme); + // toUSVString is not needed. + scheme = '' + scheme; if (scheme.length === 0) return; binding.parse(scheme, binding.kSchemeStart, null, this[context], @@ -343,7 +359,8 @@ Object.defineProperties(URL.prototype, { return this[context].username || ''; }, set(username) { - username = String(username); + // toUSVString is not needed. + username = '' + username; if (!this.hostname) return; const ctx = this[context]; @@ -363,7 +380,8 @@ Object.defineProperties(URL.prototype, { return this[context].password || ''; }, set(password) { - password = String(password); + // toUSVString is not needed. + password = '' + password; if (!this.hostname) return; const ctx = this[context]; @@ -388,7 +406,8 @@ Object.defineProperties(URL.prototype, { }, set(host) { const ctx = this[context]; - host = String(host); + // toUSVString is not needed. 
+ host = '' + host; if (this[cannotBeBase] || (this[special] && host.length === 0)) { // Cannot set the host if cannot-be-base is set or @@ -412,7 +431,8 @@ Object.defineProperties(URL.prototype, { }, set(host) { const ctx = this[context]; - host = String(host); + // toUSVString is not needed. + host = '' + host; if (this[cannotBeBase] || (this[special] && host.length === 0)) { // Cannot set the host if cannot-be-base is set or @@ -436,11 +456,12 @@ Object.defineProperties(URL.prototype, { return port === undefined ? '' : String(port); }, set(port) { + // toUSVString is not needed. + port = '' + port; const ctx = this[context]; if (!ctx.host || this[cannotBeBase] || this.protocol === 'file:') return; - port = String(port); if (port === '') { ctx.port = undefined; return; @@ -459,9 +480,11 @@ Object.defineProperties(URL.prototype, { return ctx.path !== undefined ? `/${ctx.path.join('/')}` : ''; }, set(path) { + // toUSVString is not needed. + path = '' + path; if (this[cannotBeBase]) return; - binding.parse(String(path), binding.kPathStart, null, this[context], + binding.parse(path, binding.kPathStart, null, this[context], onParsePathComplete.bind(this)); } }, @@ -474,7 +497,7 @@ Object.defineProperties(URL.prototype, { }, set(search) { const ctx = this[context]; - search = String(search); + search = toUSVString(search); if (!search) { ctx.query = null; ctx.flags &= ~binding.URL_FLAGS_HAS_QUERY; @@ -506,7 +529,8 @@ Object.defineProperties(URL.prototype, { }, set(hash) { const ctx = this[context]; - hash = String(hash); + // toUSVString is not needed. 
+ hash = '' + hash; if (this.protocol === 'javascript:') return; if (!hash) { @@ -649,19 +673,22 @@ class URLSearchParams { if (pair.length !== 2) { throw new TypeError('Each query pair must be a name/value tuple'); } - this[searchParams].push(String(pair[0]), String(pair[1])); + const key = toUSVString(pair[0]); + const value = toUSVString(pair[1]); + this[searchParams].push(key, value); } } else { // record this[searchParams] = []; - for (const key of Object.keys(init)) { - const value = String(init[key]); + for (var key of Object.keys(init)) { + key = toUSVString(key); + const value = toUSVString(init[key]); this[searchParams].push(key, value); } } } else { // USVString - init = String(init); + init = toUSVString(init); if (init[0] === '?') init = init.slice(1); initSearchParams(this, init); } @@ -740,8 +767,8 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { throw new TypeError('"name" and "value" arguments must be specified'); } - name = String(name); - value = String(value); + name = toUSVString(name); + value = toUSVString(value); this[searchParams].push(name, value); update(this[context], this); }, @@ -755,7 +782,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { } const list = this[searchParams]; - name = String(name); + name = toUSVString(name); for (var i = 0; i < list.length;) { const cur = list[i]; if (cur === name) { @@ -776,7 +803,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { } const list = this[searchParams]; - name = String(name); + name = toUSVString(name); for (var i = 0; i < list.length; i += 2) { if (list[i] === name) { return list[i + 1]; @@ -795,7 +822,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { const list = this[searchParams]; const values = []; - name = String(name); + name = toUSVString(name); for (var i = 0; i < list.length; i += 2) { if (list[i] === name) { values.push(list[i + 1]); @@ -813,7 +840,7 @@ defineIDLClass(URLSearchParams.prototype, 
'URLSearchParams', { } const list = this[searchParams]; - name = String(name); + name = toUSVString(name); for (var i = 0; i < list.length; i += 2) { if (list[i] === name) { return true; @@ -831,8 +858,8 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { } const list = this[searchParams]; - name = String(name); - value = String(value); + name = toUSVString(name); + value = toUSVString(value); // If there are any name-value pairs whose name is `name`, in `list`, set // the value of the first such name-value pair to `value` and remove the @@ -1094,11 +1121,13 @@ function originFor(url, base) { } function domainToASCII(domain) { - return binding.domainToASCII(String(domain)); + // toUSVString is not needed. + return binding.domainToASCII('' + domain); } function domainToUnicode(domain) { - return binding.domainToUnicode(String(domain)); + // toUSVString is not needed. + return binding.domainToUnicode('' + domain); } // Utility function that converts a URL object into an ordinary @@ -1184,11 +1213,14 @@ function getPathFromURL(path) { return isWindows ? getPathFromURLWin32(path) : getPathFromURLPosix(path); } -exports.getPathFromURL = getPathFromURL; -exports.URL = URL; -exports.URLSearchParams = URLSearchParams; -exports.domainToASCII = domainToASCII; -exports.domainToUnicode = domainToUnicode; -exports.urlToOptions = urlToOptions; -exports.formatSymbol = kFormat; -exports.searchParamsSymbol = searchParams; +module.exports = { + toUSVString, + getPathFromURL, + URL, + URLSearchParams, + domainToASCII, + domainToUnicode, + urlToOptions, + formatSymbol: kFormat, + searchParamsSymbol: searchParams +}; diff --git a/src/node_url.cc b/src/node_url.cc index a013380b75839e..d9213738e7f894 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -20,6 +20,8 @@ #include #endif +#define UNICODE_REPLACEMENT_CHARACTER 0xFFFD + namespace node { using v8::Array; @@ -104,6 +106,21 @@ namespace url { } #endif + // If a UTF-16 character is a low/trailing surrogate. 
+ static inline bool IsUnicodeTrail(uint16_t c) { + return (c & 0xFC00) == 0xDC00; + } + + // If a UTF-16 character is a surrogate. + static inline bool IsUnicodeSurrogate(uint16_t c) { + return (c & 0xF800) == 0xD800; + } + + // If a UTF-16 surrogate is a low/trailing one. + static inline bool IsUnicodeSurrogateTrail(uint16_t c) { + return (c & 0x400) != 0; + } + static url_host_type ParseIPv6Host(url_host* host, const char* input, size_t length) { @@ -1356,6 +1373,41 @@ namespace url { v8::NewStringType::kNormal).ToLocalChecked()); } + static void ToUSVString(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 2); + CHECK(args[0]->IsString()); + CHECK(args[1]->IsNumber()); + + TwoByteValue value(env->isolate(), args[0]); + const size_t n = value.length(); + + const int64_t start = args[1]->IntegerValue(env->context()).FromJust(); + CHECK_GE(start, 0); + + for (size_t i = start; i < n; i++) { + uint16_t c = value[i]; + if (!IsUnicodeSurrogate(c)) { + continue; + } else if (IsUnicodeSurrogateTrail(c) || i == n - 1) { + value[i] = UNICODE_REPLACEMENT_CHARACTER; + } else { + uint16_t d = value[i + 1]; + if (IsUnicodeTrail(d)) { + i++; + } else { + value[i] = UNICODE_REPLACEMENT_CHARACTER; + } + } + } + + args.GetReturnValue().Set( + String::NewFromTwoByte(env->isolate(), + *value, + v8::NewStringType::kNormal, + n).ToLocalChecked()); + } + static void DomainToASCII(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); CHECK_GE(args.Length(), 1); @@ -1403,6 +1455,7 @@ namespace url { Environment* env = Environment::GetCurrent(context); env->SetMethod(target, "parse", Parse); env->SetMethod(target, "encodeAuth", EncodeAuthSet); + env->SetMethod(target, "toUSVString", ToUSVString); env->SetMethod(target, "domainToASCII", DomainToASCII); env->SetMethod(target, "domainToUnicode", DomainToUnicode); diff --git a/test/fixtures/url-setter-tests-additional.js 
b/test/fixtures/url-setter-tests-additional.js new file mode 100644 index 00000000000000..b27ae336a28776 --- /dev/null +++ b/test/fixtures/url-setter-tests-additional.js @@ -0,0 +1,237 @@ +module.exports = { + 'username': [ + { + 'comment': 'Surrogate pair', + 'href': 'https://github.com/', + 'new_value': '\uD83D\uDE00', + 'expected': { + 'href': 'https://%F0%9F%98%80@github.com/', + 'username': '%F0%9F%98%80' + } + }, + { + 'comment': 'Unpaired low surrogate 1', + 'href': 'https://github.com/', + 'new_value': '\uD83D', + 'expected': { + 'href': 'https://%EF%BF%BD@github.com/', + 'username': '%EF%BF%BD' + } + }, + { + 'comment': 'Unpaired low surrogate 2', + 'href': 'https://github.com/', + 'new_value': '\uD83Dnode', + 'expected': { + 'href': 'https://%EF%BF%BDnode@github.com/', + 'username': '%EF%BF%BDnode' + } + }, + { + 'comment': 'Unpaired high surrogate 1', + 'href': 'https://github.com/', + 'new_value': '\uDE00', + 'expected': { + 'href': 'https://%EF%BF%BD@github.com/', + 'username': '%EF%BF%BD' + } + }, + { + 'comment': 'Unpaired high surrogate 2', + 'href': 'https://github.com/', + 'new_value': '\uDE00node', + 'expected': { + 'href': 'https://%EF%BF%BDnode@github.com/', + 'username': '%EF%BF%BDnode' + } + } + ], + 'password': [ + { + 'comment': 'Surrogate pair', + 'href': 'https://github.com/', + 'new_value': '\uD83D\uDE00', + 'expected': { + 'href': 'https://:%F0%9F%98%80@github.com/', + 'password': '%F0%9F%98%80' + } + }, + { + 'comment': 'Unpaired low surrogate 1', + 'href': 'https://github.com/', + 'new_value': '\uD83D', + 'expected': { + 'href': 'https://:%EF%BF%BD@github.com/', + 'password': '%EF%BF%BD' + } + }, + { + 'comment': 'Unpaired low surrogate 2', + 'href': 'https://github.com/', + 'new_value': '\uD83Dnode', + 'expected': { + 'href': 'https://:%EF%BF%BDnode@github.com/', + 'password': '%EF%BF%BDnode' + } + }, + { + 'comment': 'Unpaired high surrogate 1', + 'href': 'https://github.com/', + 'new_value': '\uDE00', + 'expected': { + 'href': 
'https://:%EF%BF%BD@github.com/', + 'password': '%EF%BF%BD' + } + }, + { + 'comment': 'Unpaired high surrogate 2', + 'href': 'https://github.com/', + 'new_value': '\uDE00node', + 'expected': { + 'href': 'https://:%EF%BF%BDnode@github.com/', + 'password': '%EF%BF%BDnode' + } + } + ], + 'pathname': [ + { + 'comment': 'Surrogate pair', + 'href': 'https://github.com/', + 'new_value': '/\uD83D\uDE00', + 'expected': { + 'href': 'https://github.com/%F0%9F%98%80', + 'pathname': '/%F0%9F%98%80' + } + }, + { + 'comment': 'Unpaired low surrogate 1', + 'href': 'https://github.com/', + 'new_value': '/\uD83D', + 'expected': { + 'href': 'https://github.com/%EF%BF%BD', + 'pathname': '/%EF%BF%BD' + } + }, + { + 'comment': 'Unpaired low surrogate 2', + 'href': 'https://github.com/', + 'new_value': '/\uD83Dnode', + 'expected': { + 'href': 'https://github.com/%EF%BF%BDnode', + 'pathname': '/%EF%BF%BDnode' + } + }, + { + 'comment': 'Unpaired high surrogate 1', + 'href': 'https://github.com/', + 'new_value': '/\uDE00', + 'expected': { + 'href': 'https://github.com/%EF%BF%BD', + 'pathname': '/%EF%BF%BD' + } + }, + { + 'comment': 'Unpaired high surrogate 2', + 'href': 'https://github.com/', + 'new_value': '/\uDE00node', + 'expected': { + 'href': 'https://github.com/%EF%BF%BDnode', + 'pathname': '/%EF%BF%BDnode' + } + } + ], + 'search': [ + { + 'comment': 'Surrogate pair', + 'href': 'https://github.com/', + 'new_value': '\uD83D\uDE00', + 'expected': { + 'href': 'https://github.com/?%F0%9F%98%80', + 'search': '?%F0%9F%98%80' + } + }, + { + 'comment': 'Unpaired low surrogate 1', + 'href': 'https://github.com/', + 'new_value': '\uD83D', + 'expected': { + 'href': 'https://github.com/?%EF%BF%BD', + 'search': '?%EF%BF%BD' + } + }, + { + 'comment': 'Unpaired low surrogate 2', + 'href': 'https://github.com/', + 'new_value': '\uD83Dnode', + 'expected': { + 'href': 'https://github.com/?%EF%BF%BDnode', + 'search': '?%EF%BF%BDnode' + } + }, + { + 'comment': 'Unpaired high surrogate 1', + 'href': 
'https://github.com/', + 'new_value': '\uDE00', + 'expected': { + 'href': 'https://github.com/?%EF%BF%BD', + 'search': '?%EF%BF%BD' + } + }, + { + 'comment': 'Unpaired high surrogate 2', + 'href': 'https://github.com/', + 'new_value': '\uDE00node', + 'expected': { + 'href': 'https://github.com/?%EF%BF%BDnode', + 'search': '?%EF%BF%BDnode' + } + } + ], + 'hash': [ + { + 'comment': 'Surrogate pair', + 'href': 'https://github.com/', + 'new_value': '\uD83D\uDE00', + 'expected': { + 'href': 'https://github.com/#%F0%9F%98%80', + 'hash': '#%F0%9F%98%80' + } + }, + { + 'comment': 'Unpaired low surrogate 1', + 'href': 'https://github.com/', + 'new_value': '\uD83D', + 'expected': { + 'href': 'https://github.com/#%EF%BF%BD', + 'hash': '#%EF%BF%BD' + } + }, + { + 'comment': 'Unpaired low surrogate 2', + 'href': 'https://github.com/', + 'new_value': '\uD83Dnode', + 'expected': { + 'href': 'https://github.com/#%EF%BF%BDnode', + 'hash': '#%EF%BF%BDnode' + } + }, + { + 'comment': 'Unpaired high surrogate 1', + 'href': 'https://github.com/', + 'new_value': '\uDE00', + 'expected': { + 'href': 'https://github.com/#%EF%BF%BD', + 'hash': '#%EF%BF%BD' + } + }, + { + 'comment': 'Unpaired high surrogate 2', + 'href': 'https://github.com/', + 'new_value': '\uDE00node', + 'expected': { + 'href': 'https://github.com/#%EF%BF%BDnode', + 'hash': '#%EF%BF%BDnode' + } + } + ] +}; diff --git a/test/fixtures/url-tests-additional.js b/test/fixtures/url-tests-additional.js index ffe47fb639dcba..c1c640f4bb4b7d 100644 --- a/test/fixtures/url-tests-additional.js +++ b/test/fixtures/url-tests-additional.js @@ -3,4 +3,34 @@ // This file contains test cases not part of the WPT module.exports = [ + { + // surrogate pair + 'url': 'https://github.com/nodejs/\uD83D\uDE00node', + 'protocol': 'https:', + 'pathname': '/nodejs/%F0%9F%98%80node' + }, + { + // unpaired high surrogate + 'url': 'https://github.com/nodejs/\uD83D', + 'protocol': 'https:', + 'pathname': '/nodejs/%EF%BF%BD' + }, + { + // unpaired high 
surrogate + 'url': 'https://github.com/nodejs/\uD83Dnode', + 'protocol': 'https:', + 'pathname': '/nodejs/%EF%BF%BDnode' + }, + { + // unmatched low surrogate + 'url': 'https://github.com/nodejs/\uDE00', + 'protocol': 'https:', + 'pathname': '/nodejs/%EF%BF%BD' + }, + { + // unmatched low surrogate + 'url': 'https://github.com/nodejs/\uDE00node', + 'protocol': 'https:', + 'pathname': '/nodejs/%EF%BF%BDnode' + } ]; diff --git a/test/parallel/test-whatwg-url-searchparams-append.js b/test/parallel/test-whatwg-url-searchparams-append.js index 2e3a33b26307c3..67eddbcc503e1e 100644 --- a/test/parallel/test-whatwg-url-searchparams-append.js +++ b/test/parallel/test-whatwg-url-searchparams-append.js @@ -57,4 +57,13 @@ test(function() { assert.throws(() => { params.set('a'); }, /^TypeError: "name" and "value" arguments must be specified$/); + + const obj = { toString() { throw new Error('toString'); } }; + const sym = Symbol(); + assert.throws(() => params.set(obj, 'b'), /^Error: toString$/); + assert.throws(() => params.set('a', obj), /^Error: toString$/); + assert.throws(() => params.set(sym, 'b'), + /^TypeError: Cannot convert a Symbol value to a string$/); + assert.throws(() => params.set('a', sym), + /^TypeError: Cannot convert a Symbol value to a string$/); } diff --git a/test/parallel/test-whatwg-url-searchparams-constructor.js b/test/parallel/test-whatwg-url-searchparams-constructor.js index d57373e727ac51..8ccd8f9427f160 100644 --- a/test/parallel/test-whatwg-url-searchparams-constructor.js +++ b/test/parallel/test-whatwg-url-searchparams-constructor.js @@ -207,3 +207,19 @@ test(() => { assert.throws(() => new URLSearchParams([{ [Symbol.iterator]: 42 }]), /^TypeError: Each query pair must be iterable$/); } + +{ + const obj = { toString() { throw new Error('toString'); } }; + const sym = Symbol(); + + assert.throws(() => new URLSearchParams({ a: obj }), /^Error: toString$/); + assert.throws(() => new URLSearchParams([['a', obj]]), /^Error: toString$/); + 
assert.throws(() => new URLSearchParams(sym), + /^TypeError: Cannot convert a Symbol value to a string$/); + assert.throws(() => new URLSearchParams({ a: sym }), + /^TypeError: Cannot convert a Symbol value to a string$/); + assert.throws(() => new URLSearchParams([[sym, 'a']]), + /^TypeError: Cannot convert a Symbol value to a string$/); + assert.throws(() => new URLSearchParams([['a', sym]]), + /^TypeError: Cannot convert a Symbol value to a string$/); +} diff --git a/test/parallel/test-whatwg-url-searchparams-delete.js b/test/parallel/test-whatwg-url-searchparams-delete.js index c6235263f22bad..d0bae75b4718a8 100644 --- a/test/parallel/test-whatwg-url-searchparams-delete.js +++ b/test/parallel/test-whatwg-url-searchparams-delete.js @@ -51,6 +51,12 @@ test(function() { assert.throws(() => { params.delete(); }, /^TypeError: "name" argument must be specified$/); + + const obj = { toString() { throw new Error('toString'); } }; + const sym = Symbol(); + assert.throws(() => params.delete(obj), /^Error: toString$/); + assert.throws(() => params.delete(sym), + /^TypeError: Cannot convert a Symbol value to a string$/); } // https://github.com/nodejs/node/issues/10480 diff --git a/test/parallel/test-whatwg-url-searchparams-get.js b/test/parallel/test-whatwg-url-searchparams-get.js index 3a46993214a997..2244fc28612755 100644 --- a/test/parallel/test-whatwg-url-searchparams-get.js +++ b/test/parallel/test-whatwg-url-searchparams-get.js @@ -42,4 +42,10 @@ test(function() { assert.throws(() => { params.get(); }, /^TypeError: "name" argument must be specified$/); + + const obj = { toString() { throw new Error('toString'); } }; + const sym = Symbol(); + assert.throws(() => params.get(obj), /^Error: toString$/); + assert.throws(() => params.get(sym), + /^TypeError: Cannot convert a Symbol value to a string$/); } diff --git a/test/parallel/test-whatwg-url-searchparams-getall.js b/test/parallel/test-whatwg-url-searchparams-getall.js index df055e009e7e4d..921a6c9bc66da2 100644 --- 
a/test/parallel/test-whatwg-url-searchparams-getall.js +++ b/test/parallel/test-whatwg-url-searchparams-getall.js @@ -46,4 +46,10 @@ test(function() { assert.throws(() => { params.getAll(); }, /^TypeError: "name" argument must be specified$/); + + const obj = { toString() { throw new Error('toString'); } }; + const sym = Symbol(); + assert.throws(() => params.getAll(obj), /^Error: toString$/); + assert.throws(() => params.getAll(sym), + /^TypeError: Cannot convert a Symbol value to a string$/); } diff --git a/test/parallel/test-whatwg-url-searchparams-has.js b/test/parallel/test-whatwg-url-searchparams-has.js index 1be9cf6121593e..9d7272f999c653 100644 --- a/test/parallel/test-whatwg-url-searchparams-has.js +++ b/test/parallel/test-whatwg-url-searchparams-has.js @@ -45,4 +45,10 @@ test(function() { assert.throws(() => { params.has(); }, /^TypeError: "name" argument must be specified$/); + + const obj = { toString() { throw new Error('toString'); } }; + const sym = Symbol(); + assert.throws(() => params.has(obj), /^Error: toString$/); + assert.throws(() => params.has(sym), + /^TypeError: Cannot convert a Symbol value to a string$/); } diff --git a/test/parallel/test-whatwg-url-searchparams-set.js b/test/parallel/test-whatwg-url-searchparams-set.js index e78ce4763158b5..0eee7b5c9a0130 100644 --- a/test/parallel/test-whatwg-url-searchparams-set.js +++ b/test/parallel/test-whatwg-url-searchparams-set.js @@ -43,4 +43,13 @@ test(function() { assert.throws(() => { params.set('a'); }, /^TypeError: "name" and "value" arguments must be specified$/); + + const obj = { toString() { throw new Error('toString'); } }; + const sym = Symbol(); + assert.throws(() => params.append(obj, 'b'), /^Error: toString$/); + assert.throws(() => params.append('a', obj), /^Error: toString$/); + assert.throws(() => params.append(sym, 'b'), + /^TypeError: Cannot convert a Symbol value to a string$/); + assert.throws(() => params.append('a', sym), + /^TypeError: Cannot convert a Symbol value to a 
string$/); } diff --git a/test/parallel/test-whatwg-url-searchparams.js b/test/parallel/test-whatwg-url-searchparams.js index 36fac3a2307ecc..e0d1826596704c 100644 --- a/test/parallel/test-whatwg-url-searchparams.js +++ b/test/parallel/test-whatwg-url-searchparams.js @@ -5,8 +5,14 @@ const assert = require('assert'); const URL = require('url').URL; // Tests below are not from WPT. -const serialized = 'a=a&a=1&a=true&a=undefined&a=null&a=%5Bobject%20Object%5D'; -const values = ['a', 1, true, undefined, null, {}]; +const serialized = 'a=a&a=1&a=true&a=undefined&a=null&a=%EF%BF%BD' + + '&a=%EF%BF%BD&a=%F0%9F%98%80&a=%EF%BF%BD%EF%BF%BD' + + '&a=%5Bobject%20Object%5D'; +const values = ['a', 1, true, undefined, null, '\uD83D', '\uDE00', + '\uD83D\uDE00', '\uDE00\uD83D', {}]; +const normalizedValues = ['a', '1', 'true', 'undefined', 'null', '\uFFFD', + '\uFFFD', '\uD83D\uDE00', '\uFFFD\uFFFD', + '[object Object]']; const m = new URL('http://example.org'); const sp = m.searchParams; @@ -27,7 +33,7 @@ assert.strictEqual(sp.toString(), ''); values.forEach((i) => sp.append('a', i)); assert(sp.has('a')); -assert.strictEqual(sp.getAll('a').length, 6); +assert.strictEqual(sp.getAll('a').length, values.length); assert.strictEqual(sp.get('a'), 'a'); assert.strictEqual(sp.toString(), serialized); @@ -39,23 +45,27 @@ assert.strictEqual(sp[Symbol.iterator], sp.entries); let key, val; let n = 0; for ([key, val] of sp) { - assert.strictEqual(key, 'a'); - assert.strictEqual(val, String(values[n++])); + assert.strictEqual(key, 'a', n); + assert.strictEqual(val, normalizedValues[n], n); + n++; } n = 0; for (key of sp.keys()) { - assert.strictEqual(key, 'a'); + assert.strictEqual(key, 'a', n); + n++; } n = 0; for (val of sp.values()) { - assert.strictEqual(val, String(values[n++])); + assert.strictEqual(val, normalizedValues[n], n); + n++; } n = 0; sp.forEach(function(val, key, obj) { - assert.strictEqual(this, undefined); - assert.strictEqual(key, 'a'); - assert.strictEqual(val, 
String(values[n++])); - assert.strictEqual(obj, sp); + assert.strictEqual(this, undefined, n); + assert.strictEqual(key, 'a', n); + assert.strictEqual(val, normalizedValues[n], n); + assert.strictEqual(obj, sp, n); + n++; }); sp.forEach(function() { assert.strictEqual(this, m); diff --git a/test/parallel/test-whatwg-url-setters.js b/test/parallel/test-whatwg-url-setters.js index 63ebba84918945..6e1f4bccbd2314 100644 --- a/test/parallel/test-whatwg-url-setters.js +++ b/test/parallel/test-whatwg-url-setters.js @@ -1,9 +1,12 @@ 'use strict'; const common = require('../common'); +const assert = require('assert'); const path = require('path'); const URL = require('url').URL; const { test, assert_equals } = common.WPT; +const additionalTestCases = require( + path.join(common.fixturesDir, 'url-setter-tests-additional.js')); if (!common.hasIntl) { // A handful of the tests fail when ICU is not included. @@ -76,3 +79,45 @@ function runURLSettersTests(all_test_cases) { startURLSettersTests() /* eslint-enable */ + +// Tests below are not from WPT. 
+ +{ + for (const attributeToBeSet in additionalTestCases) { + if (attributeToBeSet === 'comment') { + continue; + } + const testCases = additionalTestCases[attributeToBeSet]; + for (const testCase of testCases) { + let name = `Setting <${testCase.href}>.${attributeToBeSet}` + + ` = "${testCase.new_value}"`; + if ('comment' in testCase) { + name += ' ' + testCase.comment; + } + test(function() { + const url = new URL(testCase.href); + url[attributeToBeSet] = testCase.new_value; + for (const attribute in testCase.expected) { + assert_equals(url[attribute], testCase.expected[attribute]); + } + }, 'URL: ' + name); + } + } +} + +{ + const url = new URL('http://example.com/'); + const obj = { toString() { throw new Error('toString'); } }; + const sym = Symbol(); + const props = Object.getOwnPropertyDescriptors(Object.getPrototypeOf(url)); + for (const [name, { set }] of Object.entries(props)) { + if (set) { + assert.throws(() => url[name] = obj, + /^Error: toString$/, + `url.${name} = { toString() { throw ... } }`); + assert.throws(() => url[name] = sym, + /^TypeError: Cannot convert a Symbol value to a string$/, + `url.${name} = ${String(sym)}`); + } + } +} From c3366a592bd6de0df8fbc5d3655e1f36aa41c3e2 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Tue, 7 Mar 2017 19:31:29 -0800 Subject: [PATCH 04/21] url: prioritize toString when stringifying The ES addition operator calls the ToPrimitive() abstract operation without hint String, leading a subsequent OrdinaryToPrimitive() to call valueOf() first on an object rather than the desired toString(). Instead, use template literals which directly call ToString() abstract operation, per Web IDL spec. 
PR-URL: https://github.com/nodejs/node/pull/12507 Fixes: b610a4db1c2919f887119 "url: enforce valid UTF-8 in WHATWG parser" Refs: https://github.com/nodejs/node/commit/b610a4db1c2919f88711962f5797f25ecb1cd36b#commitcomment-21200056 Refs: https://tc39.github.io/ecma262/#sec-addition-operator-plus-runtime-semantics-evaluation Refs: https://tc39.github.io/ecma262/#sec-template-literals-runtime-semantics-evaluation Reviewed-By: James M Snell --- lib/internal/url.js | 26 +++++++++---------- .../test-whatwg-url-searchparams-append.js | 5 +++- ...est-whatwg-url-searchparams-constructor.js | 5 +++- .../test-whatwg-url-searchparams-delete.js | 5 +++- .../test-whatwg-url-searchparams-get.js | 5 +++- .../test-whatwg-url-searchparams-getall.js | 5 +++- .../test-whatwg-url-searchparams-has.js | 5 +++- .../test-whatwg-url-searchparams-set.js | 5 +++- test/parallel/test-whatwg-url-setters.js | 5 +++- 9 files changed, 45 insertions(+), 21 deletions(-) diff --git a/lib/internal/url.js b/lib/internal/url.js index a2c255f765cdc6..56fbfd27070863 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -26,7 +26,7 @@ const IteratorPrototype = Object.getPrototypeOf( const unpairedSurrogateRe = /([^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])/; function toUSVString(val) { - const str = '' + val; + const str = `${val}`; // As of V8 5.5, `str.search()` (and `unpairedSurrogateRe[@@search]()`) are // slower than `unpairedSurrogateRe.exec()`. const match = unpairedSurrogateRe.exec(str); @@ -215,7 +215,7 @@ function onParseHashComplete(flags, protocol, username, password, class URL { constructor(input, base) { // toUSVString is not needed. - input = '' + input; + input = `${input}`; if (base !== undefined && !(base instanceof URL)) base = new URL(base); parse(this, input, base); @@ -326,7 +326,7 @@ Object.defineProperties(URL.prototype, { }, set(input) { // toUSVString is not needed. 
- input = '' + input; + input = `${input}`; parse(this, input); } }, @@ -345,7 +345,7 @@ Object.defineProperties(URL.prototype, { }, set(scheme) { // toUSVString is not needed. - scheme = '' + scheme; + scheme = `${scheme}`; if (scheme.length === 0) return; binding.parse(scheme, binding.kSchemeStart, null, this[context], @@ -360,7 +360,7 @@ Object.defineProperties(URL.prototype, { }, set(username) { // toUSVString is not needed. - username = '' + username; + username = `${username}`; if (!this.hostname) return; const ctx = this[context]; @@ -381,7 +381,7 @@ Object.defineProperties(URL.prototype, { }, set(password) { // toUSVString is not needed. - password = '' + password; + password = `${password}`; if (!this.hostname) return; const ctx = this[context]; @@ -407,7 +407,7 @@ Object.defineProperties(URL.prototype, { set(host) { const ctx = this[context]; // toUSVString is not needed. - host = '' + host; + host = `${host}`; if (this[cannotBeBase] || (this[special] && host.length === 0)) { // Cannot set the host if cannot-be-base is set or @@ -432,7 +432,7 @@ Object.defineProperties(URL.prototype, { set(host) { const ctx = this[context]; // toUSVString is not needed. - host = '' + host; + host = `${host}`; if (this[cannotBeBase] || (this[special] && host.length === 0)) { // Cannot set the host if cannot-be-base is set or @@ -457,7 +457,7 @@ Object.defineProperties(URL.prototype, { }, set(port) { // toUSVString is not needed. - port = '' + port; + port = `${port}`; const ctx = this[context]; if (!ctx.host || this[cannotBeBase] || this.protocol === 'file:') @@ -481,7 +481,7 @@ Object.defineProperties(URL.prototype, { }, set(path) { // toUSVString is not needed. - path = '' + path; + path = `${path}`; if (this[cannotBeBase]) return; binding.parse(path, binding.kPathStart, null, this[context], @@ -530,7 +530,7 @@ Object.defineProperties(URL.prototype, { set(hash) { const ctx = this[context]; // toUSVString is not needed. 
- hash = '' + hash; + hash = `${hash}`; if (this.protocol === 'javascript:') return; if (!hash) { @@ -1122,12 +1122,12 @@ function originFor(url, base) { function domainToASCII(domain) { // toUSVString is not needed. - return binding.domainToASCII('' + domain); + return binding.domainToASCII(`${domain}`); } function domainToUnicode(domain) { // toUSVString is not needed. - return binding.domainToUnicode('' + domain); + return binding.domainToUnicode(`${domain}`); } // Utility function that converts a URL object into an ordinary diff --git a/test/parallel/test-whatwg-url-searchparams-append.js b/test/parallel/test-whatwg-url-searchparams-append.js index 67eddbcc503e1e..ff4a568c303668 100644 --- a/test/parallel/test-whatwg-url-searchparams-append.js +++ b/test/parallel/test-whatwg-url-searchparams-append.js @@ -58,7 +58,10 @@ test(function() { params.set('a'); }, /^TypeError: "name" and "value" arguments must be specified$/); - const obj = { toString() { throw new Error('toString'); } }; + const obj = { + toString() { throw new Error('toString'); }, + valueOf() { throw new Error('valueOf'); } + }; const sym = Symbol(); assert.throws(() => params.set(obj, 'b'), /^Error: toString$/); assert.throws(() => params.set('a', obj), /^Error: toString$/); diff --git a/test/parallel/test-whatwg-url-searchparams-constructor.js b/test/parallel/test-whatwg-url-searchparams-constructor.js index 8ccd8f9427f160..236d01396095f1 100644 --- a/test/parallel/test-whatwg-url-searchparams-constructor.js +++ b/test/parallel/test-whatwg-url-searchparams-constructor.js @@ -209,7 +209,10 @@ test(() => { } { - const obj = { toString() { throw new Error('toString'); } }; + const obj = { + toString() { throw new Error('toString'); }, + valueOf() { throw new Error('valueOf'); } + }; const sym = Symbol(); assert.throws(() => new URLSearchParams({ a: obj }), /^Error: toString$/); diff --git a/test/parallel/test-whatwg-url-searchparams-delete.js b/test/parallel/test-whatwg-url-searchparams-delete.js 
index d0bae75b4718a8..589fbc2f8698b5 100644 --- a/test/parallel/test-whatwg-url-searchparams-delete.js +++ b/test/parallel/test-whatwg-url-searchparams-delete.js @@ -52,7 +52,10 @@ test(function() { params.delete(); }, /^TypeError: "name" argument must be specified$/); - const obj = { toString() { throw new Error('toString'); } }; + const obj = { + toString() { throw new Error('toString'); }, + valueOf() { throw new Error('valueOf'); } + }; const sym = Symbol(); assert.throws(() => params.delete(obj), /^Error: toString$/); assert.throws(() => params.delete(sym), diff --git a/test/parallel/test-whatwg-url-searchparams-get.js b/test/parallel/test-whatwg-url-searchparams-get.js index 2244fc28612755..5e81be4f32cc1d 100644 --- a/test/parallel/test-whatwg-url-searchparams-get.js +++ b/test/parallel/test-whatwg-url-searchparams-get.js @@ -43,7 +43,10 @@ test(function() { params.get(); }, /^TypeError: "name" argument must be specified$/); - const obj = { toString() { throw new Error('toString'); } }; + const obj = { + toString() { throw new Error('toString'); }, + valueOf() { throw new Error('valueOf'); } + }; const sym = Symbol(); assert.throws(() => params.get(obj), /^Error: toString$/); assert.throws(() => params.get(sym), diff --git a/test/parallel/test-whatwg-url-searchparams-getall.js b/test/parallel/test-whatwg-url-searchparams-getall.js index 921a6c9bc66da2..f80f45d5427e77 100644 --- a/test/parallel/test-whatwg-url-searchparams-getall.js +++ b/test/parallel/test-whatwg-url-searchparams-getall.js @@ -47,7 +47,10 @@ test(function() { params.getAll(); }, /^TypeError: "name" argument must be specified$/); - const obj = { toString() { throw new Error('toString'); } }; + const obj = { + toString() { throw new Error('toString'); }, + valueOf() { throw new Error('valueOf'); } + }; const sym = Symbol(); assert.throws(() => params.getAll(obj), /^Error: toString$/); assert.throws(() => params.getAll(sym), diff --git a/test/parallel/test-whatwg-url-searchparams-has.js 
b/test/parallel/test-whatwg-url-searchparams-has.js index 9d7272f999c653..f2696063b998a1 100644 --- a/test/parallel/test-whatwg-url-searchparams-has.js +++ b/test/parallel/test-whatwg-url-searchparams-has.js @@ -46,7 +46,10 @@ test(function() { params.has(); }, /^TypeError: "name" argument must be specified$/); - const obj = { toString() { throw new Error('toString'); } }; + const obj = { + toString() { throw new Error('toString'); }, + valueOf() { throw new Error('valueOf'); } + }; const sym = Symbol(); assert.throws(() => params.has(obj), /^Error: toString$/); assert.throws(() => params.has(sym), diff --git a/test/parallel/test-whatwg-url-searchparams-set.js b/test/parallel/test-whatwg-url-searchparams-set.js index 0eee7b5c9a0130..acd62955d22a44 100644 --- a/test/parallel/test-whatwg-url-searchparams-set.js +++ b/test/parallel/test-whatwg-url-searchparams-set.js @@ -44,7 +44,10 @@ test(function() { params.set('a'); }, /^TypeError: "name" and "value" arguments must be specified$/); - const obj = { toString() { throw new Error('toString'); } }; + const obj = { + toString() { throw new Error('toString'); }, + valueOf() { throw new Error('valueOf'); } + }; const sym = Symbol(); assert.throws(() => params.append(obj, 'b'), /^Error: toString$/); assert.throws(() => params.append('a', obj), /^Error: toString$/); diff --git a/test/parallel/test-whatwg-url-setters.js b/test/parallel/test-whatwg-url-setters.js index 6e1f4bccbd2314..253415dad6e5a9 100644 --- a/test/parallel/test-whatwg-url-setters.js +++ b/test/parallel/test-whatwg-url-setters.js @@ -107,7 +107,10 @@ startURLSettersTests() { const url = new URL('http://example.com/'); - const obj = { toString() { throw new Error('toString'); } }; + const obj = { + toString() { throw new Error('toString'); }, + valueOf() { throw new Error('valueOf'); } + }; const sym = Symbol(); const props = Object.getOwnPropertyDescriptors(Object.getPrototypeOf(url)); for (const [name, { set }] of Object.entries(props)) { From 
6b2cb6dd2efcca42a194383c532967b2dde4619f Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Fri, 3 Feb 2017 17:34:47 -0800 Subject: [PATCH 05/21] url: spec-compliant URLSearchParams serializer PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- ...cy-vs-whatwg-url-searchparams-serialize.js | 2 +- lib/internal/url.js | 105 ++++++++++++++++-- test/fixtures/url-tests.js | 2 +- test/parallel/test-whatwg-url-constructor.js | 6 +- ...est-whatwg-url-searchparams-constructor.js | 4 +- ...est-whatwg-url-searchparams-stringifier.js | 24 ++-- test/parallel/test-whatwg-url-searchparams.js | 2 +- 7 files changed, 113 insertions(+), 32 deletions(-) diff --git a/benchmark/url/legacy-vs-whatwg-url-searchparams-serialize.js b/benchmark/url/legacy-vs-whatwg-url-searchparams-serialize.js index 7e56b5fba6e4f8..2b8d2c36a810b3 100644 --- a/benchmark/url/legacy-vs-whatwg-url-searchparams-serialize.js +++ b/benchmark/url/legacy-vs-whatwg-url-searchparams-serialize.js @@ -7,7 +7,7 @@ const inputs = require('../fixtures/url-inputs.js').searchParams; const bench = common.createBenchmark(main, { type: Object.keys(inputs), method: ['legacy', 'whatwg'], - n: [1e5] + n: [1e6] }); function useLegacy(n, input, prop) { diff --git a/lib/internal/url.js b/lib/internal/url.js index 56fbfd27070863..0e43364a792ae1 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -1,7 +1,7 @@ 'use strict'; const util = require('util'); -const { StorageObject } = require('internal/querystring'); +const { hexTable, StorageObject } = require('internal/querystring'); const binding = process.binding('url'); const context = Symbol('context'); const cannotBeBase = Symbol('cannot-be-base'); @@ -594,18 +594,99 @@ function getParamsFromObject(obj) { return values; } -function getObjectFromParams(array) { - const obj = new StorageObject(); - for (var i = 0; i < array.length; i += 2) { - const name = array[i]; - const value = array[i + 1]; - if (obj[name]) { - obj[name].push(value); - } else { 
- obj[name] = [value]; +// Adapted from querystring's implementation. +// Ref: https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer +const noEscape = [ +//0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, // 0x20 - 0x2F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 0x50 - 0x5F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 // 0x70 - 0x7F +]; + +// Special version of hexTable that uses `+` for U+0020 SPACE. +const paramHexTable = hexTable.slice(); +paramHexTable[0x20] = '+'; + +function escapeParam(str) { + const len = str.length; + if (len === 0) + return ''; + + var out = ''; + var lastPos = 0; + + for (var i = 0; i < len; i++) { + var c = str.charCodeAt(i); + + // ASCII + if (c < 0x80) { + if (noEscape[c] === 1) + continue; + if (lastPos < i) + out += str.slice(lastPos, i); + lastPos = i + 1; + out += paramHexTable[c]; + continue; + } + + if (lastPos < i) + out += str.slice(lastPos, i); + + // Multi-byte characters ... + if (c < 0x800) { + lastPos = i + 1; + out += paramHexTable[0xC0 | (c >> 6)] + + paramHexTable[0x80 | (c & 0x3F)]; + continue; + } + if (c < 0xD800 || c >= 0xE000) { + lastPos = i + 1; + out += paramHexTable[0xE0 | (c >> 12)] + + paramHexTable[0x80 | ((c >> 6) & 0x3F)] + + paramHexTable[0x80 | (c & 0x3F)]; + continue; } + // Surrogate pair + ++i; + var c2; + if (i < len) + c2 = str.charCodeAt(i) & 0x3FF; + else { + // This branch should never happen because all URLSearchParams entries + // should already be converted to USVString. But, included for + // completion's sake anyway. 
+ c2 = 0; + } + lastPos = i + 1; + c = 0x10000 + (((c & 0x3FF) << 10) | c2); + out += paramHexTable[0xF0 | (c >> 18)] + + paramHexTable[0x80 | ((c >> 12) & 0x3F)] + + paramHexTable[0x80 | ((c >> 6) & 0x3F)] + + paramHexTable[0x80 | (c & 0x3F)]; } - return obj; + if (lastPos === 0) + return str; + if (lastPos < len) + return out + str.slice(lastPos); + return out; +} + +// application/x-www-form-urlencoded serializer +// Ref: https://url.spec.whatwg.org/#concept-urlencoded-serializer +function serializeParams(array) { + const len = array.length; + if (len === 0) + return ''; + + var output = `${escapeParam(array[0])}=${escapeParam(array[1])}`; + for (var i = 2; i < len; i += 2) + output += `&${escapeParam(array[i])}=${escapeParam(array[i + 1])}`; + return output; } // Mainly to mitigate func-name-matching ESLint rule @@ -990,7 +1071,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { throw new TypeError('Value of `this` is not a URLSearchParams'); } - return querystring.stringify(getObjectFromParams(this[searchParams])); + return serializeParams(this[searchParams]); } }); diff --git a/test/fixtures/url-tests.js b/test/fixtures/url-tests.js index 0e510eb366d0f2..a4e7de9f26b199 100644 --- a/test/fixtures/url-tests.js +++ b/test/fixtures/url-tests.js @@ -4639,7 +4639,7 @@ module.exports = "port": "", "pathname": "/foo/bar", "search": "??a=b&c=d", - // "searchParams": "%3Fa=b&c=d", + "searchParams": "%3Fa=b&c=d", "hash": "" }, "# Scheme only", diff --git a/test/parallel/test-whatwg-url-constructor.js b/test/parallel/test-whatwg-url-constructor.js index c5d70b3f4c1544..c2773b9af105fb 100644 --- a/test/parallel/test-whatwg-url-constructor.js +++ b/test/parallel/test-whatwg-url-constructor.js @@ -120,12 +120,12 @@ function runURLSearchParamTests() { // And in the other direction, altering searchParams propagates // back to 'search'. 
searchParams.append('i', ' j ') - // assert_equals(url.search, '?e=f&g=h&i=+j+') - // assert_equals(url.searchParams.toString(), 'e=f&g=h&i=+j+') + assert_equals(url.search, '?e=f&g=h&i=+j+') + assert_equals(url.searchParams.toString(), 'e=f&g=h&i=+j+') assert_equals(searchParams.get('i'), ' j ') searchParams.set('e', 'updated') - // assert_equals(url.search, '?e=updated&g=h&i=+j+') + assert_equals(url.search, '?e=updated&g=h&i=+j+') assert_equals(searchParams.get('e'), 'updated') var url2 = bURL('http://example.org/file??a=b&c=d') diff --git a/test/parallel/test-whatwg-url-searchparams-constructor.js b/test/parallel/test-whatwg-url-searchparams-constructor.js index 236d01396095f1..da459fe99c7fb8 100644 --- a/test/parallel/test-whatwg-url-searchparams-constructor.js +++ b/test/parallel/test-whatwg-url-searchparams-constructor.js @@ -11,7 +11,7 @@ const { /* eslint-disable */ var params; // Strict mode fix for WPT. /* WPT Refs: - https://github.com/w3c/web-platform-tests/blob/405394a/url/urlsearchparams-constructor.html + https://github.com/w3c/web-platform-tests/blob/e94c604916/url/urlsearchparams-constructor.html License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html */ test(function() { @@ -154,7 +154,7 @@ test(function() { }, "Constructor with sequence of sequences of strings"); [ -// { "input": {"+": "%C2"}, "output": [[" ", "\uFFFD"]], "name": "object with +" }, + { "input": {"+": "%C2"}, "output": [["+", "%C2"]], "name": "object with +" }, { "input": {c: "x", a: "?"}, "output": [["c", "x"], ["a", "?"]], "name": "object with two keys" }, { "input": [["c", "x"], ["a", "?"]], "output": [["c", "x"], ["a", "?"]], "name": "array with two keys" } ].forEach((val) => { diff --git a/test/parallel/test-whatwg-url-searchparams-stringifier.js b/test/parallel/test-whatwg-url-searchparams-stringifier.js index 7e85b9726167ad..ac09979e027b7c 100644 --- a/test/parallel/test-whatwg-url-searchparams-stringifier.js +++ 
b/test/parallel/test-whatwg-url-searchparams-stringifier.js @@ -10,14 +10,14 @@ const { test, assert_equals } = common.WPT; https://github.com/w3c/web-platform-tests/blob/8791bed/url/urlsearchparams-stringifier.html License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html */ -// test(function() { -// var params = new URLSearchParams(); -// params.append('a', 'b c'); -// assert_equals(params + '', 'a=b+c'); -// params.delete('a'); -// params.append('a b', 'c'); -// assert_equals(params + '', 'a+b=c'); -// }, 'Serialize space'); +test(function() { + var params = new URLSearchParams(); + params.append('a', 'b c'); + assert_equals(params + '', 'a=b+c'); + params.delete('a'); + params.append('a b', 'c'); + assert_equals(params + '', 'a+b=c'); +}, 'Serialize space'); test(function() { var params = new URLSearchParams(); @@ -112,10 +112,10 @@ test(function() { test(function() { var params; - // params = new URLSearchParams('a=b&c=d&&e&&'); - // assert_equals(params.toString(), 'a=b&c=d&e='); - // params = new URLSearchParams('a = b &a=b&c=d%20'); - // assert_equals(params.toString(), 'a+=+b+&a=b&c=d+'); + params = new URLSearchParams('a=b&c=d&&e&&'); + assert_equals(params.toString(), 'a=b&c=d&e='); + params = new URLSearchParams('a = b &a=b&c=d%20'); + assert_equals(params.toString(), 'a+=+b+&a=b&c=d+'); // The lone '=' _does_ survive the roundtrip. params = new URLSearchParams('a=&a=b'); assert_equals(params.toString(), 'a=&a=b'); diff --git a/test/parallel/test-whatwg-url-searchparams.js b/test/parallel/test-whatwg-url-searchparams.js index e0d1826596704c..7d6df646407269 100644 --- a/test/parallel/test-whatwg-url-searchparams.js +++ b/test/parallel/test-whatwg-url-searchparams.js @@ -7,7 +7,7 @@ const URL = require('url').URL; // Tests below are not from WPT. 
const serialized = 'a=a&a=1&a=true&a=undefined&a=null&a=%EF%BF%BD' + '&a=%EF%BF%BD&a=%F0%9F%98%80&a=%EF%BF%BD%EF%BF%BD' + - '&a=%5Bobject%20Object%5D'; + '&a=%5Bobject+Object%5D'; const values = ['a', 1, true, undefined, null, '\uD83D', '\uDE00', '\uD83D\uDE00', '\uDE00\uD83D', {}]; const normalizedValues = ['a', '1', 'true', 'undefined', 'null', '\uFFFD', From 7e7fd662fb94e8f31230a304b9d06fd016dcc047 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Tue, 14 Mar 2017 23:41:57 -0700 Subject: [PATCH 06/21] src: remove explicit UTF-8 validity check in url This step was never part of the URL Standard's host parser algorithm, and is rendered unnecessary after IDNA errors are no longer ignored. PR-URL: https://github.com/nodejs/node/pull/12507 Refs: c2a302c50b3787666339371 "src: do not ignore IDNA conversion error" Reviewed-By: James M Snell --- src/node_url.cc | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index d9213738e7f894..6cd78c2c6c04c8 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -15,11 +15,6 @@ #include #include -#if defined(NODE_HAVE_I18N_SUPPORT) -#include -#include -#endif - #define UNICODE_REPLACEMENT_CHARACTER 0xFFFD namespace node { @@ -74,21 +69,6 @@ namespace url { output->assign(*buf, buf.length()); return true; } - - // Unfortunately there's not really a better way to do this. - // Iterate through each encoded codepoint and verify that - // it is a valid unicode codepoint. - static bool IsValidUTF8(std::string* input) { - const char* p = input->c_str(); - int32_t len = input->length(); - for (int32_t i = 0; i < len;) { - UChar32 c; - U8_NEXT_UNSAFE(p, i, c); - if (!U_IS_UNICODE_CHAR(c)) - return false; - } - return true; - } #else // Intentional non-ops if ICU is not present. 
static inline bool ToUnicode(std::string* input, std::string* output) { @@ -100,10 +80,6 @@ namespace url { *output = *input; return true; } - - static bool IsValidUTF8(std::string* input) { - return true; - } #endif // If a UTF-16 character is a low/trailing surrogate. @@ -355,12 +331,6 @@ namespace url { // First, we have to percent decode PercentDecode(input, length, &decoded); - // If there are any invalid UTF8 byte sequences, we have to fail. - // Unfortunately this means iterating through the string and checking - // each decoded codepoint. - if (!IsValidUTF8(&decoded)) - goto end; - // Then we have to punycode toASCII if (!ToASCII(&decoded, &decoded)) goto end; From 4a94c2d6203af6e8169475fbd8034322a86d134a Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Tue, 14 Mar 2017 20:54:13 -0700 Subject: [PATCH 07/21] querystring: move isHexTable to internal PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- lib/internal/querystring.js | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/lib/internal/querystring.js b/lib/internal/querystring.js index 2f8d77d3e9d2e7..c5dc0f63c7b30b 100644 --- a/lib/internal/querystring.js +++ b/lib/internal/querystring.js @@ -4,6 +4,25 @@ const hexTable = new Array(256); for (var i = 0; i < 256; ++i) hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase(); +const isHexTable = [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0 - 15 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16 - 31 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32 - 47 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 48 - 63 + 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 64 - 79 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80 - 95 + 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 96 - 111 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 112 - 127 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 128 ... 
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // ... 256 +]; + // Instantiating this is faster than explicitly calling `Object.create(null)` // to get a "clean" empty object (tested with v8 v4.9). function StorageObject() {} @@ -11,5 +30,6 @@ StorageObject.prototype = Object.create(null); module.exports = { hexTable, + isHexTable, StorageObject }; From d86f0d702afa8277e6f6bf1f7d4cd786420a05d3 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Tue, 14 Mar 2017 21:01:04 -0700 Subject: [PATCH 08/21] url: spec-compliant URLSearchParams parser The entire `URLSearchParams` class is now fully spec-compliant. PR-URL: https://github.com/nodejs/node/pull/12507 Fixes: https://github.com/nodejs/node/issues/10821 Reviewed-By: James M Snell --- ...legacy-vs-whatwg-url-searchparams-parse.js | 2 +- lib/internal/url.js | 115 +++++++++++++++--- test/fixtures/url-searchparams.js | 68 +++++++++++ test/parallel/test-whatwg-url-searchparams.js | 29 ++++- 4 files changed, 197 insertions(+), 17 deletions(-) create mode 100644 test/fixtures/url-searchparams.js diff --git a/benchmark/url/legacy-vs-whatwg-url-searchparams-parse.js b/benchmark/url/legacy-vs-whatwg-url-searchparams-parse.js index 86714df6c196a7..b4a80af4e5eabd 100644 --- a/benchmark/url/legacy-vs-whatwg-url-searchparams-parse.js +++ b/benchmark/url/legacy-vs-whatwg-url-searchparams-parse.js @@ -7,7 +7,7 @@ const inputs = require('../fixtures/url-inputs.js').searchParams; const bench = common.createBenchmark(main, { type: Object.keys(inputs), method: ['legacy', 'whatwg'], - n: [1e5] + n: [1e6] }); function useLegacy(n, input) { diff --git a/lib/internal/url.js b/lib/internal/url.js index 0e43364a792ae1..7a6ff227ed4191 100644 --- 
a/lib/internal/url.js +++ b/lib/internal/url.js @@ -1,7 +1,11 @@ 'use strict'; const util = require('util'); -const { hexTable, StorageObject } = require('internal/querystring'); +const { + hexTable, + isHexTable, + StorageObject +} = require('internal/querystring'); const binding = process.binding('url'); const context = Symbol('context'); const cannotBeBase = Symbol('cannot-be-base'); @@ -575,23 +579,106 @@ function initSearchParams(url, init) { url[searchParams] = []; return; } - url[searchParams] = getParamsFromObject(querystring.parse(init)); + url[searchParams] = parseParams(init); } -function getParamsFromObject(obj) { - const keys = Object.keys(obj); - const values = []; - for (var i = 0; i < keys.length; i++) { - const name = keys[i]; - const value = obj[name]; - if (Array.isArray(value)) { - for (const item of value) - values.push(name, item); - } else { - values.push(name, value); +// application/x-www-form-urlencoded parser +// Ref: https://url.spec.whatwg.org/#concept-urlencoded-parser +function parseParams(qs) { + const out = []; + var pairStart = 0; + var lastPos = 0; + var seenSep = false; + var buf = ''; + var encoded = false; + var encodeCheck = 0; + var i; + for (i = 0; i < qs.length; ++i) { + const code = qs.charCodeAt(i); + + // Try matching key/value pair separator + if (code === 38/*&*/) { + if (pairStart === i) { + // We saw an empty substring between pair separators + lastPos = pairStart = i + 1; + continue; + } + + if (lastPos < i) + buf += qs.slice(lastPos, i); + if (encoded) + buf = querystring.unescape(buf); + out.push(buf); + + // If `buf` is the key, add an empty value. + if (!seenSep) + out.push(''); + + seenSep = false; + buf = ''; + encoded = false; + encodeCheck = 0; + lastPos = pairStart = i + 1; + continue; + } + + // Try matching key/value separator (e.g. '=') if we haven't already + if (!seenSep && code === 61/*=*/) { + // Key/value separator match! 
+ if (lastPos < i) + buf += qs.slice(lastPos, i); + if (encoded) + buf = querystring.unescape(buf); + out.push(buf); + + seenSep = true; + buf = ''; + encoded = false; + encodeCheck = 0; + lastPos = i + 1; + continue; + } + + // Handle + and percent decoding. + if (code === 43/*+*/) { + if (lastPos < i) + buf += qs.slice(lastPos, i); + buf += ' '; + lastPos = i + 1; + } else if (!encoded) { + // Try to match an (valid) encoded byte (once) to minimize unnecessary + // calls to string decoding functions + if (code === 37/*%*/) { + encodeCheck = 1; + } else if (encodeCheck > 0) { + // eslint-disable-next-line no-extra-boolean-cast + if (!!isHexTable[code]) { + if (++encodeCheck === 3) + encoded = true; + } else { + encodeCheck = 0; + } + } } } - return values; + + // Deal with any leftover key or value data + + // There is a trailing &. No more processing is needed. + if (pairStart === i) + return out; + + if (lastPos < i) + buf += qs.slice(lastPos, i); + if (encoded) + buf = querystring.unescape(buf); + out.push(buf); + + // If `buf` is the key, add an empty value. + if (!seenSep) + out.push(''); + + return out; } // Adapted from querystring's implementation. 
diff --git a/test/fixtures/url-searchparams.js b/test/fixtures/url-searchparams.js new file mode 100644 index 00000000000000..3b186fc97bc38b --- /dev/null +++ b/test/fixtures/url-searchparams.js @@ -0,0 +1,68 @@ +module.exports = [ + ['', '', []], + [ + 'foo=918854443121279438895193', + 'foo=918854443121279438895193', + [['foo', '918854443121279438895193']] + ], + ['foo=bar', 'foo=bar', [['foo', 'bar']]], + ['foo=bar&foo=quux', 'foo=bar&foo=quux', [['foo', 'bar'], ['foo', 'quux']]], + ['foo=1&bar=2', 'foo=1&bar=2', [['foo', '1'], ['bar', '2']]], + [ + "my%20weird%20field=q1!2%22'w%245%267%2Fz8)%3F", + 'my+weird+field=q1%212%22%27w%245%267%2Fz8%29%3F', + [['my weird field', 'q1!2"\'w$5&7/z8)?']] + ], + ['foo%3Dbaz=bar', 'foo%3Dbaz=bar', [['foo=baz', 'bar']]], + ['foo=baz=bar', 'foo=baz%3Dbar', [['foo', 'baz=bar']]], + [ + 'str=foo&arr=1&somenull&arr=2&undef=&arr=3', + 'str=foo&arr=1&somenull=&arr=2&undef=&arr=3', + [ + ['str', 'foo'], + ['arr', '1'], + ['somenull', ''], + ['arr', '2'], + ['undef', ''], + ['arr', '3'] + ] + ], + [' foo = bar ', '+foo+=+bar+', [[' foo ', ' bar ']]], + ['foo=%zx', 'foo=%25zx', [['foo', '%zx']]], + ['foo=%EF%BF%BD', 'foo=%EF%BF%BD', [['foo', '\ufffd']]], + // See: https://github.com/joyent/node/issues/3058 + ['foo&bar=baz', 'foo=&bar=baz', [['foo', ''], ['bar', 'baz']]], + ['a=b&c&d=e', 'a=b&c=&d=e', [['a', 'b'], ['c', ''], ['d', 'e']]], + ['a=b&c=&d=e', 'a=b&c=&d=e', [['a', 'b'], ['c', ''], ['d', 'e']]], + ['a=b&=c&d=e', 'a=b&=c&d=e', [['a', 'b'], ['', 'c'], ['d', 'e']]], + ['a=b&=&d=e', 'a=b&=&d=e', [['a', 'b'], ['', ''], ['d', 'e']]], + ['&&foo=bar&&', 'foo=bar', [['foo', 'bar']]], + ['&', '', []], + ['&&&&', '', []], + ['&=&', '=', [['', '']]], + ['&=&=', '=&=', [['', ''], ['', '']]], + ['=', '=', [['', '']]], + ['+', '+=', [[' ', '']]], + ['+=', '+=', [[' ', '']]], + ['=+', '=+', [['', ' ']]], + ['+=&', '+=', [[' ', '']]], + ['a&&b', 'a=&b=', [['a', ''], ['b', '']]], + ['a=a&&b=b', 'a=a&b=b', [['a', 'a'], ['b', 'b']]], + ['&a', 
'a=', [['a', '']]], + ['&=', '=', [['', '']]], + ['a&a&', 'a=&a=', [['a', ''], ['a', '']]], + ['a&a&a&', 'a=&a=&a=', [['a', ''], ['a', ''], ['a', '']]], + ['a&a&a&a&', 'a=&a=&a=&a=', [['a', ''], ['a', ''], ['a', ''], ['a', '']]], + ['a=&a=value&a=', 'a=&a=value&a=', [['a', ''], ['a', 'value'], ['a', '']]], + ['foo%20bar=baz%20quux', 'foo+bar=baz+quux', [['foo bar', 'baz quux']]], + ['+foo=+bar', '+foo=+bar', [[' foo', ' bar']]], + [ + // fake percent encoding + 'foo=%©ar&baz=%A©uux&xyzzy=%©ud', + 'foo=%25%C2%A9ar&baz=%25A%C2%A9uux&xyzzy=%25%C2%A9ud', + [['foo', '%©ar'], ['baz', '%A©uux'], ['xyzzy', '%©ud']] + ], + // always preserve order of key-value pairs + ['a=1&b=2&a=3', 'a=1&b=2&a=3', [['a', '1'], ['b', '2'], ['a', '3']]], + ['?a', '%3Fa=', [['?a', '']]] +]; diff --git a/test/parallel/test-whatwg-url-searchparams.js b/test/parallel/test-whatwg-url-searchparams.js index 7d6df646407269..c7acb7d909d98c 100644 --- a/test/parallel/test-whatwg-url-searchparams.js +++ b/test/parallel/test-whatwg-url-searchparams.js @@ -1,8 +1,9 @@ 'use strict'; -require('../common'); +const common = require('../common'); const assert = require('assert'); -const URL = require('url').URL; +const path = require('path'); +const { URL, URLSearchParams } = require('url'); // Tests below are not from WPT. 
const serialized = 'a=a&a=1&a=true&a=undefined&a=null&a=%EF%BF%BD' + @@ -77,3 +78,27 @@ assert.throws(() => sp.forEach(1), m.search = '?a=a&b=b'; assert.strictEqual(sp.toString(), 'a=a&b=b'); + +const tests = require(path.join(common.fixturesDir, 'url-searchparams.js')); + +for (const [input, expected, parsed] of tests) { + if (input[0] !== '?') { + const sp = new URLSearchParams(input); + assert.strictEqual(String(sp), expected); + assert.deepStrictEqual(Array.from(sp), parsed); + + m.search = input; + assert.strictEqual(String(m.searchParams), expected); + assert.deepStrictEqual(Array.from(m.searchParams), parsed); + } + + { + const sp = new URLSearchParams(`?${input}`); + assert.strictEqual(String(sp), expected); + assert.deepStrictEqual(Array.from(sp), parsed); + + m.search = `?${input}`; + assert.strictEqual(String(m.searchParams), expected); + assert.deepStrictEqual(Array.from(m.searchParams), parsed); + } +} From a2a3d6ce4fd00458140b28fb7443637ae6126ded Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Wed, 22 Mar 2017 11:39:13 -0700 Subject: [PATCH 09/21] url: use a class for WHATWG url[context] The object is used as a structure, not as a map, which `StorageObject` was designed for. PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- lib/internal/url.js | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/lib/internal/url.js b/lib/internal/url.js index 7a6ff227ed4191..64156803d8d30e 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -3,8 +3,7 @@ const util = require('util'); const { hexTable, - isHexTable, - StorageObject + isHexTable } = require('internal/querystring'); const binding = process.binding('url'); const context = Symbol('context'); @@ -97,6 +96,26 @@ class TupleOrigin { } } +// This class provides the internal state of a URL object. An instance of this +// class is stored in every URL object and is accessed internally by setters +// and getters. 
It roughly corresponds to the concept of a URL record in the +// URL Standard, with a few differences. It is also the object transported to +// the C++ binding. +// Refs: https://url.spec.whatwg.org/#concept-url +class URLContext { + constructor() { + this.flags = 0; + this.scheme = undefined; + this.username = undefined; + this.password = undefined; + this.host = undefined; + this.port = undefined; + this.path = []; + this.query = undefined; + this.fragment = undefined; + } +} + function onParseComplete(flags, protocol, username, password, host, port, path, query, fragment) { var ctx = this[context]; @@ -125,7 +144,7 @@ function onParseError(flags, input) { // Reused by URL constructor and URL#href setter. function parse(url, input, base) { const base_context = base ? base[context] : undefined; - url[context] = new StorageObject(); + url[context] = new URLContext(); binding.parse(input.trim(), -1, base_context, undefined, onParseComplete.bind(url), onParseError); From 75ef213b22770465ac36c962d6621c592713c100 Mon Sep 17 00:00:00 2001 From: James M Snell Date: Sun, 26 Mar 2017 19:49:33 -0700 Subject: [PATCH 10/21] url: add ToObject method to native URL class Provides a factory method to convert a native URL class into a JS URL object. ```c++ Environment* env = ... 
URL url("http://example.org/a/b/c?query#fragment"); MaybeLocal val = url.ToObject(env); ``` PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- lib/internal/bootstrap_node.js | 4 ++ lib/internal/url.js | 25 +++++++++ src/env.h | 1 + src/node_url.cc | 96 ++++++++++++++++++++++++++++------ src/node_url.h | 9 ++++ 5 files changed, 118 insertions(+), 17 deletions(-) diff --git a/lib/internal/bootstrap_node.js b/lib/internal/bootstrap_node.js index 1aa2f8c23b442a..3e53d6829bc1a3 100644 --- a/lib/internal/bootstrap_node.js +++ b/lib/internal/bootstrap_node.js @@ -54,6 +54,10 @@ _process.setupRawDebug(); + // Ensure setURLConstructor() is called before the native + // URL::ToObject() method is used. + NativeModule.require('internal/url'); + Object.defineProperty(process, 'argv0', { enumerable: true, configurable: false, diff --git a/lib/internal/url.js b/lib/internal/url.js index 64156803d8d30e..5fcabb803ef473 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -1400,6 +1400,31 @@ function getPathFromURL(path) { return isWindows ? 
getPathFromURLWin32(path) : getPathFromURLPosix(path); } +function NativeURL(ctx) { + this[context] = ctx; +} +NativeURL.prototype = URL.prototype; + +function constructUrl(flags, protocol, username, password, + host, port, path, query, fragment) { + var ctx = new URLContext(); + ctx.flags = flags; + ctx.scheme = protocol; + ctx.username = username; + ctx.password = password; + ctx.port = port; + ctx.path = path; + ctx.query = query; + ctx.fragment = fragment; + ctx.host = host; + const url = new NativeURL(ctx); + url[searchParams] = new URLSearchParams(); + url[searchParams][context] = url; + initSearchParams(url[searchParams], query); + return url; +} +binding.setURLConstructor(constructUrl); + module.exports = { toUSVString, getPathFromURL, diff --git a/src/env.h b/src/env.h index 28f9e0c1728fd9..2e3337c44fe30f 100644 --- a/src/env.h +++ b/src/env.h @@ -249,6 +249,7 @@ namespace node { V(tls_wrap_constructor_template, v8::FunctionTemplate) \ V(tty_constructor_template, v8::FunctionTemplate) \ V(udp_constructor_function, v8::Function) \ + V(url_constructor_function, v8::Function) \ V(write_wrap_constructor_function, v8::Function) \ class Environment; diff --git a/src/node_url.cc b/src/node_url.cc index 6cd78c2c6c04c8..4f3525332ebd94 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -27,9 +27,11 @@ using v8::HandleScope; using v8::Integer; using v8::Isolate; using v8::Local; +using v8::MaybeLocal; using v8::Null; using v8::Object; using v8::String; +using v8::TryCatch; using v8::Undefined; using v8::Value; @@ -1226,6 +1228,29 @@ namespace url { } } + static inline void SetArgs(Environment* env, + Local argv[], + const struct url_data* url) { + Isolate* isolate = env->isolate(); + argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url->flags); + if (url->flags & URL_FLAGS_HAS_SCHEME) + argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str()); + if (url->flags & URL_FLAGS_HAS_USERNAME) + argv[ARG_USERNAME] = UTF8STRING(isolate, url->username); + if 
(url->flags & URL_FLAGS_HAS_PASSWORD) + argv[ARG_PASSWORD] = UTF8STRING(isolate, url->password); + if (url->flags & URL_FLAGS_HAS_HOST) + argv[ARG_HOST] = UTF8STRING(isolate, url->host); + if (url->flags & URL_FLAGS_HAS_QUERY) + argv[ARG_QUERY] = UTF8STRING(isolate, url->query); + if (url->flags & URL_FLAGS_HAS_FRAGMENT) + argv[ARG_FRAGMENT] = UTF8STRING(isolate, url->fragment); + if (url->port > -1) + argv[ARG_PORT] = Integer::New(isolate, url->port); + if (url->flags & URL_FLAGS_HAS_PATH) + argv[ARG_PATH] = Copy(env, url->path); + } + static void Parse(Environment* env, Local recv, const char* input, @@ -1267,23 +1292,7 @@ namespace url { undef, undef, }; - argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags); - if (url.flags & URL_FLAGS_HAS_SCHEME) - argv[ARG_PROTOCOL] = OneByteString(isolate, url.scheme.c_str()); - if (url.flags & URL_FLAGS_HAS_USERNAME) - argv[ARG_USERNAME] = UTF8STRING(isolate, url.username); - if (url.flags & URL_FLAGS_HAS_PASSWORD) - argv[ARG_PASSWORD] = UTF8STRING(isolate, url.password); - if (url.flags & URL_FLAGS_HAS_HOST) - argv[ARG_HOST] = UTF8STRING(isolate, url.host); - if (url.flags & URL_FLAGS_HAS_QUERY) - argv[ARG_QUERY] = UTF8STRING(isolate, url.query); - if (url.flags & URL_FLAGS_HAS_FRAGMENT) - argv[ARG_FRAGMENT] = UTF8STRING(isolate, url.fragment); - if (url.port > -1) - argv[ARG_PORT] = Integer::New(isolate, url.port); - if (url.flags & URL_FLAGS_HAS_PATH) - argv[ARG_PATH] = Copy(env, url.path); + SetArgs(env, argv, &url); (void)cb->Call(context, recv, arraysize(argv), argv); } else if (error_cb->IsFunction()) { Local argv[2] = { undef, undef }; @@ -1418,6 +1427,58 @@ namespace url { v8::NewStringType::kNormal).ToLocalChecked()); } + // This function works by calling out to a JS function that creates and + // returns the JS URL object. Be mindful of the JS<->Native boundary + // crossing that is required. 
+ const Local URL::ToObject(Environment* env) const { + Isolate* isolate = env->isolate(); + Local context = env->context(); + HandleScope handle_scope(isolate); + Context::Scope context_scope(context); + + const Local undef = Undefined(isolate); + + if (context_.flags & URL_FLAGS_FAILED) + return Local(); + + Local argv[9] = { + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + }; + SetArgs(env, argv, &context_); + + TryCatch try_catch(isolate); + + // The SetURLConstructor method must have been called already to + // set the constructor function used below. SetURLConstructor is + // called automatically when the internal/url.js module is loaded + // during the internal/bootstrap_node.js processing. + MaybeLocal ret = + env->url_constructor_function() + ->Call(env->context(), undef, 9, argv); + + if (ret.IsEmpty()) { + ClearFatalExceptionHandlers(env); + FatalException(isolate, try_catch); + } + + return ret.ToLocalChecked(); + } + + static void SetURLConstructor(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_EQ(args.Length(), 1); + CHECK(args[0]->IsFunction()); + env->set_url_constructor_function(args[0].As()); + } + static void Init(Local target, Local unused, Local context, @@ -1428,6 +1489,7 @@ namespace url { env->SetMethod(target, "toUSVString", ToUSVString); env->SetMethod(target, "domainToASCII", DomainToASCII); env->SetMethod(target, "domainToUnicode", DomainToUnicode); + env->SetMethod(target, "setURLConstructor", SetURLConstructor); #define XX(name, _) NODE_DEFINE_CONSTANT(target, name); FLAGS(XX) diff --git a/src/node_url.h b/src/node_url.h index b9d91782be9e59..4d18eb6f0a910d 100644 --- a/src/node_url.h +++ b/src/node_url.h @@ -4,11 +4,18 @@ #if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS #include "node.h" +#include "env.h" +#include "env-inl.h" + #include namespace node { namespace url { +using v8::Local; +using v8::Value; + + #define BIT_AT(a, i) \ (!!((unsigned 
int) (a)[(unsigned int) (i) >> 3] & \ (1 << ((unsigned int) (i) & 7)))) @@ -619,6 +626,8 @@ class URL { return ret; } + const Local ToObject(Environment* env) const; + private: struct url_data context_; }; From 5b7b775e54a541a03b93cbb14679f7f5ad2d5358 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Wed, 15 Mar 2017 19:33:08 -0700 Subject: [PATCH 11/21] src: WHATWG URL C++ parser cleanup - Clarify port state - Remove scheme flag - Clarify URL_FLAG_TERMINATED PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- src/node_url.cc | 54 ++++++++++++++++++++++++++++--------------------- src/node_url.h | 13 ++++++------ 2 files changed, 37 insertions(+), 30 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index 4f3525332ebd94..54a2944588071c 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -494,7 +494,9 @@ namespace url { if (flags->IsInt32()) base->flags = flags->Int32Value(context).FromJust(); - GET_AND_SET(env, base_obj, scheme, base, URL_FLAGS_HAS_SCHEME); + Local scheme = GET(env, base_obj, "scheme"); + base->scheme = Utf8Value(env->isolate(), scheme).out(); + GET_AND_SET(env, base_obj, username, base, URL_FLAGS_HAS_USERNAME); GET_AND_SET(env, base_obj, password, base, URL_FLAGS_HAS_PASSWORD); GET_AND_SET(env, base_obj, host, base, URL_FLAGS_HAS_HOST); @@ -644,7 +646,7 @@ namespace url { state = kNoScheme; continue; } else { - url->flags |= URL_FLAGS_TERMINATED; + url->flags |= URL_FLAGS_FAILED; return; } break; @@ -654,10 +656,12 @@ namespace url { p++; continue; } else if (ch == ':' || (has_state_override && ch == kEOL)) { - buffer += ':'; if (buffer.size() > 0) { - url->flags |= URL_FLAGS_HAS_SCHEME; + buffer += ':'; url->scheme = buffer; + } else if (has_state_override) { + url->flags |= URL_FLAGS_TERMINATED; + return; } if (IsSpecial(url->scheme)) { url->flags |= URL_FLAGS_SPECIAL; @@ -672,7 +676,6 @@ namespace url { state = kFile; } else if (special && has_base && - base->flags & URL_FLAGS_HAS_SCHEME && url->scheme == 
base->scheme) { state = kSpecialRelativeOrAuthority; } else if (special) { @@ -692,7 +695,7 @@ namespace url { p = input; continue; } else { - url->flags |= URL_FLAGS_TERMINATED; + url->flags |= URL_FLAGS_FAILED; return; } break; @@ -702,7 +705,6 @@ namespace url { url->flags |= URL_FLAGS_FAILED; return; } else if (cannot_be_base && ch == '#') { - url->flags |= URL_FLAGS_HAS_SCHEME; url->scheme = base->scheme; if (IsSpecial(url->scheme)) { url->flags |= URL_FLAGS_SPECIAL; @@ -725,12 +727,10 @@ namespace url { url->flags |= URL_FLAGS_CANNOT_BE_BASE; state = kFragment; } else if (has_base && - base->flags & URL_FLAGS_HAS_SCHEME && base->scheme != "file:") { state = kRelative; continue; } else { - url->flags |= URL_FLAGS_HAS_SCHEME; url->scheme = "file:"; url->flags |= URL_FLAGS_SPECIAL; special = true; @@ -756,7 +756,6 @@ namespace url { } break; case kRelative: - url->flags |= URL_FLAGS_HAS_SCHEME; url->scheme = base->scheme; if (IsSpecial(url->scheme)) { url->flags |= URL_FLAGS_SPECIAL; @@ -951,7 +950,6 @@ namespace url { buffer.clear(); state = kPort; if (state_override == kHostname) { - url->flags |= URL_FLAGS_TERMINATED; return; } } else if (ch == kEOL || @@ -972,7 +970,6 @@ namespace url { buffer.clear(); state = kPathStart; if (has_state_override) { - url->flags |= URL_FLAGS_TERMINATED; return; } } else { @@ -996,13 +993,26 @@ namespace url { int port = 0; for (size_t i = 0; i < buffer.size(); i++) port = port * 10 + buffer[i] - '0'; - if (port >= 0 && port <= 0xffff) { - url->port = NormalizePort(url->scheme, port); - } else if (!has_state_override) { - url->flags |= URL_FLAGS_FAILED; + if (port < 0 || port > 0xffff) { + // TODO(TimothyGu): This hack is currently needed for the host + // setter since it needs access to hostname if it is valid, and + // if the FAILED flag is set the entire response to JS layer + // will be empty. 
+ if (state_override == kHost) + url->port = -1; + else + url->flags |= URL_FLAGS_FAILED; return; } + url->port = NormalizePort(url->scheme, port); buffer.clear(); + } else if (has_state_override) { + // TODO(TimothyGu): Similar case as above. + if (state_override == kHost) + url->port = -1; + else + url->flags |= URL_FLAGS_TERMINATED; + return; } state = kPathStart; continue; @@ -1014,7 +1024,6 @@ namespace url { case kFile: base_is_file = ( has_base && - base->flags & URL_FLAGS_HAS_SCHEME && base->scheme == "file:"); switch (ch) { case kEOL: @@ -1097,7 +1106,6 @@ namespace url { state = kFileHost; } else { if (has_base && - base->flags & URL_FLAGS_HAS_SCHEME && base->scheme == "file:" && base->flags & URL_FLAGS_HAS_PATH && base->path.size() > 0 && @@ -1158,8 +1166,7 @@ namespace url { url->path.push_back(""); } } else { - if (url->flags & URL_FLAGS_HAS_SCHEME && - url->scheme == "file:" && + if (url->scheme == "file:" && url->path.empty() && buffer.size() == 2 && WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) { @@ -1233,8 +1240,7 @@ namespace url { const struct url_data* url) { Isolate* isolate = env->isolate(); argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url->flags); - if (url->flags & URL_FLAGS_HAS_SCHEME) - argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str()); + argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str()); if (url->flags & URL_FLAGS_HAS_USERNAME) argv[ARG_USERNAME] = UTF8STRING(isolate, url->username); if (url->flags & URL_FLAGS_HAS_PASSWORD) @@ -1275,7 +1281,9 @@ namespace url { HarvestBase(env, &base, base_obj.As()); URL::Parse(input, len, state_override, &url, &base, has_base); - if (url.flags & URL_FLAGS_INVALID_PARSE_STATE) + if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) || + ((state_override != kUnknownState) && + (url.flags & URL_FLAGS_TERMINATED))) return; // Define the return value placeholders diff --git a/src/node_url.h b/src/node_url.h index 4d18eb6f0a910d..5b5b65b7c27e87 100644 --- a/src/node_url.h +++ 
b/src/node_url.h @@ -451,13 +451,12 @@ static inline void PercentDecode(const char* input, XX(URL_FLAGS_INVALID_PARSE_STATE, 0x04) \ XX(URL_FLAGS_TERMINATED, 0x08) \ XX(URL_FLAGS_SPECIAL, 0x10) \ - XX(URL_FLAGS_HAS_SCHEME, 0x20) \ - XX(URL_FLAGS_HAS_USERNAME, 0x40) \ - XX(URL_FLAGS_HAS_PASSWORD, 0x80) \ - XX(URL_FLAGS_HAS_HOST, 0x100) \ - XX(URL_FLAGS_HAS_PATH, 0x200) \ - XX(URL_FLAGS_HAS_QUERY, 0x400) \ - XX(URL_FLAGS_HAS_FRAGMENT, 0x800) + XX(URL_FLAGS_HAS_USERNAME, 0x20) \ + XX(URL_FLAGS_HAS_PASSWORD, 0x40) \ + XX(URL_FLAGS_HAS_HOST, 0x80) \ + XX(URL_FLAGS_HAS_PATH, 0x100) \ + XX(URL_FLAGS_HAS_QUERY, 0x200) \ + XX(URL_FLAGS_HAS_FRAGMENT, 0x400) #define ARGS(XX) \ XX(ARG_FLAGS) \ From d912e28370ead511f3121376ca3c1b3b8c50de0a Mon Sep 17 00:00:00 2001 From: Daijiro Wachi Date: Mon, 3 Apr 2017 17:44:43 +0900 Subject: [PATCH 12/21] url: change path parsing for non-special URLs This changes the way paths are parsed for non-special URLs. It allows paths to be empty for non-special URLs and also takes that into account when serializing. 
PR-URL: https://github.com/nodejs/node/pull/12507 Fixes: https://github.com/nodejs/node/issues/11962 Refs: https://github.com/whatwg/url/pull/213 Reviewed-By: James M Snell --- src/node_url.cc | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index 54a2944588071c..f9965d537b9abf 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -862,8 +862,10 @@ namespace url { } break; case kRelativeSlash: - if (ch == '/' || special_back_slash) { + if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) { state = kSpecialAuthorityIgnoreSlashes; + } else if (ch == '/') { + state = kAuthority; } else { if (base->flags & URL_FLAGS_HAS_USERNAME) { url->flags |= URL_FLAGS_HAS_USERNAME; @@ -1145,9 +1147,25 @@ namespace url { } break; case kPathStart: - state = kPath; - if (ch != '/' && !special_back_slash) - continue; + if (IsSpecial(url->scheme)) { + state = kPath; + if (ch != '/' && ch != '\\') { + continue; + } + } else if (!has_state_override && ch == '?') { + url->flags |= URL_FLAGS_HAS_QUERY; + url->query.clear(); + state = kQuery; + } else if (!has_state_override && ch == '#') { + url->flags |= URL_FLAGS_HAS_FRAGMENT; + url->fragment.clear(); + state = kFragment; + } else if (ch != kEOL) { + state = kPath; + if (ch != '/') { + continue; + } + } break; case kPath: if (ch == kEOL || @@ -1165,7 +1183,7 @@ namespace url { url->flags |= URL_FLAGS_HAS_PATH; url->path.push_back(""); } - } else { + } else if (!IsSingleDotSegment(buffer)) { if (url->scheme == "file:" && url->path.empty() && buffer.size() == 2 && From dceb12e1b1a431ad6c93b113b51190149e2eb1d9 Mon Sep 17 00:00:00 2001 From: Daijiro Wachi Date: Mon, 3 Apr 2017 17:47:46 +0900 Subject: [PATCH 13/21] test: synchronize WPT url test data PR-URL: https://github.com/nodejs/node/pull/12507 Refs: https://github.com/w3c/web-platform-tests/pull/4586 Refs: https://github.com/nodejs/node/pull/11887 Reviewed-By: James M Snell --- 
test/fixtures/url-setter-tests.js | 59 ++++++++++++++++----- test/fixtures/url-tests.js | 86 +++++++++++++++---------------- 2 files changed, 90 insertions(+), 55 deletions(-) diff --git a/test/fixtures/url-setter-tests.js b/test/fixtures/url-setter-tests.js index 9b39d0bed67bfc..8c15a3cc5ac885 100644 --- a/test/fixtures/url-setter-tests.js +++ b/test/fixtures/url-setter-tests.js @@ -34,7 +34,7 @@ module.exports = "href": "a://example.net", "new_value": "", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -42,7 +42,7 @@ module.exports = "href": "a://example.net", "new_value": "b", "expected": { - "href": "b://example.net/", + "href": "b://example.net", "protocol": "b:" } }, @@ -59,7 +59,7 @@ module.exports = "href": "a://example.net", "new_value": "B", "expected": { - "href": "b://example.net/", + "href": "b://example.net", "protocol": "b:" } }, @@ -68,7 +68,7 @@ module.exports = "href": "a://example.net", "new_value": "é", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -77,7 +77,7 @@ module.exports = "href": "a://example.net", "new_value": "0b", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -86,7 +86,7 @@ module.exports = "href": "a://example.net", "new_value": "+b", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -94,7 +94,7 @@ module.exports = "href": "a://example.net", "new_value": "bC0+-.", "expected": { - "href": "bc0+-.://example.net/", + "href": "bc0+-.://example.net", "protocol": "bc0+-.:" } }, @@ -103,7 +103,7 @@ module.exports = "href": "a://example.net", "new_value": "b,c", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -112,7 +112,7 @@ module.exports = "href": "a://example.net", "new_value": "bé", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -213,7 
+213,7 @@ module.exports = "href": "ssh://me@example.net", "new_value": "http", "expected": { - "href": "ssh://me@example.net/", + "href": "ssh://me@example.net", "protocol": "ssh:" } }, @@ -221,7 +221,7 @@ module.exports = "href": "ssh://me@example.net", "new_value": "gopher", "expected": { - "href": "ssh://me@example.net/", + "href": "ssh://me@example.net", "protocol": "ssh:" } }, @@ -229,7 +229,15 @@ module.exports = "href": "ssh://me@example.net", "new_value": "file", "expected": { - "href": "ssh://me@example.net/", + "href": "ssh://me@example.net", + "protocol": "ssh:" + } + }, + { + "href": "ssh://example.net", + "new_value": "file", + "expected": { + "href": "ssh://example.net", "protocol": "ssh:" } }, @@ -1585,6 +1593,33 @@ module.exports = "href": "http://example.net/%3F", "pathname": "/%3F" } + }, + { + "comment": "# needs to be encoded", + "href": "http://example.net", + "new_value": "#", + "expected": { + "href": "http://example.net/%23", + "pathname": "/%23" + } + }, + { + "comment": "? 
needs to be encoded, non-special scheme", + "href": "sc://example.net", + "new_value": "?", + "expected": { + "href": "sc://example.net/%3F", + "pathname": "/%3F" + } + }, + { + "comment": "# needs to be encoded, non-special scheme", + "href": "sc://example.net", + "new_value": "#", + "expected": { + "href": "sc://example.net/%23", + "pathname": "/%23" + } } ], "search": [ diff --git a/test/fixtures/url-tests.js b/test/fixtures/url-tests.js index a4e7de9f26b199..c7e63f50331c3b 100644 --- a/test/fixtures/url-tests.js +++ b/test/fixtures/url-tests.js @@ -571,21 +571,21 @@ module.exports = "search": "", "hash": "" }, - // { - // "input": "foo://", - // "base": "http://example.org/foo/bar", - // "href": "foo://", - // "origin": "null", - // "protocol": "foo:", - // "username": "", - // "password": "", - // "host": "", - // "hostname": "", - // "port": "", - // "pathname": "", - // "search": "", - // "hash": "" - // }, + { + "input": "foo://", + "base": "http://example.org/foo/bar", + "href": "foo://", + "origin": "null", + "protocol": "foo:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, { "input": "http://a:b@c:29/d", "base": "http://example.org/foo/bar", @@ -5338,34 +5338,34 @@ module.exports = "search": "", "hash": "" }, - // { - // "input": "////", - // "base": "sc://x/", - // "href": "sc:////", - // "protocol": "sc:", - // "username": "", - // "password": "", - // "host": "", - // "hostname": "", - // "port": "", - // "pathname": "//", - // "search": "", - // "hash": "" - // }, - // { - // "input": "////x/", - // "base": "sc://x/", - // "href": "sc:////x/", - // "protocol": "sc:", - // "username": "", - // "password": "", - // "host": "", - // "hostname": "", - // "port": "", - // "pathname": "//x/", - // "search": "", - // "hash": "" - // }, + { + "input": "////", + "base": "sc://x/", + "href": "sc:////", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + 
"hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "////x/", + "base": "sc://x/", + "href": "sc:////x/", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//x/", + "search": "", + "hash": "" + }, { "input": "tftp://foobar.com/someconfig;mode=netascii", "base": "about:blank", From 43faf56f7b35e3872d1a0fb047f1405996065933 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Mon, 20 Mar 2017 14:29:54 -0700 Subject: [PATCH 14/21] url: error when domainTo*() is called w/o argument PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- lib/internal/url.js | 6 ++++++ test/parallel/test-whatwg-url-domainto.js | 9 +++++++++ 2 files changed, 15 insertions(+) diff --git a/lib/internal/url.js b/lib/internal/url.js index 5fcabb803ef473..7fafc783dba4ae 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -1308,11 +1308,17 @@ function originFor(url, base) { } function domainToASCII(domain) { + if (arguments.length < 1) + throw new TypeError('"domain" argument must be specified'); + // toUSVString is not needed. return binding.domainToASCII(`${domain}`); } function domainToUnicode(domain) { + if (arguments.length < 1) + throw new TypeError('"domain" argument must be specified'); + // toUSVString is not needed. return binding.domainToUnicode(`${domain}`); } diff --git a/test/parallel/test-whatwg-url-domainto.js b/test/parallel/test-whatwg-url-domainto.js index f891f95a19cd3b..70b32c8dce279c 100644 --- a/test/parallel/test-whatwg-url-domainto.js +++ b/test/parallel/test-whatwg-url-domainto.js @@ -12,6 +12,15 @@ const { domainToASCII, domainToUnicode } = require('url'); // Tests below are not from WPT. 
const tests = require('../fixtures/url-idna.js'); +{ + assert.throws(() => domainToASCII(), + /^TypeError: "domain" argument must be specified$/); + assert.throws(() => domainToUnicode(), + /^TypeError: "domain" argument must be specified$/); + assert.strictEqual(domainToASCII(undefined), 'undefined'); + assert.strictEqual(domainToUnicode(undefined), 'undefined'); +} + { for (const [i, { ascii, unicode }] of tests.valid.entries()) { assert.strictEqual(ascii, domainToASCII(unicode), From dafa6008d143ef3d049e34c36f5c148dc2dce9c7 Mon Sep 17 00:00:00 2001 From: Brian White Date: Sun, 5 Mar 2017 05:29:35 -0500 Subject: [PATCH 15/21] url: avoid instanceof for WHATWG URL PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- benchmark/url/url-searchparams-read.js | 2 +- benchmark/url/whatwg-url-properties.js | 2 +- lib/internal/url.js | 36 +++++++++++++++----------- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/benchmark/url/url-searchparams-read.js b/benchmark/url/url-searchparams-read.js index 94ddaf1cfa4072..762ffcca03d69d 100644 --- a/benchmark/url/url-searchparams-read.js +++ b/benchmark/url/url-searchparams-read.js @@ -5,7 +5,7 @@ const { URLSearchParams } = require('url'); const bench = common.createBenchmark(main, { method: ['get', 'getAll', 'has'], param: ['one', 'two', 'three', 'nonexistent'], - n: [1e6] + n: [2e7] }); const str = 'one=single&two=first&three=first&two=2nd&three=2nd&three=3rd'; diff --git a/benchmark/url/whatwg-url-properties.js b/benchmark/url/whatwg-url-properties.js index 9bdc9778a8c922..3a865d2335ab3c 100644 --- a/benchmark/url/whatwg-url-properties.js +++ b/benchmark/url/whatwg-url-properties.js @@ -8,7 +8,7 @@ const bench = common.createBenchmark(main, { prop: ['href', 'origin', 'protocol', 'username', 'password', 'host', 'hostname', 'port', 'pathname', 'search', 'searchParams', 'hash'], - n: [1e4] + n: [3e5] }); function setAndGet(n, url, prop, alternative) { diff --git a/lib/internal/url.js 
b/lib/internal/url.js index 7fafc783dba4ae..9a70838c30d4a1 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -239,8 +239,10 @@ class URL { constructor(input, base) { // toUSVString is not needed. input = `${input}`; - if (base !== undefined && !(base instanceof URL)) + if (base !== undefined && + (!base[searchParams] || !base[searchParams][searchParams])) { base = new URL(base); + } parse(this, input, base); } @@ -885,7 +887,7 @@ class URLSearchParams { } [util.inspect.custom](recurseTimes, ctx) { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } @@ -947,7 +949,7 @@ function merge(out, start, mid, end, lBuffer, rBuffer) { defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { append(name, value) { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } if (arguments.length < 2) { @@ -961,7 +963,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { }, delete(name) { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } if (arguments.length < 1) { @@ -982,7 +984,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { }, get(name) { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } if (arguments.length < 1) { @@ -1000,7 +1002,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { }, getAll(name) { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of 
`this` is not a URLSearchParams'); } if (arguments.length < 1) { @@ -1019,7 +1021,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { }, has(name) { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } if (arguments.length < 1) { @@ -1037,7 +1039,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { }, set(name, value) { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } if (arguments.length < 2) { @@ -1125,7 +1127,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { // Define entries here rather than [Symbol.iterator] as the function name // must be set to `entries`. entries() { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } @@ -1133,7 +1135,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { }, forEach(callback, thisArg = undefined) { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } if (typeof callback !== 'function') { @@ -1155,7 +1157,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { // https://heycam.github.io/webidl/#es-iterable keys() { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } @@ -1163,7 +1165,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { }, values() { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || 
this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } @@ -1173,7 +1175,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { // https://heycam.github.io/webidl/#es-stringifier // https://url.spec.whatwg.org/#urlsearchparams-stringification-behavior toString() { - if (!this || !(this instanceof URLSearchParams)) { + if (!this || !this[searchParams] || this[searchParams][searchParams]) { throw new TypeError('Value of `this` is not a URLSearchParams'); } @@ -1275,8 +1277,10 @@ defineIDLClass(URLSearchParamsIteratorPrototype, 'URLSearchParamsIterator', { }); function originFor(url, base) { - if (!(url instanceof URL)) + if (url != undefined && + (!url[searchParams] || !url[searchParams][searchParams])) { url = new URL(url, base); + } var origin; const protocol = url.protocol; switch (protocol) { @@ -1399,8 +1403,10 @@ function getPathFromURLPosix(url) { } function getPathFromURL(path) { - if (!(path instanceof URL)) + if (path == undefined || !path[searchParams] || + !path[searchParams][searchParams]) { return path; + } if (path.protocol !== 'file:') return new TypeError('Only `file:` URLs are supported'); return isWindows ? getPathFromURLWin32(path) : getPathFromURLPosix(path); From 68cf850bc43901ba4cfa82cb1fff87cd1e4c34bf Mon Sep 17 00:00:00 2001 From: Daijiro Wachi Date: Mon, 10 Apr 2017 18:09:06 +0200 Subject: [PATCH 16/21] url: trim leading slashes of file URL paths It should trim the slashes after the colon into three for file URL. 
PR-URL: https://github.com/nodejs/node/pull/12507 Refs: https://github.com/w3c/web-platform-tests/pull/5195 Fixes: https://github.com/nodejs/node/issues/11188 Reviewed-By: James M Snell --- src/node_url.cc | 22 +- test/fixtures/url-setter-tests.js | 29 ++- test/fixtures/url-tests.js | 352 +++++++++++++++++++++++++++++- 3 files changed, 393 insertions(+), 10 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index f9965d537b9abf..16a4cdd45b54b4 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -1108,12 +1108,14 @@ namespace url { state = kFileHost; } else { if (has_base && - base->scheme == "file:" && - base->flags & URL_FLAGS_HAS_PATH && - base->path.size() > 0 && - NORMALIZED_WINDOWS_DRIVE_LETTER(base->path[0])) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path.push_back(base->path[0]); + base->scheme == "file:") { + if (NORMALIZED_WINDOWS_DRIVE_LETTER(base->path[0])) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path.push_back(base->path[0]); + } else { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; + } } state = kPath; continue; @@ -1196,6 +1198,14 @@ namespace url { url->path.push_back(segment); } buffer.clear(); + if (url->scheme == "file:" && + (ch == kEOL || + ch == '?' 
|| + ch == '#')) { + while (url->path.size() > 1 && url->path[0].length() == 0) { + url->path.erase(url->path.begin()); + } + } if (ch == '?') { url->flags |= URL_FLAGS_HAS_QUERY; state = kQuery; diff --git a/test/fixtures/url-setter-tests.js b/test/fixtures/url-setter-tests.js index 8c15a3cc5ac885..d0688611a01c0f 100644 --- a/test/fixtures/url-setter-tests.js +++ b/test/fixtures/url-setter-tests.js @@ -1,7 +1,7 @@ 'use strict'; /* WPT Refs: - https://github.com/w3c/web-platform-tests/blob/e48dd15/url/setters_tests.json + https://github.com/w3c/web-platform-tests/blob/3eff1bd/url/setters_tests.json License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html */ module.exports = @@ -1620,6 +1620,33 @@ module.exports = "href": "sc://example.net/%23", "pathname": "/%23" } + }, + { + "comment": "File URLs and (back)slashes", + "href": "file://monkey/", + "new_value": "\\\\", + "expected": { + "href": "file://monkey/", + "pathname": "/" + } + }, + { + "comment": "File URLs and (back)slashes", + "href": "file:///unicorn", + "new_value": "//\\/", + "expected": { + "href": "file:///", + "pathname": "/" + } + }, + { + "comment": "File URLs and (back)slashes", + "href": "file:///unicorn", + "new_value": "//monkey/..//", + "expected": { + "href": "file:///", + "pathname": "/" + } } ], "search": [ diff --git a/test/fixtures/url-tests.js b/test/fixtures/url-tests.js index c7e63f50331c3b..3858f12db55e0d 100644 --- a/test/fixtures/url-tests.js +++ b/test/fixtures/url-tests.js @@ -1,7 +1,7 @@ 'use strict'; /* WPT Refs: - https://github.com/w3c/web-platform-tests/blob/b207902/url/urltestdata.json + https://github.com/w3c/web-platform-tests/blob/3eff1bd/url/urltestdata.json License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html */ module.exports = @@ -281,6 +281,11 @@ module.exports = "base": "http://example.org/foo/bar", "failure": true }, + { + "input": "non-special://f:999999/c", + "base": "http://example.org/foo/bar", + "failure": true + }, 
{ "input": "http://f: 21 / b ? d # e ", "base": "http://example.org/foo/bar", @@ -3669,6 +3674,35 @@ module.exports = "search": "", "hash": "" }, + // { + // "input": "https://faß.ExAmPlE/", + // "base": "about:blank", + // "href": "https://xn--fa-hia.example/", + // "origin": "https://faß.example", + // "protocol": "https:", + // "username": "", + // "password": "", + // "host": "xn--fa-hia.example", + // "hostname": "xn--fa-hia.example", + // "port": "", + // "pathname": "/", + // "search": "", + // "hash": "" + // }, + // { + // "input": "sc://faß.ExAmPlE/", + // "base": "about:blank", + // "href": "sc://fa%C3%9F.ExAmPlE/", + // "protocol": "sc:", + // "username": "", + // "password": "", + // "host": "fa%C3%9F.ExAmPlE", + // "hostname": "fa%C3%9F.ExAmPlE", + // "port": "", + // "pathname": "/", + // "search": "", + // "hash": "" + // }, "Invalid escaped characters should fail and the percents should be escaped. https://www.w3.org/Bugs/Public/show_bug.cgi?id=24191", { "input": "http://%zz%66%a.com", @@ -5110,6 +5144,318 @@ module.exports = "search": "?test", "hash": "#x" }, + "# File URLs and many (back)slashes", + { + "input": "file:\\\\//", + "base": "about:blank", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:\\\\\\\\", + "base": "about:blank", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:\\\\\\\\?fox", + "base": "about:blank", + "href": "file:///?fox", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "?fox", + "hash": "" + }, + { + "input": "file:\\\\\\\\#guppy", + "base": "about:blank", + "href": "file:///#guppy", + "protocol": "file:", + "username": "", + 
"password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "#guppy" + }, + { + "input": "file://spider///", + "base": "about:blank", + "href": "file://spider/", + "protocol": "file:", + "username": "", + "password": "", + "host": "spider", + "hostname": "spider", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:\\\\localhost//", + "base": "about:blank", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:///localhost//cat", + "base": "about:blank", + "href": "file:///localhost//cat", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/localhost//cat", + "search": "", + "hash": "" + }, + { + "input": "file://\\/localhost//cat", + "base": "about:blank", + "href": "file:///localhost//cat", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/localhost//cat", + "search": "", + "hash": "" + }, + { + "input": "file://localhost//a//../..//", + "base": "about:blank", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "/////mouse", + "base": "file:///elephant", + "href": "file:///mouse", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/mouse", + "search": "", + "hash": "" + }, + { + "input": "\\//pig", + "base": "file://lion/", + "href": "file:///pig", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pig", + "search": "", + "hash": "" + }, + { + "input": "\\/localhost//pig", + "base": "file://lion/", + "href": "file:///pig", 
+ "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pig", + "search": "", + "hash": "" + }, + { + "input": "//localhost//pig", + "base": "file://lion/", + "href": "file:///pig", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pig", + "search": "", + "hash": "" + }, + // { + // "input": "/..//localhost//pig", + // "base": "file://lion/", + // "href": "file://lion/localhost//pig", + // "protocol": "file:", + // "username": "", + // "password": "", + // "host": "lion", + // "hostname": "lion", + // "port": "", + // "pathname": "/localhost//pig", + // "search": "", + // "hash": "" + // }, + { + "input": "file://", + "base": "file://ape/", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + "# File URLs with non-empty hosts", + // { + // "input": "/rooibos", + // "base": "file://tea/", + // "href": "file://tea/rooibos", + // "protocol": "file:", + // "username": "", + // "password": "", + // "host": "tea", + // "hostname": "tea", + // "port": "", + // "pathname": "/rooibos", + // "search": "", + // "hash": "" + // }, + // { + // "input": "/?chai", + // "base": "file://tea/", + // "href": "file://tea/?chai", + // "protocol": "file:", + // "username": "", + // "password": "", + // "host": "tea", + // "hostname": "tea", + // "port": "", + // "pathname": "/", + // "search": "?chai", + // "hash": "" + // }, + "# Windows drive letter quirk with not empty host", + { + "input": "file://example.net/C:/", + "base": "about:blank", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file://1.2.3.4/C:/", + "base": "about:blank", + "href": "file:///C:/", + "protocol": 
"file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file://[1::8]/C:/", + "base": "about:blank", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + "# Windows drive letter quirk (no host)", + { + "input": "file:/C|/", + "base": "about:blank", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, + { + "input": "file://C|/", + "base": "about:blank", + "href": "file:///C:/", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:/", + "search": "", + "hash": "" + }, "# file URLs without base URL by Rimas Misevičius", { "input": "file:", @@ -5213,12 +5559,12 @@ module.exports = { "input": "http://?", "base": "about:blank", - "failure": "true" + "failure": true }, { "input": "http://#", "base": "about:blank", - "failure": "true" + "failure": true }, "# Non-special-URL path tests", // { From 752097c277959c280909f71e89e4b34bfb183e64 Mon Sep 17 00:00:00 2001 From: Daijiro Wachi Date: Wed, 12 Apr 2017 20:43:22 +0200 Subject: [PATCH 17/21] url: remove javascript URL special case PR-URL: https://github.com/nodejs/node/pull/12507 Fixes: https://github.com/nodejs/node/issues/11485 Reviewed-By: James M Snell --- lib/internal/url.js | 2 -- test/fixtures/url-setter-tests.js | 16 ++++++++-------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/lib/internal/url.js b/lib/internal/url.js index 9a70838c30d4a1..74d8de63d90310 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -556,8 +556,6 @@ Object.defineProperties(URL.prototype, { const ctx = this[context]; // toUSVString is not needed. 
hash = `${hash}`; - if (this.protocol === 'javascript:') - return; if (!hash) { ctx.fragment = null; ctx.flags &= ~binding.URL_FLAGS_HAS_FRAGMENT; diff --git a/test/fixtures/url-setter-tests.js b/test/fixtures/url-setter-tests.js index d0688611a01c0f..4c39cb7311791b 100644 --- a/test/fixtures/url-setter-tests.js +++ b/test/fixtures/url-setter-tests.js @@ -1800,13 +1800,13 @@ module.exports = "hash": "#%c3%89t%C3%A9" } }, - // { - // "href": "javascript:alert(1)", - // "new_value": "castle", - // "expected": { - // "href": "javascript:alert(1)#castle", - // "hash": "#castle" - // } - // } + { + "href": "javascript:alert(1)", + "new_value": "castle", + "expected": { + "href": "javascript:alert(1)#castle", + "hash": "#castle" + } + } ] } From f484cfdf29318a27e317b48d50e67d978b5d7214 Mon Sep 17 00:00:00 2001 From: Daijiro Wachi Date: Fri, 14 Apr 2017 18:12:16 +0200 Subject: [PATCH 18/21] url: disallow invalid IPv4 in IPv6 parser PR-URL: https://github.com/nodejs/node/pull/12507 Fixes: https://github.com/nodejs/node/issues/10655 Reviewed-By: James M Snell --- src/node_url.cc | 26 +++--- test/fixtures/url-setter-tests.js | 144 +++++++++++++++--------------- test/fixtures/url-tests.js | 40 ++++----- 3 files changed, 106 insertions(+), 104 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index 16a4cdd45b54b4..39f56ece679005 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -110,7 +110,7 @@ namespace url { uint16_t* compress_pointer = nullptr; const char* pointer = input; const char* end = pointer + length; - unsigned value, len, swaps, dots; + unsigned value, len, swaps, numbers_seen; char ch = pointer < end ? pointer[0] : kEOL; if (ch == ':') { if (length < 2 || pointer[1] != ':') @@ -148,9 +148,17 @@ namespace url { ch = pointer < end ? pointer[0] : kEOL; if (piece_pointer > last_piece - 2) goto end; - dots = 0; + numbers_seen = 0; while (ch != kEOL) { value = 0xffffffff; + if (numbers_seen > 0) { + if (ch == '.' 
&& numbers_seen < 4) { + pointer++; + ch = pointer < end ? pointer[0] : kEOL; + } else { + goto end; + } + } if (!ASCII_DIGIT(ch)) goto end; while (ASCII_DIGIT(ch)) { @@ -167,19 +175,13 @@ namespace url { pointer++; ch = pointer < end ? pointer[0] : kEOL; } - if (dots < 3 && ch != '.') - goto end; *piece_pointer = *piece_pointer * 0x100 + value; - if (dots & 0x1) + numbers_seen++; + if (numbers_seen == 2 || numbers_seen == 4) piece_pointer++; - if (ch != kEOL) { - pointer++; - ch = pointer < end ? pointer[0] : kEOL; - } - if (dots == 3 && ch != kEOL) - goto end; - dots++; } + if (numbers_seen != 4) + goto end; continue; case ':': pointer++; diff --git a/test/fixtures/url-setter-tests.js b/test/fixtures/url-setter-tests.js index 4c39cb7311791b..f537075674b77a 100644 --- a/test/fixtures/url-setter-tests.js +++ b/test/fixtures/url-setter-tests.js @@ -880,42 +880,42 @@ module.exports = "hostname": "example.net" } }, - // { - // "href": "http://example.net/", - // "new_value": "[::1.2.3.4x]", - // "expected": { - // "href": "http://example.net/", - // "host": "example.net", - // "hostname": "example.net" - // } - // }, - // { - // "href": "http://example.net/", - // "new_value": "[::1.2.3.]", - // "expected": { - // "href": "http://example.net/", - // "host": "example.net", - // "hostname": "example.net" - // } - // }, - // { - // "href": "http://example.net/", - // "new_value": "[::1.2.]", - // "expected": { - // "href": "http://example.net/", - // "host": "example.net", - // "hostname": "example.net" - // } - // }, - // { - // "href": "http://example.net/", - // "new_value": "[::1.]", - // "expected": { - // "href": "http://example.net/", - // "host": "example.net", - // "hostname": "example.net" - // } - // }, + { + "href": "http://example.net/", + "new_value": "[::1.2.3.4x]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.2.3.]", + "expected": { 
+ "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.2.]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, // { // "href": "file://y/", // "new_value": "x:123", @@ -1214,42 +1214,42 @@ module.exports = "hostname": "example.net" } }, - // { - // "href": "http://example.net/", - // "new_value": "[::1.2.3.4x]", - // "expected": { - // "href": "http://example.net/", - // "host": "example.net", - // "hostname": "example.net" - // } - // }, - // { - // "href": "http://example.net/", - // "new_value": "[::1.2.3.]", - // "expected": { - // "href": "http://example.net/", - // "host": "example.net", - // "hostname": "example.net" - // } - // }, - // { - // "href": "http://example.net/", - // "new_value": "[::1.2.]", - // "expected": { - // "href": "http://example.net/", - // "host": "example.net", - // "hostname": "example.net" - // } - // }, - // { - // "href": "http://example.net/", - // "new_value": "[::1.]", - // "expected": { - // "href": "http://example.net/", - // "host": "example.net", - // "hostname": "example.net" - // } - // }, + { + "href": "http://example.net/", + "new_value": "[::1.2.3.4x]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.2.3.]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.2.]", + "expected": { + "href": "http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, + { + "href": "http://example.net/", + "new_value": "[::1.]", + "expected": { + "href": 
"http://example.net/", + "host": "example.net", + "hostname": "example.net" + } + }, // { // "href": "file://y/", // "new_value": "x:123", diff --git a/test/fixtures/url-tests.js b/test/fixtures/url-tests.js index 3858f12db55e0d..d44a36bcfe7e13 100644 --- a/test/fixtures/url-tests.js +++ b/test/fixtures/url-tests.js @@ -3800,26 +3800,26 @@ module.exports = "base": "http://other.com/", "failure": true }, - // { - // "input": "http://[::1.2.3.4x]", - // "base": "http://other.com/", - // "failure": true - // }, - // { - // "input": "http://[::1.2.3.]", - // "base": "http://other.com/", - // "failure": true - // }, - // { - // "input": "http://[::1.2.]", - // "base": "http://other.com/", - // "failure": true - // }, - // { - // "input": "http://[::1.]", - // "base": "http://other.com/", - // "failure": true - // }, + { + "input": "http://[::1.2.3.4x]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[::1.2.3.]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[::1.2.]", + "base": "http://other.com/", + "failure": true + }, + { + "input": "http://[::1.]", + "base": "http://other.com/", + "failure": true + }, "Misc Unicode", { "input": "http://foo:💩@example.com/bar", From 9288b735d8a0c15c240123669b6c6a15b01d7d5d Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Tue, 4 Apr 2017 19:13:37 -0700 Subject: [PATCH 19/21] url: clean up WHATWG URL origin generation - Use ordinary properties instead of symbols/getter redirection for internal object - Use template string literals - Remove unneeded custom inspection for internal objects - Remove unneeded OpaqueOrigin class - Remove unneeded type checks PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- lib/internal/url.js | 124 +++++++++++--------------------------------- 1 file changed, 29 insertions(+), 95 deletions(-) diff --git a/lib/internal/url.js b/lib/internal/url.js index 74d8de63d90310..629bbb2ecedd02 100644 --- a/lib/internal/url.js +++ 
b/lib/internal/url.js @@ -15,10 +15,6 @@ const os = require('os'); const isWindows = process.platform === 'win32'; -const kScheme = Symbol('scheme'); -const kHost = Symbol('host'); -const kPort = Symbol('port'); -const kDomain = Symbol('domain'); const kFormat = Symbol('format'); // https://tc39.github.io/ecma262/#sec-%iteratorprototype%-object @@ -38,62 +34,15 @@ function toUSVString(val) { return binding.toUSVString(str, match.index); } -class OpaqueOrigin { - toString() { - return 'null'; - } +// Refs: https://html.spec.whatwg.org/multipage/browsers.html#concept-origin-opaque +const kOpaqueOrigin = 'null'; - get effectiveDomain() { - return this; - } -} - -class TupleOrigin { - constructor(scheme, host, port, domain) { - this[kScheme] = scheme; - this[kHost] = host; - this[kPort] = port; - this[kDomain] = domain; - } - - get scheme() { - return this[kScheme]; - } - - get host() { - return this[kHost]; - } - - get port() { - return this[kPort]; - } - - get domain() { - return this[kDomain]; - } - - get effectiveDomain() { - return this[kDomain] || this[kHost]; - } - - // https://url.spec.whatwg.org/#dom-url-origin - toString(unicode = true) { - var result = this[kScheme]; - result += '://'; - result += unicode ? domainToUnicode(this[kHost]) : this[kHost]; - if (this[kPort] !== undefined && this[kPort] !== null) - result += `:${this[kPort]}`; - return result; - } - - [util.inspect.custom]() { - return `TupleOrigin { - scheme: ${this[kScheme]}, - host: ${this[kHost]}, - port: ${this[kPort]}, - domain: ${this[kDomain]} - }`; - } +// Refs: +// - https://html.spec.whatwg.org/multipage/browsers.html#unicode-serialisation-of-an-origin +// - https://html.spec.whatwg.org/multipage/browsers.html#ascii-serialisation-of-an-origin +function serializeTupleOrigin(scheme, host, port, unicode = true) { + const unicodeHost = unicode ? domainToUnicode(host) : host; + return `${scheme}//${unicodeHost}${port == null ? 
'' : `:${port}`}`; } // This class provides the internal state of a URL object. An instance of this @@ -359,7 +308,27 @@ Object.defineProperties(URL.prototype, { enumerable: true, configurable: true, get() { - return originFor(this).toString(); + // Refs: https://url.spec.whatwg.org/#concept-url-origin + const ctx = this[context]; + switch (ctx.scheme) { + case 'blob:': + if (ctx.path.length > 0) { + try { + return (new URL(ctx.path[0])).origin; + } catch (err) { + // fall through... do nothing + } + } + return kOpaqueOrigin; + case 'ftp:': + case 'gopher:': + case 'http:': + case 'https:': + case 'ws:': + case 'wss:': + return serializeTupleOrigin(ctx.scheme, ctx.host, ctx.port); + } + return kOpaqueOrigin; } }, protocol: { @@ -1274,41 +1243,6 @@ defineIDLClass(URLSearchParamsIteratorPrototype, 'URLSearchParamsIterator', { } }); -function originFor(url, base) { - if (url != undefined && - (!url[searchParams] || !url[searchParams][searchParams])) { - url = new URL(url, base); - } - var origin; - const protocol = url.protocol; - switch (protocol) { - case 'blob:': - if (url[context].path && url[context].path.length > 0) { - try { - return (new URL(url[context].path[0])).origin; - } catch (err) { - // fall through... 
do nothing - } - } - origin = new OpaqueOrigin(); - break; - case 'ftp:': - case 'gopher:': - case 'http:': - case 'https:': - case 'ws:': - case 'wss:': - origin = new TupleOrigin(protocol.slice(0, -1), - url[context].host, - url[context].port, - null); - break; - default: - origin = new OpaqueOrigin(); - } - return origin; -} - function domainToASCII(domain) { if (arguments.length < 1) throw new TypeError('"domain" argument must be specified'); From 8f702ef13503cbd9ee7ffcdfca1dc25b4f81bcde Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Tue, 4 Apr 2017 21:03:14 -0700 Subject: [PATCH 20/21] url: improve WHATWG URL inspection PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- lib/internal/url.js | 78 +++++++++++----- test/parallel/test-whatwg-url-inspect.js | 114 ++++++++++------------- 2 files changed, 103 insertions(+), 89 deletions(-) diff --git a/lib/internal/url.js b/lib/internal/url.js index 629bbb2ecedd02..771a916d704bac 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -184,6 +184,17 @@ function onParseHashComplete(flags, protocol, username, password, } } +function getEligibleConstructor(obj) { + while (obj !== null) { + if (Object.prototype.hasOwnProperty.call(obj, 'constructor') && + typeof obj.constructor === 'function') { + return obj.constructor; + } + obj = Object.getPrototypeOf(obj); + } + return null; +} + class URL { constructor(input, base) { // toUSVString is not needed. @@ -204,33 +215,43 @@ class URL { } [util.inspect.custom](depth, opts) { + if (this == null || + Object.getPrototypeOf(this[context]) !== URLContext.prototype) { + throw new TypeError('Value of `this` is not a URL'); + } + const ctx = this[context]; - var ret = 'URL {\n'; - ret += ` href: ${this.href}\n`; - if (ctx.scheme !== undefined) - ret += ` protocol: ${this.protocol}\n`; - if (ctx.username !== undefined) - ret += ` username: ${this.username}\n`; - if (ctx.password !== undefined) { - const pwd = opts.showHidden ? 
ctx.password : '--------'; - ret += ` password: ${pwd}\n`; - } - if (ctx.host !== undefined) - ret += ` hostname: ${this.hostname}\n`; - if (ctx.port !== undefined) - ret += ` port: ${this.port}\n`; - if (ctx.path !== undefined) - ret += ` pathname: ${this.pathname}\n`; - if (ctx.query !== undefined) - ret += ` search: ${this.search}\n`; - if (ctx.fragment !== undefined) - ret += ` hash: ${this.hash}\n`; + + if (typeof depth === 'number' && depth < 0) + return opts.stylize('[Object]', 'special'); + + const ctor = getEligibleConstructor(this); + + const obj = Object.create({ + constructor: ctor === null ? URL : ctor + }); + + obj.href = this.href; + obj.origin = this.origin; + obj.protocol = this.protocol; + obj.username = this.username; + obj.password = (opts.showHidden || ctx.password == null) ? + this.password : '--------'; + obj.host = this.host; + obj.hostname = this.hostname; + obj.port = this.port; + obj.pathname = this.pathname; + obj.search = this.search; + obj.searchParams = this.searchParams; + obj.hash = this.hash; + if (opts.showHidden) { - ret += ` cannot-be-base: ${this[cannotBeBase]}\n`; - ret += ` special: ${this[special]}\n`; + obj.cannotBeBase = this[cannotBeBase]; + obj.special = this[special]; + obj[context] = this[context]; } - ret += '}'; - return ret; + + return util.inspect(obj, opts); } } @@ -858,6 +879,9 @@ class URLSearchParams { throw new TypeError('Value of `this` is not a URLSearchParams'); } + if (typeof recurseTimes === 'number' && recurseTimes < 0) + return ctx.stylize('[Object]', 'special'); + const separator = ', '; const innerOpts = Object.assign({}, ctx); if (recurseTimes !== null) { @@ -1211,6 +1235,12 @@ defineIDLClass(URLSearchParamsIteratorPrototype, 'URLSearchParamsIterator', { }; }, [util.inspect.custom](recurseTimes, ctx) { + if (this == null || this[context] == null || this[context].target == null) + throw new TypeError('Value of `this` is not a URLSearchParamsIterator'); + + if (typeof recurseTimes === 'number' && 
recurseTimes < 0) + return ctx.stylize('[Object]', 'special'); + const innerOpts = Object.assign({}, ctx); if (recurseTimes !== null) { innerOpts.depth = recurseTimes - 1; diff --git a/test/parallel/test-whatwg-url-inspect.js b/test/parallel/test-whatwg-url-inspect.js index 4afbbc13102905..a8a59b77873f12 100644 --- a/test/parallel/test-whatwg-url-inspect.js +++ b/test/parallel/test-whatwg-url-inspect.js @@ -3,7 +3,6 @@ const common = require('../common'); const util = require('util'); const URL = require('url').URL; -const path = require('path'); const assert = require('assert'); if (!common.hasIntl) { @@ -13,71 +12,56 @@ if (!common.hasIntl) { } // Tests below are not from WPT. -const tests = require(path.join(common.fixturesDir, 'url-tests')); -const additional_tests = require( - path.join(common.fixturesDir, 'url-tests-additional')); +const url = new URL('https://username:password@host.name:8080/path/name/?que=ry#hash'); -const allTests = additional_tests.slice(); -for (const test of tests) { - if (test.failure || typeof test === 'string') continue; - allTests.push(test); -} - -for (const test of allTests) { - const url = test.url ? 
new URL(test.url) : new URL(test.input, test.base); - - for (const showHidden of [true, false]) { - const res = util.inspect(url, { - showHidden - }); - - const lines = res.split('\n'); +assert.strictEqual( + util.inspect(url), + `URL { + href: 'https://username:password@host.name:8080/path/name/?que=ry#hash', + origin: 'https://host.name:8080', + protocol: 'https:', + username: 'username', + password: '--------', + host: 'host.name:8080', + hostname: 'host.name', + port: '8080', + pathname: '/path/name/', + search: '?que=ry', + searchParams: URLSearchParams { 'que' => 'ry' }, + hash: '#hash' }`); - const firstLine = lines[0]; - assert.strictEqual(firstLine, 'URL {'); +assert.strictEqual( + util.inspect(url, { showHidden: true }), + `URL { + href: 'https://username:password@host.name:8080/path/name/?que=ry#hash', + origin: 'https://host.name:8080', + protocol: 'https:', + username: 'username', + password: 'password', + host: 'host.name:8080', + hostname: 'host.name', + port: '8080', + pathname: '/path/name/', + search: '?que=ry', + searchParams: URLSearchParams { 'que' => 'ry' }, + hash: '#hash', + cannotBeBase: false, + special: true, + [Symbol(context)]:\x20 + URLContext { + flags: 2032, + scheme: 'https:', + username: 'username', + password: 'password', + host: 'host.name', + port: 8080, + path: [ 'path', 'name', '', [length]: 3 ], + query: 'que=ry', + fragment: 'hash' } }`); - const lastLine = lines[lines.length - 1]; - assert.strictEqual(lastLine, '}'); +assert.strictEqual( + util.inspect({ a: url }, { depth: 0 }), + '{ a: [Object] }'); - const innerLines = lines.slice(1, lines.length - 1); - const keys = new Set(); - for (const line of innerLines) { - const i = line.indexOf(': '); - const k = line.slice(0, i).trim(); - const v = line.slice(i + 2); - assert.strictEqual(keys.has(k), false, 'duplicate key found: ' + k); - keys.add(k); - - const hidden = new Set([ - 'password', - 'cannot-be-base', - 'special' - ]); - if (showHidden) { - if (!hidden.has(k)) { - 
assert.strictEqual(v, url[k], k); - continue; - } - - if (k === 'password') { - assert.strictEqual(v, url[k], k); - } - if (k === 'cannot-be-base') { - assert.ok(v.match(/^true$|^false$/), k + ' is Boolean'); - } - if (k === 'special') { - assert.ok(v.match(/^true$|^false$/), k + ' is Boolean'); - } - continue; - } - - // showHidden is false - if (k === 'password') { - assert.strictEqual(v, '--------', k); - continue; - } - assert.strictEqual(hidden.has(k), false, 'no hidden keys: ' + k); - assert.strictEqual(v, url[k], k); - } - } -} +class MyURL extends URL {} +assert(util.inspect(new MyURL(url.href)).startsWith('MyURL {')); From 473bd5e64783a93b7598c43024c71137743c6ee7 Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Wed, 5 Apr 2017 21:22:53 -0700 Subject: [PATCH 21/21] src: clean up WHATWG WG parser * reduce indentation * refactor URL inlined methods * prefer templates over macros * do not export ARG_* flags in url binding PR-URL: https://github.com/nodejs/node/pull/12507 Reviewed-By: James M Snell --- src/node_url.cc | 3113 +++++++++++++++++++++++++++-------------------- src/node_url.h | 467 ------- 2 files changed, 1797 insertions(+), 1783 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index 39f56ece679005..7df9461fdd25b1 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -15,8 +15,6 @@ #include #include -#define UNICODE_REPLACEMENT_CHARACTER 0xFFFD - namespace node { using v8::Array; @@ -55,709 +53,1142 @@ using v8::Value; namespace url { -#if defined(NODE_HAVE_I18N_SUPPORT) - static inline bool ToUnicode(std::string* input, std::string* output) { - MaybeStackBuffer buf; - if (i18n::ToUnicode(&buf, input->c_str(), input->length()) < 0) - return false; - output->assign(*buf, buf.length()); - return true; - } +// https://url.spec.whatwg.org/#eof-code-point +static const char kEOL = -1; + +// Used in ToUSVString(). 
+static const char16_t kUnicodeReplacementCharacter = 0xFFFD; + +union url_host_value { + std::string domain; + uint32_t ipv4; + uint16_t ipv6[8]; + ~url_host_value() {} +}; + +enum url_host_type { + HOST_TYPE_FAILED = -1, + HOST_TYPE_DOMAIN = 0, + HOST_TYPE_IPV4 = 1, + HOST_TYPE_IPV6 = 2 +}; + +struct url_host { + url_host_value value; + enum url_host_type type; +}; + +#define ARGS(XX) \ + XX(ARG_FLAGS) \ + XX(ARG_PROTOCOL) \ + XX(ARG_USERNAME) \ + XX(ARG_PASSWORD) \ + XX(ARG_HOST) \ + XX(ARG_PORT) \ + XX(ARG_PATH) \ + XX(ARG_QUERY) \ + XX(ARG_FRAGMENT) + +#define ERR_ARGS(XX) \ + XX(ERR_ARG_FLAGS) \ + XX(ERR_ARG_INPUT) \ + +enum url_cb_args { +#define XX(name) name, + ARGS(XX) +#undef XX +}; - static inline bool ToASCII(std::string* input, std::string* output) { - MaybeStackBuffer buf; - if (i18n::ToASCII(&buf, input->c_str(), input->length()) < 0) - return false; - output->assign(*buf, buf.length()); - return true; - } -#else - // Intentional non-ops if ICU is not present. - static inline bool ToUnicode(std::string* input, std::string* output) { - *output = *input; - return true; +enum url_error_cb_args { +#define XX(name) name, + ERR_ARGS(XX) +#undef XX +}; + +#define CHAR_TEST(bits, name, expr) \ + template \ + static inline bool name(const T ch) { \ + static_assert(sizeof(ch) >= (bits) / 8, \ + "Character must be wider than " #bits " bits"); \ + return (expr); \ } - static inline bool ToASCII(std::string* input, std::string* output) { - *output = *input; - return true; +#define TWO_CHAR_STRING_TEST(bits, name, expr) \ + template \ + static inline bool name(const T ch1, const T ch2) { \ + static_assert(sizeof(ch1) >= (bits) / 8, \ + "Character must be wider than " #bits " bits"); \ + return (expr); \ + } \ + template \ + static inline bool name(const std::basic_string& str) { \ + static_assert(sizeof(str[0]) >= (bits) / 8, \ + "Character must be wider than " #bits " bits"); \ + return str.length() >= 2 && name(str[0], str[1]); \ } -#endif - // If a UTF-16 
character is a low/trailing surrogate. - static inline bool IsUnicodeTrail(uint16_t c) { - return (c & 0xFC00) == 0xDC00; +// https://infra.spec.whatwg.org/#ascii-tab-or-newline +CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r')) + +// https://infra.spec.whatwg.org/#ascii-digit +CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9')) + +// https://infra.spec.whatwg.org/#ascii-hex-digit +CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) || + (ch >= 'A' && ch <= 'F') || + (ch >= 'a' && ch <= 'f'))) + +// https://infra.spec.whatwg.org/#ascii-alpha +CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'A' && ch <= 'Z') || + (ch >= 'a' && ch <= 'z'))) + +// https://infra.spec.whatwg.org/#ascii-alphanumeric +CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch))) + +// https://infra.spec.whatwg.org/#ascii-lowercase +template +static inline T ASCIILowercase(T ch) { + return IsASCIIAlpha(ch) ? (ch | 0x20) : ch; +} + +// https://url.spec.whatwg.org/#windows-drive-letter +TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter, + (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|'))) + +// https://url.spec.whatwg.org/#normalized-windows-drive-letter +TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter, + (IsASCIIAlpha(ch1) && ch2 == ':')) + +// If a UTF-16 character is a low/trailing surrogate. +CHAR_TEST(16, IsUnicodeTrail, (ch & 0xFC00) == 0xDC00) + +// If a UTF-16 character is a surrogate. +CHAR_TEST(16, IsUnicodeSurrogate, (ch & 0xF800) == 0xD800) + +// If a UTF-16 surrogate is a low/trailing one. 
+CHAR_TEST(16, IsUnicodeSurrogateTrail, (ch & 0x400) != 0) + +#undef CHAR_TEST +#undef TWO_CHAR_STRING_TEST + +static const char* hex[256] = { + "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", + "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", + "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", + "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", + "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27", + "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F", + "%30", "%31", "%32", "%33", "%34", "%35", "%36", "%37", + "%38", "%39", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F", + "%40", "%41", "%42", "%43", "%44", "%45", "%46", "%47", + "%48", "%49", "%4A", "%4B", "%4C", "%4D", "%4E", "%4F", + "%50", "%51", "%52", "%53", "%54", "%55", "%56", "%57", + "%58", "%59", "%5A", "%5B", "%5C", "%5D", "%5E", "%5F", + "%60", "%61", "%62", "%63", "%64", "%65", "%66", "%67", + "%68", "%69", "%6A", "%6B", "%6C", "%6D", "%6E", "%6F", + "%70", "%71", "%72", "%73", "%74", "%75", "%76", "%77", + "%78", "%79", "%7A", "%7B", "%7C", "%7D", "%7E", "%7F", + "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", + "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", + "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", + "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", + "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", + "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", + "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", + "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", + "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", + "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", + "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", + "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", + "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", + "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", + "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", + "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF" 
+}; + +static const uint8_t SIMPLE_ENCODE_SET[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 
0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 +}; + +static const uint8_t DEFAULT_ENCODE_SET[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x80, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 
6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x08 | 0x00 | 0x20 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 +}; + +static const uint8_t USERINFO_ENCODE_SET[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 
0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 40 41 42 43 44 45 46 47 + 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x40 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 
D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 +}; + +static const uint8_t QUERY_ENCODE_SET[32] = { + // 00 01 02 03 04 05 06 07 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 08 09 0A 0B 0C 0D 0E 0F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 10 11 12 13 14 15 16 17 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 18 19 1A 1B 1C 1D 1E 1F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 20 21 22 23 24 25 26 27 + 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, + // 28 29 2A 2B 2C 2D 2E 2F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 30 31 32 33 34 35 36 37 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 38 39 3A 3B 3C 3D 3E 3F + 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00, + // 40 41 42 43 44 45 46 47 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 48 49 4A 4B 4C 4D 4E 4F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 50 51 52 53 54 55 56 57 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 58 59 5A 5B 5C 5D 5E 5F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 60 61 62 63 64 65 66 67 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 68 69 6A 6B 6C 6D 6E 6F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 70 71 72 73 74 75 76 77 + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, + // 78 79 7A 7B 7C 7D 7E 7F + 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, + // 80 81 82 83 84 85 86 87 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 88 89 8A 8B 8C 8D 8E 8F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 
| 0x20 | 0x40 | 0x80, + // 90 91 92 93 94 95 96 97 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // 98 99 9A 9B 9C 9D 9E 9F + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A0 A1 A2 A3 A4 A5 A6 A7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // A8 A9 AA AB AC AD AE AF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B0 B1 B2 B3 B4 B5 B6 B7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // B8 B9 BA BB BC BD BE BF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C0 C1 C2 C3 C4 C5 C6 C7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // C8 C9 CA CB CC CD CE CF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D0 D1 D2 D3 D4 D5 D6 D7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // D8 D9 DA DB DC DD DE DF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E0 E1 E2 E3 E4 E5 E6 E7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // E8 E9 EA EB EC ED EE EF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F0 F1 F2 F3 F4 F5 F6 F7 + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, + // F8 F9 FA FB FC FD FE FF + 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 +}; + +static inline bool BitAt(const uint8_t a[], const uint8_t i) { + return !!(a[i >> 3] & (1 << (i & 7))); +} + +// Appends ch to str. If ch position in encode_set is set, the ch will +// be percent-encoded then appended. 
+static inline void AppendOrEscape(std::string* str, + const unsigned char ch, + const uint8_t encode_set[]) { + if (BitAt(encode_set, ch)) + *str += hex[ch]; + else + *str += ch; +} + +template +static inline unsigned hex2bin(const T ch) { + if (ch >= '0' && ch <= '9') + return ch - '0'; + if (ch >= 'A' && ch <= 'F') + return 10 + (ch - 'A'); + if (ch >= 'a' && ch <= 'f') + return 10 + (ch - 'a'); + return static_cast(-1); +} + +static inline void PercentDecode(const char* input, + size_t len, + std::string* dest) { + if (len == 0) + return; + dest->reserve(len); + const char* pointer = input; + const char* end = input + len; + size_t remaining = pointer - end - 1; + while (pointer < end) { + const char ch = pointer[0]; + remaining = (end - pointer) + 1; + if (ch != '%' || remaining < 2 || + (ch == '%' && + (!IsASCIIHexDigit(pointer[1]) || + !IsASCIIHexDigit(pointer[2])))) { + *dest += ch; + pointer++; + continue; + } else { + unsigned a = hex2bin(pointer[1]); + unsigned b = hex2bin(pointer[2]); + char c = static_cast(a * 16 + b); + *dest += c; + pointer += 3; + } } +} + +#define SPECIALS(XX) \ + XX("ftp:", 21) \ + XX("file:", -1) \ + XX("gopher:", 70) \ + XX("http:", 80) \ + XX("https:", 443) \ + XX("ws:", 80) \ + XX("wss:", 443) + +static inline bool IsSpecial(std::string scheme) { +#define XX(name, _) if (scheme == name) return true; + SPECIALS(XX); +#undef XX + return false; +} - // If a UTF-16 character is a surrogate. - static inline bool IsUnicodeSurrogate(uint16_t c) { - return (c & 0xF800) == 0xD800; - } +static inline int NormalizePort(std::string scheme, int p) { +#define XX(name, port) if (scheme == name && p == port) return -1; + SPECIALS(XX); +#undef XX + return p; +} - // If a UTF-16 surrogate is a low/trailing one. 
- static inline bool IsUnicodeSurrogateTrail(uint16_t c) { - return (c & 0x400) != 0; - } +#if defined(NODE_HAVE_I18N_SUPPORT) +static inline bool ToUnicode(std::string* input, std::string* output) { + MaybeStackBuffer buf; + if (i18n::ToUnicode(&buf, input->c_str(), input->length()) < 0) + return false; + output->assign(*buf, buf.length()); + return true; +} + +static inline bool ToASCII(std::string* input, std::string* output) { + MaybeStackBuffer buf; + if (i18n::ToASCII(&buf, input->c_str(), input->length()) < 0) + return false; + output->assign(*buf, buf.length()); + return true; +} +#else +// Intentional non-ops if ICU is not present. +static inline bool ToUnicode(std::string* input, std::string* output) { + *output = *input; + return true; +} + +static inline bool ToASCII(std::string* input, std::string* output) { + *output = *input; + return true; +} +#endif - static url_host_type ParseIPv6Host(url_host* host, - const char* input, - size_t length) { - url_host_type type = HOST_TYPE_FAILED; - for (unsigned n = 0; n < 8; n++) - host->value.ipv6[n] = 0; - uint16_t* piece_pointer = &host->value.ipv6[0]; - uint16_t* last_piece = piece_pointer + 8; - uint16_t* compress_pointer = nullptr; - const char* pointer = input; - const char* end = pointer + length; - unsigned value, len, swaps, numbers_seen; - char ch = pointer < end ? pointer[0] : kEOL; +static url_host_type ParseIPv6Host(url_host* host, + const char* input, + size_t length) { + url_host_type type = HOST_TYPE_FAILED; + for (unsigned n = 0; n < 8; n++) + host->value.ipv6[n] = 0; + uint16_t* piece_pointer = &host->value.ipv6[0]; + uint16_t* last_piece = piece_pointer + 8; + uint16_t* compress_pointer = nullptr; + const char* pointer = input; + const char* end = pointer + length; + unsigned value, len, swaps, numbers_seen; + char ch = pointer < end ? pointer[0] : kEOL; + if (ch == ':') { + if (length < 2 || pointer[1] != ':') + goto end; + pointer += 2; + ch = pointer < end ? 
pointer[0] : kEOL; + piece_pointer++; + compress_pointer = piece_pointer; + } + while (ch != kEOL) { + if (piece_pointer > last_piece) + goto end; if (ch == ':') { - if (length < 2 || pointer[1] != ':') + if (compress_pointer != nullptr) goto end; - pointer += 2; + pointer++; ch = pointer < end ? pointer[0] : kEOL; piece_pointer++; compress_pointer = piece_pointer; + continue; } - while (ch != kEOL) { - if (piece_pointer > last_piece) - goto end; - if (ch == ':') { - if (compress_pointer != nullptr) + value = 0; + len = 0; + while (len < 4 && IsASCIIHexDigit(ch)) { + value = value * 0x10 + hex2bin(ch); + pointer++; + ch = pointer < end ? pointer[0] : kEOL; + len++; + } + switch (ch) { + case '.': + if (len == 0) goto end; - pointer++; + pointer -= len; ch = pointer < end ? pointer[0] : kEOL; - piece_pointer++; - compress_pointer = piece_pointer; - continue; - } - value = 0; - len = 0; - while (len < 4 && ASCII_HEX_DIGIT(ch)) { - value = value * 0x10 + hex2bin(ch); - pointer++; - ch = pointer < end ? pointer[0] : kEOL; - len++; - } - switch (ch) { - case '.': - if (len == 0) - goto end; - pointer -= len; - ch = pointer < end ? pointer[0] : kEOL; - if (piece_pointer > last_piece - 2) - goto end; - numbers_seen = 0; - while (ch != kEOL) { - value = 0xffffffff; - if (numbers_seen > 0) { - if (ch == '.' && numbers_seen < 4) { - pointer++; - ch = pointer < end ? pointer[0] : kEOL; - } else { - goto end; - } - } - if (!ASCII_DIGIT(ch)) - goto end; - while (ASCII_DIGIT(ch)) { - unsigned number = ch - '0'; - if (value == 0xffffffff) { - value = number; - } else if (value == 0) { - goto end; - } else { - value = value * 10 + number; - } - if (value > 255) - goto end; + if (piece_pointer > last_piece - 2) + goto end; + numbers_seen = 0; + while (ch != kEOL) { + value = 0xffffffff; + if (numbers_seen > 0) { + if (ch == '.' && numbers_seen < 4) { pointer++; ch = pointer < end ? 
pointer[0] : kEOL; + } else { + goto end; } - *piece_pointer = *piece_pointer * 0x100 + value; - numbers_seen++; - if (numbers_seen == 2 || numbers_seen == 4) - piece_pointer++; } - if (numbers_seen != 4) - goto end; - continue; - case ':': - pointer++; - ch = pointer < end ? pointer[0] : kEOL; - if (ch == kEOL) + if (!IsASCIIDigit(ch)) goto end; - break; - case kEOL: - break; - default: + while (IsASCIIDigit(ch)) { + unsigned number = ch - '0'; + if (value == 0xffffffff) { + value = number; + } else if (value == 0) { + goto end; + } else { + value = value * 10 + number; + } + if (value > 255) + goto end; + pointer++; + ch = pointer < end ? pointer[0] : kEOL; + } + *piece_pointer = *piece_pointer * 0x100 + value; + numbers_seen++; + if (numbers_seen == 2 || numbers_seen == 4) + piece_pointer++; + } + if (numbers_seen != 4) goto end; - } - *piece_pointer = value; - piece_pointer++; - } - - if (compress_pointer != nullptr) { - swaps = piece_pointer - compress_pointer; - piece_pointer = last_piece - 1; - while (piece_pointer != &host->value.ipv6[0] && swaps > 0) { - uint16_t temp = *piece_pointer; - uint16_t* swap_piece = compress_pointer + swaps - 1; - *piece_pointer = *swap_piece; - *swap_piece = temp; - piece_pointer--; - swaps--; - } - } else if (compress_pointer == nullptr && - piece_pointer != last_piece) { - goto end; + continue; + case ':': + pointer++; + ch = pointer < end ? 
pointer[0] : kEOL; + if (ch == kEOL) + goto end; + break; + case kEOL: + break; + default: + goto end; } - type = HOST_TYPE_IPV6; - end: - host->type = type; - return type; + *piece_pointer = value; + piece_pointer++; } - static inline int64_t ParseNumber(const char* start, const char* end) { - unsigned R = 10; - if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') { - start += 2; - R = 16; + if (compress_pointer != nullptr) { + swaps = piece_pointer - compress_pointer; + piece_pointer = last_piece - 1; + while (piece_pointer != &host->value.ipv6[0] && swaps > 0) { + uint16_t temp = *piece_pointer; + uint16_t* swap_piece = compress_pointer + swaps - 1; + *piece_pointer = *swap_piece; + *swap_piece = temp; + piece_pointer--; + swaps--; } - if (end - start == 0) { - return 0; - } else if (R == 10 && end - start > 1 && start[0] == '0') { - start++; - R = 8; - } - const char* p = start; - - while (p < end) { - const char ch = p[0]; - switch (R) { - case 8: - if (ch < '0' || ch > '7') - return -1; - break; - case 10: - if (!ASCII_DIGIT(ch)) - return -1; - break; - case 16: - if (!ASCII_HEX_DIGIT(ch)) - return -1; - break; - } - p++; + } else if (compress_pointer == nullptr && + piece_pointer != last_piece) { + goto end; + } + type = HOST_TYPE_IPV6; + end: + host->type = type; + return type; +} + +static inline int64_t ParseNumber(const char* start, const char* end) { + unsigned R = 10; + if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') { + start += 2; + R = 16; + } + if (end - start == 0) { + return 0; + } else if (R == 10 && end - start > 1 && start[0] == '0') { + start++; + R = 8; + } + const char* p = start; + + while (p < end) { + const char ch = p[0]; + switch (R) { + case 8: + if (ch < '0' || ch > '7') + return -1; + break; + case 10: + if (!IsASCIIDigit(ch)) + return -1; + break; + case 16: + if (!IsASCIIHexDigit(ch)) + return -1; + break; } - return strtoll(start, NULL, R); + p++; } + return strtoll(start, NULL, R); +} + 
+static url_host_type ParseIPv4Host(url_host* host, + const char* input, + size_t length) { + url_host_type type = HOST_TYPE_DOMAIN; + const char* pointer = input; + const char* mark = input; + const char* end = pointer + length; + int parts = 0; + uint32_t val = 0; + uint64_t numbers[4]; + int tooBigNumbers = 0; + if (length == 0) + goto end; + + while (pointer <= end) { + const char ch = pointer < end ? pointer[0] : kEOL; + const int remaining = end - pointer - 1; + if (ch == '.' || ch == kEOL) { + if (++parts > 4) + goto end; + if (pointer - mark == 0) + break; + int64_t n = ParseNumber(mark, pointer); + if (n < 0) + goto end; - static url_host_type ParseIPv4Host(url_host* host, - const char* input, - size_t length) { - url_host_type type = HOST_TYPE_DOMAIN; - const char* pointer = input; - const char* mark = input; - const char* end = pointer + length; - int parts = 0; - uint32_t val = 0; - uint64_t numbers[4]; - int tooBigNumbers = 0; - if (length == 0) - goto end; - - while (pointer <= end) { - const char ch = pointer < end ? pointer[0] : kEOL; - const int remaining = end - pointer - 1; - if (ch == '.' || ch == kEOL) { - if (++parts > 4) - goto end; - if (pointer - mark == 0) - break; - int64_t n = ParseNumber(mark, pointer); - if (n < 0) - goto end; - - if (n > 255) { - tooBigNumbers++; - } - numbers[parts - 1] = n; - mark = pointer + 1; - if (ch == '.' && remaining == 0) - break; + if (n > 255) { + tooBigNumbers++; } - pointer++; - } - CHECK_GT(parts, 0); - - // If any but the last item in numbers is greater than 255, return failure. - // If the last item in numbers is greater than or equal to - // 256^(5 - the number of items in numbers), return failure. 
- if (tooBigNumbers > 1 || - (tooBigNumbers == 1 && numbers[parts - 1] <= 255) || - numbers[parts - 1] >= pow(256, static_cast(5 - parts))) { - type = HOST_TYPE_FAILED; - goto end; - } - - type = HOST_TYPE_IPV4; - val = numbers[parts - 1]; - for (int n = 0; n < parts - 1; n++) { - double b = 3 - n; - val += numbers[n] * pow(256, b); + numbers[parts - 1] = n; + mark = pointer + 1; + if (ch == '.' && remaining == 0) + break; } - - host->value.ipv4 = val; - end: - host->type = type; - return type; + pointer++; + } + CHECK_GT(parts, 0); + + // If any but the last item in numbers is greater than 255, return failure. + // If the last item in numbers is greater than or equal to + // 256^(5 - the number of items in numbers), return failure. + if (tooBigNumbers > 1 || + (tooBigNumbers == 1 && numbers[parts - 1] <= 255) || + numbers[parts - 1] >= pow(256, static_cast(5 - parts))) { + type = HOST_TYPE_FAILED; + goto end; } - static url_host_type ParseHost(url_host* host, - const char* input, - size_t length, - bool unicode = false) { - url_host_type type = HOST_TYPE_FAILED; - const char* pointer = input; - std::string decoded; + type = HOST_TYPE_IPV4; + val = numbers[parts - 1]; + for (int n = 0; n < parts - 1; n++) { + double b = 3 - n; + val += numbers[n] * pow(256, b); + } - if (length == 0) + host->value.ipv4 = val; + end: + host->type = type; + return type; +} + +static url_host_type ParseHost(url_host* host, + const char* input, + size_t length, + bool unicode = false) { + url_host_type type = HOST_TYPE_FAILED; + const char* pointer = input; + std::string decoded; + + if (length == 0) + goto end; + + if (pointer[0] == '[') { + if (pointer[length - 1] != ']') goto end; + return ParseIPv6Host(host, ++pointer, length - 2); + } - if (pointer[0] == '[') { - if (pointer[length - 1] != ']') - goto end; - return ParseIPv6Host(host, ++pointer, length - 2); - } + // First, we have to percent decode + PercentDecode(input, length, &decoded); - // First, we have to percent decode - 
PercentDecode(input, length, &decoded); + // Then we have to punycode toASCII + if (!ToASCII(&decoded, &decoded)) + goto end; - // Then we have to punycode toASCII - if (!ToASCII(&decoded, &decoded)) + // If any of the following characters are still present, we have to fail + for (size_t n = 0; n < decoded.size(); n++) { + const char ch = decoded[n]; + if (ch == 0x00 || ch == 0x09 || ch == 0x0a || ch == 0x0d || + ch == 0x20 || ch == '#' || ch == '%' || ch == '/' || + ch == '?' || ch == '@' || ch == '[' || ch == '\\' || + ch == ']') { goto end; - - // If any of the following characters are still present, we have to fail - for (size_t n = 0; n < decoded.size(); n++) { - const char ch = decoded[n]; - if (ch == 0x00 || ch == 0x09 || ch == 0x0a || ch == 0x0d || - ch == 0x20 || ch == '#' || ch == '%' || ch == '/' || - ch == '?' || ch == '@' || ch == '[' || ch == '\\' || - ch == ']') { - goto end; - } } - - // Check to see if it's an IPv4 IP address - type = ParseIPv4Host(host, decoded.c_str(), decoded.length()); - if (type == HOST_TYPE_IPV4 || type == HOST_TYPE_FAILED) - goto end; - - // If the unicode flag is set, run the result through punycode ToUnicode - if (unicode && !ToUnicode(&decoded, &decoded)) - goto end; - - // It's not an IPv4 or IPv6 address, it must be a domain - type = HOST_TYPE_DOMAIN; - host->value.domain = decoded; - - end: - host->type = type; - return type; } - // Locates the longest sequence of 0 segments in an IPv6 address - // in order to use the :: compression when serializing - static inline uint16_t* FindLongestZeroSequence(uint16_t* values, - size_t len) { - uint16_t* start = values; - uint16_t* end = start + len; - uint16_t* result = nullptr; - - uint16_t* current = nullptr; - unsigned counter = 0, longest = 1; - - while (start < end) { - if (*start == 0) { - if (current == nullptr) - current = start; - counter++; - } else { - if (counter > longest) { - longest = counter; - result = current; - } - counter = 0; - current = nullptr; + // Check 
to see if it's an IPv4 IP address + type = ParseIPv4Host(host, decoded.c_str(), decoded.length()); + if (type == HOST_TYPE_IPV4 || type == HOST_TYPE_FAILED) + goto end; + + // If the unicode flag is set, run the result through punycode ToUnicode + if (unicode && !ToUnicode(&decoded, &decoded)) + goto end; + + // It's not an IPv4 or IPv6 address, it must be a domain + type = HOST_TYPE_DOMAIN; + host->value.domain = decoded; + + end: + host->type = type; + return type; +} + +// Locates the longest sequence of 0 segments in an IPv6 address +// in order to use the :: compression when serializing +static inline uint16_t* FindLongestZeroSequence(uint16_t* values, + size_t len) { + uint16_t* start = values; + uint16_t* end = start + len; + uint16_t* result = nullptr; + + uint16_t* current = nullptr; + unsigned counter = 0, longest = 1; + + while (start < end) { + if (*start == 0) { + if (current == nullptr) + current = start; + counter++; + } else { + if (counter > longest) { + longest = counter; + result = current; } - start++; + counter = 0; + current = nullptr; } - if (counter > longest) - result = current; - return result; + start++; } - - static url_host_type WriteHost(url_host* host, std::string* dest) { - dest->clear(); - switch (host->type) { - case HOST_TYPE_DOMAIN: - *dest = host->value.domain; - break; - case HOST_TYPE_IPV4: { - dest->reserve(15); - uint32_t value = host->value.ipv4; - for (int n = 0; n < 4; n++) { - char buf[4]; - char* buffer = buf; - snprintf(buffer, sizeof(buf), "%d", value % 256); - dest->insert(0, buf); - if (n < 3) - dest->insert(0, 1, '.'); - value /= 256; - } - break; + if (counter > longest) + result = current; + return result; +} + +static url_host_type WriteHost(url_host* host, std::string* dest) { + dest->clear(); + switch (host->type) { + case HOST_TYPE_DOMAIN: + *dest = host->value.domain; + break; + case HOST_TYPE_IPV4: { + dest->reserve(15); + uint32_t value = host->value.ipv4; + for (int n = 0; n < 4; n++) { + char buf[4]; + 
char* buffer = buf; + snprintf(buffer, sizeof(buf), "%d", value % 256); + dest->insert(0, buf); + if (n < 3) + dest->insert(0, 1, '.'); + value /= 256; } - case HOST_TYPE_IPV6: { - dest->reserve(41); - *dest+= '['; - uint16_t* start = &host->value.ipv6[0]; - uint16_t* compress_pointer = - FindLongestZeroSequence(start, 8); - for (int n = 0; n <= 7; n++) { - uint16_t* piece = &host->value.ipv6[n]; - if (compress_pointer == piece) { - *dest += n == 0 ? "::" : ":"; - while (*piece == 0 && ++n < 8) - piece = &host->value.ipv6[n]; - if (n == 8) - break; - } - char buf[5]; - char* buffer = buf; - snprintf(buffer, sizeof(buf), "%x", *piece); - *dest += buf; - if (n < 7) - *dest += ':'; + break; + } + case HOST_TYPE_IPV6: { + dest->reserve(41); + *dest+= '['; + uint16_t* start = &host->value.ipv6[0]; + uint16_t* compress_pointer = + FindLongestZeroSequence(start, 8); + for (int n = 0; n <= 7; n++) { + uint16_t* piece = &host->value.ipv6[n]; + if (compress_pointer == piece) { + *dest += n == 0 ? 
"::" : ":"; + while (*piece == 0 && ++n < 8) + piece = &host->value.ipv6[n]; + if (n == 8) + break; } - *dest += ']'; - break; + char buf[5]; + char* buffer = buf; + snprintf(buffer, sizeof(buf), "%x", *piece); + *dest += buf; + if (n < 7) + *dest += ':'; } - case HOST_TYPE_FAILED: - break; + *dest += ']'; + break; } - return host->type; + case HOST_TYPE_FAILED: + break; } + return host->type; +} - static bool ParseHost(std::string* input, - std::string* output, - bool unicode = false) { - if (input->length() == 0) - return true; - url_host host{{""}, HOST_TYPE_DOMAIN}; - ParseHost(&host, input->c_str(), input->length(), unicode); - if (host.type == HOST_TYPE_FAILED) - return false; - WriteHost(&host, output); +static bool ParseHost(std::string* input, + std::string* output, + bool unicode = false) { + if (input->length() == 0) return true; - } - - static inline void Copy(Environment* env, - Local ary, - std::vector* vec) { - const int32_t len = ary->Length(); - if (len == 0) - return; // nothing to copy - vec->reserve(len); - for (int32_t n = 0; n < len; n++) { - Local val = ary->Get(env->context(), n).ToLocalChecked(); - if (val->IsString()) { - Utf8Value value(env->isolate(), val.As()); - vec->push_back(std::string(*value, value.length())); - } + url_host host{{""}, HOST_TYPE_DOMAIN}; + ParseHost(&host, input->c_str(), input->length(), unicode); + if (host.type == HOST_TYPE_FAILED) + return false; + WriteHost(&host, output); + return true; +} + +static inline void Copy(Environment* env, + Local ary, + std::vector* vec) { + const int32_t len = ary->Length(); + if (len == 0) + return; // nothing to copy + vec->reserve(len); + for (int32_t n = 0; n < len; n++) { + Local val = ary->Get(env->context(), n).ToLocalChecked(); + if (val->IsString()) { + Utf8Value value(env->isolate(), val.As()); + vec->push_back(std::string(*value, value.length())); } } - - static inline Local Copy(Environment* env, - std::vector vec) { - Isolate* isolate = env->isolate(); - Local ary = 
Array::New(isolate, vec.size()); - for (size_t n = 0; n < vec.size(); n++) - ary->Set(env->context(), n, UTF8STRING(isolate, vec[n])).FromJust(); - return ary; +} + +static inline Local Copy(Environment* env, + std::vector vec) { + Isolate* isolate = env->isolate(); + Local ary = Array::New(isolate, vec.size()); + for (size_t n = 0; n < vec.size(); n++) + ary->Set(env->context(), n, UTF8STRING(isolate, vec[n])).FromJust(); + return ary; +} + +static inline void HarvestBase(Environment* env, + struct url_data* base, + Local base_obj) { + Local context = env->context(); + Local flags = GET(env, base_obj, "flags"); + if (flags->IsInt32()) + base->flags = flags->Int32Value(context).FromJust(); + + Local scheme = GET(env, base_obj, "scheme"); + base->scheme = Utf8Value(env->isolate(), scheme).out(); + + GET_AND_SET(env, base_obj, username, base, URL_FLAGS_HAS_USERNAME); + GET_AND_SET(env, base_obj, password, base, URL_FLAGS_HAS_PASSWORD); + GET_AND_SET(env, base_obj, host, base, URL_FLAGS_HAS_HOST); + GET_AND_SET(env, base_obj, query, base, URL_FLAGS_HAS_QUERY); + GET_AND_SET(env, base_obj, fragment, base, URL_FLAGS_HAS_FRAGMENT); + Local port = GET(env, base_obj, "port"); + if (port->IsInt32()) + base->port = port->Int32Value(context).FromJust(); + Local path = GET(env, base_obj, "path"); + if (path->IsArray()) { + base->flags |= URL_FLAGS_HAS_PATH; + Copy(env, path.As(), &(base->path)); } - - static inline void HarvestBase(Environment* env, - struct url_data* base, - Local base_obj) { - Local context = env->context(); - Local flags = GET(env, base_obj, "flags"); - if (flags->IsInt32()) - base->flags = flags->Int32Value(context).FromJust(); - - Local scheme = GET(env, base_obj, "scheme"); - base->scheme = Utf8Value(env->isolate(), scheme).out(); - - GET_AND_SET(env, base_obj, username, base, URL_FLAGS_HAS_USERNAME); - GET_AND_SET(env, base_obj, password, base, URL_FLAGS_HAS_PASSWORD); - GET_AND_SET(env, base_obj, host, base, URL_FLAGS_HAS_HOST); - GET_AND_SET(env, 
base_obj, query, base, URL_FLAGS_HAS_QUERY); - GET_AND_SET(env, base_obj, fragment, base, URL_FLAGS_HAS_FRAGMENT); - Local port = GET(env, base_obj, "port"); - if (port->IsInt32()) - base->port = port->Int32Value(context).FromJust(); - Local path = GET(env, base_obj, "path"); - if (path->IsArray()) { - base->flags |= URL_FLAGS_HAS_PATH; - Copy(env, path.As(), &(base->path)); - } +} + +static inline void HarvestContext(Environment* env, + struct url_data* context, + Local context_obj) { + Local flags = GET(env, context_obj, "flags"); + if (flags->IsInt32()) { + int32_t _flags = flags->Int32Value(env->context()).FromJust(); + if (_flags & URL_FLAGS_SPECIAL) + context->flags |= URL_FLAGS_SPECIAL; + if (_flags & URL_FLAGS_CANNOT_BE_BASE) + context->flags |= URL_FLAGS_CANNOT_BE_BASE; } - - static inline void HarvestContext(Environment* env, - struct url_data* context, - Local context_obj) { - Local flags = GET(env, context_obj, "flags"); - if (flags->IsInt32()) { - int32_t _flags = flags->Int32Value(env->context()).FromJust(); - if (_flags & URL_FLAGS_SPECIAL) - context->flags |= URL_FLAGS_SPECIAL; - if (_flags & URL_FLAGS_CANNOT_BE_BASE) - context->flags |= URL_FLAGS_CANNOT_BE_BASE; - } - Local scheme = GET(env, context_obj, "scheme"); - if (scheme->IsString()) { - Utf8Value value(env->isolate(), scheme); - context->scheme.assign(*value, value.length()); - } - Local port = GET(env, context_obj, "port"); - if (port->IsInt32()) - context->port = port->Int32Value(env->context()).FromJust(); + Local scheme = GET(env, context_obj, "scheme"); + if (scheme->IsString()) { + Utf8Value value(env->isolate(), scheme); + context->scheme.assign(*value, value.length()); } - - // Single dot segment can be ".", "%2e", or "%2E" - static inline bool IsSingleDotSegment(std::string str) { - switch (str.size()) { - case 1: - return str == "."; - case 3: - return str[0] == '%' && - str[1] == '2' && - TO_LOWER(str[2]) == 'e'; - default: - return false; - } + Local port = GET(env, context_obj, 
"port"); + if (port->IsInt32()) + context->port = port->Int32Value(env->context()).FromJust(); +} + +// Single dot segment can be ".", "%2e", or "%2E" +static inline bool IsSingleDotSegment(std::string str) { + switch (str.size()) { + case 1: + return str == "."; + case 3: + return str[0] == '%' && + str[1] == '2' && + ASCIILowercase(str[2]) == 'e'; + default: + return false; } - - // Double dot segment can be: - // "..", ".%2e", ".%2E", "%2e.", "%2E.", - // "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e" - static inline bool IsDoubleDotSegment(std::string str) { - switch (str.size()) { - case 2: - return str == ".."; - case 4: - if (str[0] != '.' && str[0] != '%') - return false; - return ((str[0] == '.' && - str[1] == '%' && - str[2] == '2' && - TO_LOWER(str[3]) == 'e') || - (str[0] == '%' && - str[1] == '2' && - TO_LOWER(str[2]) == 'e' && - str[3] == '.')); - case 6: - return (str[0] == '%' && - str[1] == '2' && - TO_LOWER(str[2]) == 'e' && - str[3] == '%' && - str[4] == '2' && - TO_LOWER(str[5]) == 'e'); - default: +} + +// Double dot segment can be: +// "..", ".%2e", ".%2E", "%2e.", "%2E.", +// "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e" +static inline bool IsDoubleDotSegment(std::string str) { + switch (str.size()) { + case 2: + return str == ".."; + case 4: + if (str[0] != '.' && str[0] != '%') return false; - } + return ((str[0] == '.' 
&& + str[1] == '%' && + str[2] == '2' && + ASCIILowercase(str[3]) == 'e') || + (str[0] == '%' && + str[1] == '2' && + ASCIILowercase(str[2]) == 'e' && + str[3] == '.')); + case 6: + return (str[0] == '%' && + str[1] == '2' && + ASCIILowercase(str[2]) == 'e' && + str[3] == '%' && + str[4] == '2' && + ASCIILowercase(str[5]) == 'e'); + default: + return false; } - - static inline void ShortenUrlPath(struct url_data* url) { - if (url->path.empty()) return; - if (url->path.size() == 1 && url->scheme == "file:" && - NORMALIZED_WINDOWS_DRIVE_LETTER(url->path[0])) return; - url->path.pop_back(); +} + +static inline void ShortenUrlPath(struct url_data* url) { + if (url->path.empty()) return; + if (url->path.size() == 1 && url->scheme == "file:" && + IsNormalizedWindowsDriveLetter(url->path[0])) return; + url->path.pop_back(); +} + +void URL::Parse(const char* input, + const size_t len, + enum url_parse_state state_override, + struct url_data* url, + const struct url_data* base, + bool has_base) { + bool atflag = false; + bool sbflag = false; + bool uflag = false; + bool base_is_file = false; + int wskip = 0; + + std::string buffer; + url->scheme.reserve(len); + url->username.reserve(len); + url->password.reserve(len); + url->host.reserve(len); + url->path.reserve(len); + url->query.reserve(len); + url->fragment.reserve(len); + buffer.reserve(len); + + // Set the initial parse state. + const bool has_state_override = state_override != kUnknownState; + enum url_parse_state state = has_state_override ? 
state_override : + kSchemeStart; + + const char* p = input; + const char* end = input + len; + + if (state < kSchemeStart || state > kFragment) { + url->flags |= URL_FLAGS_INVALID_PARSE_STATE; + return; } - void URL::Parse(const char* input, - const size_t len, - enum url_parse_state state_override, - struct url_data* url, - const struct url_data* base, - bool has_base) { - bool atflag = false; - bool sbflag = false; - bool uflag = false; - bool base_is_file = false; - int wskip = 0; - - std::string buffer; - url->scheme.reserve(len); - url->username.reserve(len); - url->password.reserve(len); - url->host.reserve(len); - url->path.reserve(len); - url->query.reserve(len); - url->fragment.reserve(len); - buffer.reserve(len); - - // Set the initial parse state. - const bool has_state_override = state_override != kUnknownState; - enum url_parse_state state = has_state_override ? state_override : - kSchemeStart; - - const char* p = input; - const char* end = input + len; - - if (state < kSchemeStart || state > kFragment) { - url->flags |= URL_FLAGS_INVALID_PARSE_STATE; - return; - } - - while (p <= end) { - const char ch = p < end ? p[0] : kEOL; + while (p <= end) { + const char ch = p < end ? p[0] : kEOL; - if (TAB_AND_NEWLINE(ch)) { - if (state == kAuthority) { - // It's necessary to keep track of how much whitespace - // is being ignored when in kAuthority state because of - // how the buffer is managed. TODO: See if there's a better - // way - wskip++; - } - p++; - continue; + if (IsASCIITabOrNewline(ch)) { + if (state == kAuthority) { + // It's necessary to keep track of how much whitespace + // is being ignored when in kAuthority state because of + // how the buffer is managed. 
TODO: See if there's a better + // way + wskip++; } + p++; + continue; + } - bool special = (url->flags & URL_FLAGS_SPECIAL); - bool cannot_be_base; - const bool special_back_slash = (special && ch == '\\'); - switch (state) { - case kSchemeStart: - if (ASCII_ALPHA(ch)) { - buffer += TO_LOWER(ch); - state = kScheme; - } else if (!has_state_override) { - state = kNoScheme; - continue; - } else { - url->flags |= URL_FLAGS_FAILED; + bool special = (url->flags & URL_FLAGS_SPECIAL); + bool cannot_be_base; + const bool special_back_slash = (special && ch == '\\'); + switch (state) { + case kSchemeStart: + if (IsASCIIAlpha(ch)) { + buffer += ASCIILowercase(ch); + state = kScheme; + } else if (!has_state_override) { + state = kNoScheme; + continue; + } else { + url->flags |= URL_FLAGS_FAILED; + return; + } + break; + case kScheme: + if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') { + buffer += ASCIILowercase(ch); + p++; + continue; + } else if (ch == ':' || (has_state_override && ch == kEOL)) { + if (buffer.size() > 0) { + buffer += ':'; + url->scheme = buffer; + } else if (has_state_override) { + url->flags |= URL_FLAGS_TERMINATED; return; } - break; - case kScheme: - if (SCHEME_CHAR(ch)) { - buffer += TO_LOWER(ch); - p++; - continue; - } else if (ch == ':' || (has_state_override && ch == kEOL)) { - if (buffer.size() > 0) { - buffer += ':'; - url->scheme = buffer; - } else if (has_state_override) { - url->flags |= URL_FLAGS_TERMINATED; - return; - } - if (IsSpecial(url->scheme)) { - url->flags |= URL_FLAGS_SPECIAL; - special = true; - } else { - url->flags &= ~URL_FLAGS_SPECIAL; - } - if (has_state_override) - return; - buffer.clear(); - if (url->scheme == "file:") { - state = kFile; - } else if (special && - has_base && - url->scheme == base->scheme) { - state = kSpecialRelativeOrAuthority; - } else if (special) { - state = kSpecialAuthoritySlashes; - } else if (p[1] == '/') { - state = kPathOrAuthority; - p++; - } else { - url->flags |= 
URL_FLAGS_CANNOT_BE_BASE; - url->flags |= URL_FLAGS_HAS_PATH; - url->path.push_back(""); - state = kCannotBeBase; - } - } else if (!has_state_override) { - buffer.clear(); - state = kNoScheme; - p = input; - continue; + if (IsSpecial(url->scheme)) { + url->flags |= URL_FLAGS_SPECIAL; + special = true; } else { - url->flags |= URL_FLAGS_FAILED; - return; + url->flags &= ~URL_FLAGS_SPECIAL; } - break; - case kNoScheme: - cannot_be_base = base->flags & URL_FLAGS_CANNOT_BE_BASE; - if (!has_base || (cannot_be_base && ch != '#')) { - url->flags |= URL_FLAGS_FAILED; + if (has_state_override) return; - } else if (cannot_be_base && ch == '#') { - url->scheme = base->scheme; - if (IsSpecial(url->scheme)) { - url->flags |= URL_FLAGS_SPECIAL; - special = true; - } else { - url->flags &= ~URL_FLAGS_SPECIAL; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - if (base->flags & URL_FLAGS_HAS_QUERY) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = base->query; - } - if (base->flags & URL_FLAGS_HAS_FRAGMENT) { - url->flags |= URL_FLAGS_HAS_FRAGMENT; - url->fragment = base->fragment; - } - url->flags |= URL_FLAGS_CANNOT_BE_BASE; - state = kFragment; - } else if (has_base && - base->scheme != "file:") { - state = kRelative; - continue; - } else { - url->scheme = "file:"; - url->flags |= URL_FLAGS_SPECIAL; - special = true; + buffer.clear(); + if (url->scheme == "file:") { state = kFile; - continue; - } - break; - case kSpecialRelativeOrAuthority: - if (ch == '/' && p[1] == '/') { - state = kSpecialAuthorityIgnoreSlashes; + } else if (special && + has_base && + url->scheme == base->scheme) { + state = kSpecialRelativeOrAuthority; + } else if (special) { + state = kSpecialAuthoritySlashes; + } else if (p[1] == '/') { + state = kPathOrAuthority; p++; } else { - state = kRelative; - continue; - } - break; - case kPathOrAuthority: - if (ch == '/') { - state = kAuthority; - } else { - state = kPath; - continue; + url->flags 
|= URL_FLAGS_CANNOT_BE_BASE; + url->flags |= URL_FLAGS_HAS_PATH; + url->path.push_back(""); + state = kCannotBeBase; } - break; - case kRelative: + } else if (!has_state_override) { + buffer.clear(); + state = kNoScheme; + p = input; + continue; + } else { + url->flags |= URL_FLAGS_FAILED; + return; + } + break; + case kNoScheme: + cannot_be_base = base->flags & URL_FLAGS_CANNOT_BE_BASE; + if (!has_base || (cannot_be_base && ch != '#')) { + url->flags |= URL_FLAGS_FAILED; + return; + } else if (cannot_be_base && ch == '#') { url->scheme = base->scheme; if (IsSpecial(url->scheme)) { url->flags |= URL_FLAGS_SPECIAL; @@ -765,110 +1196,59 @@ namespace url { } else { url->flags &= ~URL_FLAGS_SPECIAL; } - switch (ch) { - case kEOL: - if (base->flags & URL_FLAGS_HAS_USERNAME) { - url->flags |= URL_FLAGS_HAS_USERNAME; - url->username = base->username; - } - if (base->flags & URL_FLAGS_HAS_PASSWORD) { - url->flags |= URL_FLAGS_HAS_PASSWORD; - url->password = base->password; - } - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_QUERY) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = base->query; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - url->port = base->port; - break; - case '/': - state = kRelativeSlash; - break; - case '?': - if (base->flags & URL_FLAGS_HAS_USERNAME) { - url->flags |= URL_FLAGS_HAS_USERNAME; - url->username = base->username; - } - if (base->flags & URL_FLAGS_HAS_PASSWORD) { - url->flags |= URL_FLAGS_HAS_PASSWORD; - url->password = base->password; - } - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - url->port = base->port; - state = kQuery; - break; - case '#': - if (base->flags & URL_FLAGS_HAS_USERNAME) { - url->flags |= 
URL_FLAGS_HAS_USERNAME; - url->username = base->username; - } - if (base->flags & URL_FLAGS_HAS_PASSWORD) { - url->flags |= URL_FLAGS_HAS_PASSWORD; - url->password = base->password; - } - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_QUERY) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = base->query; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - url->port = base->port; - state = kFragment; - break; - default: - if (special_back_slash) { - state = kRelativeSlash; - } else { - if (base->flags & URL_FLAGS_HAS_USERNAME) { - url->flags |= URL_FLAGS_HAS_USERNAME; - url->username = base->username; - } - if (base->flags & URL_FLAGS_HAS_PASSWORD) { - url->flags |= URL_FLAGS_HAS_PASSWORD; - url->password = base->password; - } - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - ShortenUrlPath(url); - } - url->port = base->port; - state = kPath; - continue; - } + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; } - break; - case kRelativeSlash: - if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) { - state = kSpecialAuthorityIgnoreSlashes; - } else if (ch == '/') { - state = kAuthority; - } else { + if (base->flags & URL_FLAGS_HAS_QUERY) { + url->flags |= URL_FLAGS_HAS_QUERY; + url->query = base->query; + } + if (base->flags & URL_FLAGS_HAS_FRAGMENT) { + url->flags |= URL_FLAGS_HAS_FRAGMENT; + url->fragment = base->fragment; + } + url->flags |= URL_FLAGS_CANNOT_BE_BASE; + state = kFragment; + } else if (has_base && + base->scheme != "file:") { + state = kRelative; + continue; + } else { + url->scheme = "file:"; + url->flags |= URL_FLAGS_SPECIAL; + special = true; + state = kFile; + continue; + } 
+ break; + case kSpecialRelativeOrAuthority: + if (ch == '/' && p[1] == '/') { + state = kSpecialAuthorityIgnoreSlashes; + p++; + } else { + state = kRelative; + continue; + } + break; + case kPathOrAuthority: + if (ch == '/') { + state = kAuthority; + } else { + state = kPath; + continue; + } + break; + case kRelative: + url->scheme = base->scheme; + if (IsSpecial(url->scheme)) { + url->flags |= URL_FLAGS_SPECIAL; + special = true; + } else { + url->flags &= ~URL_FLAGS_SPECIAL; + } + switch (ch) { + case kEOL: if (base->flags & URL_FLAGS_HAS_USERNAME) { url->flags |= URL_FLAGS_HAS_USERNAME; url->username = base->username; @@ -881,604 +1261,546 @@ namespace url { url->flags |= URL_FLAGS_HAS_HOST; url->host = base->host; } + if (base->flags & URL_FLAGS_HAS_QUERY) { + url->flags |= URL_FLAGS_HAS_QUERY; + url->query = base->query; + } + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; + } url->port = base->port; - state = kPath; - continue; - } - break; - case kSpecialAuthoritySlashes: - state = kSpecialAuthorityIgnoreSlashes; - if (ch == '/' && p[1] == '/') { - p++; - } else { - continue; - } - break; - case kSpecialAuthorityIgnoreSlashes: - if (ch != '/' && ch != '\\') { - state = kAuthority; - continue; - } - break; - case kAuthority: - if (ch == '@') { - if (atflag) { - buffer.reserve(buffer.size() + 3); - buffer.insert(0, "%40"); + break; + case '/': + state = kRelativeSlash; + break; + case '?': + if (base->flags & URL_FLAGS_HAS_USERNAME) { + url->flags |= URL_FLAGS_HAS_USERNAME; + url->username = base->username; + } + if (base->flags & URL_FLAGS_HAS_PASSWORD) { + url->flags |= URL_FLAGS_HAS_PASSWORD; + url->password = base->password; } - atflag = true; - const size_t blen = buffer.size(); - if (blen > 0 && buffer[0] != ':') { + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; + } + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; 
+ url->path = base->path; + } + url->port = base->port; + state = kQuery; + break; + case '#': + if (base->flags & URL_FLAGS_HAS_USERNAME) { url->flags |= URL_FLAGS_HAS_USERNAME; + url->username = base->username; } - for (size_t n = 0; n < blen; n++) { - const char bch = buffer[n]; - if (bch == ':') { - url->flags |= URL_FLAGS_HAS_PASSWORD; - if (!uflag) { - uflag = true; - continue; - } - } - if (uflag) { - AppendOrEscape(&url->password, bch, UserinfoEncodeSet); - } else { - AppendOrEscape(&url->username, bch, UserinfoEncodeSet); - } + if (base->flags & URL_FLAGS_HAS_PASSWORD) { + url->flags |= URL_FLAGS_HAS_PASSWORD; + url->password = base->password; } - buffer.clear(); - } else if (ch == kEOL || - ch == '/' || - ch == '?' || - ch == '#' || - special_back_slash) { - p -= buffer.size() + 1 + wskip; - buffer.clear(); - state = kHost; - } else { - buffer += ch; - } - break; - case kHost: - case kHostname: - if (ch == ':' && !sbflag) { - if (special && buffer.size() == 0) { - url->flags |= URL_FLAGS_FAILED; - return; + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; } - url->flags |= URL_FLAGS_HAS_HOST; - if (!ParseHost(&buffer, &url->host)) { - url->flags |= URL_FLAGS_FAILED; - return; + if (base->flags & URL_FLAGS_HAS_QUERY) { + url->flags |= URL_FLAGS_HAS_QUERY; + url->query = base->query; } - buffer.clear(); - state = kPort; - if (state_override == kHostname) { - return; + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; } - } else if (ch == kEOL || - ch == '/' || - ch == '?' 
|| - ch == '#' || - special_back_slash) { - p--; - if (special && buffer.size() == 0) { - url->flags |= URL_FLAGS_FAILED; - return; + url->port = base->port; + state = kFragment; + break; + default: + if (special_back_slash) { + state = kRelativeSlash; + } else { + if (base->flags & URL_FLAGS_HAS_USERNAME) { + url->flags |= URL_FLAGS_HAS_USERNAME; + url->username = base->username; + } + if (base->flags & URL_FLAGS_HAS_PASSWORD) { + url->flags |= URL_FLAGS_HAS_PASSWORD; + url->password = base->password; + } + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; + } + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; + ShortenUrlPath(url); + } + url->port = base->port; + state = kPath; + continue; } + } + break; + case kRelativeSlash: + if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) { + state = kSpecialAuthorityIgnoreSlashes; + } else if (ch == '/') { + state = kAuthority; + } else { + if (base->flags & URL_FLAGS_HAS_USERNAME) { + url->flags |= URL_FLAGS_HAS_USERNAME; + url->username = base->username; + } + if (base->flags & URL_FLAGS_HAS_PASSWORD) { + url->flags |= URL_FLAGS_HAS_PASSWORD; + url->password = base->password; + } + if (base->flags & URL_FLAGS_HAS_HOST) { url->flags |= URL_FLAGS_HAS_HOST; - if (!ParseHost(&buffer, &url->host)) { - url->flags |= URL_FLAGS_FAILED; - return; + url->host = base->host; + } + url->port = base->port; + state = kPath; + continue; + } + break; + case kSpecialAuthoritySlashes: + state = kSpecialAuthorityIgnoreSlashes; + if (ch == '/' && p[1] == '/') { + p++; + } else { + continue; + } + break; + case kSpecialAuthorityIgnoreSlashes: + if (ch != '/' && ch != '\\') { + state = kAuthority; + continue; + } + break; + case kAuthority: + if (ch == '@') { + if (atflag) { + buffer.reserve(buffer.size() + 3); + buffer.insert(0, "%40"); + } + atflag = true; + const size_t blen = buffer.size(); + if (blen > 0 && buffer[0] != ':') { + 
url->flags |= URL_FLAGS_HAS_USERNAME; + } + for (size_t n = 0; n < blen; n++) { + const char bch = buffer[n]; + if (bch == ':') { + url->flags |= URL_FLAGS_HAS_PASSWORD; + if (!uflag) { + uflag = true; + continue; + } } - buffer.clear(); - state = kPathStart; - if (has_state_override) { - return; + if (uflag) { + AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET); + } else { + AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET); } - } else { - if (ch == '[') - sbflag = true; - if (ch == ']') - sbflag = false; - buffer += TO_LOWER(ch); } - break; - case kPort: - if (ASCII_DIGIT(ch)) { - buffer += ch; - } else if (has_state_override || - ch == kEOL || - ch == '/' || - ch == '?' || - ch == '#' || - special_back_slash) { - if (buffer.size() > 0) { - int port = 0; - for (size_t i = 0; i < buffer.size(); i++) - port = port * 10 + buffer[i] - '0'; - if (port < 0 || port > 0xffff) { - // TODO(TimothyGu): This hack is currently needed for the host - // setter since it needs access to hostname if it is valid, and - // if the FAILED flag is set the entire response to JS layer - // will be empty. - if (state_override == kHost) - url->port = -1; - else - url->flags |= URL_FLAGS_FAILED; - return; - } - url->port = NormalizePort(url->scheme, port); - buffer.clear(); - } else if (has_state_override) { - // TODO(TimothyGu): Similar case as above. + buffer.clear(); + } else if (ch == kEOL || + ch == '/' || + ch == '?' || + ch == '#' || + special_back_slash) { + p -= buffer.size() + 1 + wskip; + buffer.clear(); + state = kHost; + } else { + buffer += ch; + } + break; + case kHost: + case kHostname: + if (ch == ':' && !sbflag) { + if (special && buffer.size() == 0) { + url->flags |= URL_FLAGS_FAILED; + return; + } + url->flags |= URL_FLAGS_HAS_HOST; + if (!ParseHost(&buffer, &url->host)) { + url->flags |= URL_FLAGS_FAILED; + return; + } + buffer.clear(); + state = kPort; + if (state_override == kHostname) { + return; + } + } else if (ch == kEOL || + ch == '/' || + ch == '?' 
|| + ch == '#' || + special_back_slash) { + p--; + if (special && buffer.size() == 0) { + url->flags |= URL_FLAGS_FAILED; + return; + } + url->flags |= URL_FLAGS_HAS_HOST; + if (!ParseHost(&buffer, &url->host)) { + url->flags |= URL_FLAGS_FAILED; + return; + } + buffer.clear(); + state = kPathStart; + if (has_state_override) { + return; + } + } else { + if (ch == '[') + sbflag = true; + if (ch == ']') + sbflag = false; + buffer += ASCIILowercase(ch); + } + break; + case kPort: + if (IsASCIIDigit(ch)) { + buffer += ch; + } else if (has_state_override || + ch == kEOL || + ch == '/' || + ch == '?' || + ch == '#' || + special_back_slash) { + if (buffer.size() > 0) { + int port = 0; + for (size_t i = 0; i < buffer.size(); i++) + port = port * 10 + buffer[i] - '0'; + if (port < 0 || port > 0xffff) { + // TODO(TimothyGu): This hack is currently needed for the host + // setter since it needs access to hostname if it is valid, and + // if the FAILED flag is set the entire response to JS layer + // will be empty. if (state_override == kHost) url->port = -1; else - url->flags |= URL_FLAGS_TERMINATED; + url->flags |= URL_FLAGS_FAILED; return; } - state = kPathStart; - continue; - } else { - url->flags |= URL_FLAGS_FAILED; + url->port = NormalizePort(url->scheme, port); + buffer.clear(); + } else if (has_state_override) { + // TODO(TimothyGu): Similar case as above. 
+ if (state_override == kHost) + url->port = -1; + else + url->flags |= URL_FLAGS_TERMINATED; return; } - break; - case kFile: - base_is_file = ( - has_base && - base->scheme == "file:"); - switch (ch) { - case kEOL: - if (base_is_file) { - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - if (base->flags & URL_FLAGS_HAS_QUERY) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = base->query; - } - break; + state = kPathStart; + continue; + } else { + url->flags |= URL_FLAGS_FAILED; + return; + } + break; + case kFile: + base_is_file = ( + has_base && + base->scheme == "file:"); + switch (ch) { + case kEOL: + if (base_is_file) { + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; } - state = kPath; - continue; - case '\\': - case '/': - state = kFileSlash; - break; - case '?': - if (base_is_file) { - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; + } + if (base->flags & URL_FLAGS_HAS_QUERY) { url->flags |= URL_FLAGS_HAS_QUERY; - state = kQuery; - break; + url->query = base->query; } - case '#': - if (base_is_file) { - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - if (base->flags & URL_FLAGS_HAS_QUERY) { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query = base->query; - } - state = kFragment; - break; + break; + } + state = kPath; + continue; + case '\\': + case '/': + state = kFileSlash; + break; + case '?': + if 
(base_is_file) { + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; } - default: - if (base_is_file && - (!WINDOWS_DRIVE_LETTER(ch, p[1]) || - end - p == 1 || - (p[2] != '/' && - p[2] != '\\' && - p[2] != '?' && - p[2] != '#'))) { - if (base->flags & URL_FLAGS_HAS_HOST) { - url->flags |= URL_FLAGS_HAS_HOST; - url->host = base->host; - } - if (base->flags & URL_FLAGS_HAS_PATH) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path = base->path; - } - ShortenUrlPath(url); + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; } - state = kPath; - continue; - } - break; - case kFileSlash: - if (ch == '/' || ch == '\\') { - state = kFileHost; - } else { - if (has_base && - base->scheme == "file:") { - if (NORMALIZED_WINDOWS_DRIVE_LETTER(base->path[0])) { + url->flags |= URL_FLAGS_HAS_QUERY; + state = kQuery; + break; + } + case '#': + if (base_is_file) { + if (base->flags & URL_FLAGS_HAS_HOST) { + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; + } + if (base->flags & URL_FLAGS_HAS_PATH) { url->flags |= URL_FLAGS_HAS_PATH; - url->path.push_back(base->path[0]); - } else { + url->path = base->path; + } + if (base->flags & URL_FLAGS_HAS_QUERY) { + url->flags |= URL_FLAGS_HAS_QUERY; + url->query = base->query; + } + state = kFragment; + break; + } + default: + if (base_is_file && + (!IsWindowsDriveLetter(ch, p[1]) || + end - p == 1 || + (p[2] != '/' && + p[2] != '\\' && + p[2] != '?' && + p[2] != '#'))) { + if (base->flags & URL_FLAGS_HAS_HOST) { url->flags |= URL_FLAGS_HAS_HOST; url->host = base->host; } + if (base->flags & URL_FLAGS_HAS_PATH) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path = base->path; + } + ShortenUrlPath(url); } state = kPath; continue; - } - break; - case kFileHost: - if (ch == kEOL || - ch == '/' || - ch == '\\' || - ch == '?' 
|| - ch == '#') { - if (buffer.size() == 2 && - WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) { - state = kPath; - } else if (buffer.size() == 0) { - state = kPathStart; + } + break; + case kFileSlash: + if (ch == '/' || ch == '\\') { + state = kFileHost; + } else { + if (has_base && + base->scheme == "file:") { + if (IsNormalizedWindowsDriveLetter(base->path[0])) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path.push_back(base->path[0]); } else { - if (buffer != "localhost") { - url->flags |= URL_FLAGS_HAS_HOST; - if (!ParseHost(&buffer, &url->host)) { - url->flags |= URL_FLAGS_FAILED; - return; - } - } - buffer.clear(); - state = kPathStart; + url->flags |= URL_FLAGS_HAS_HOST; + url->host = base->host; } - continue; - } else { - buffer += ch; } - break; - case kPathStart: - if (IsSpecial(url->scheme)) { - state = kPath; - if (ch != '/' && ch != '\\') { - continue; - } - } else if (!has_state_override && ch == '?') { - url->flags |= URL_FLAGS_HAS_QUERY; - url->query.clear(); - state = kQuery; - } else if (!has_state_override && ch == '#') { - url->flags |= URL_FLAGS_HAS_FRAGMENT; - url->fragment.clear(); - state = kFragment; - } else if (ch != kEOL) { + state = kPath; + continue; + } + break; + case kFileHost: + if (ch == kEOL || + ch == '/' || + ch == '\\' || + ch == '?' || + ch == '#') { + if (buffer.size() == 2 && + IsWindowsDriveLetter(buffer)) { state = kPath; - if (ch != '/') { - continue; + } else if (buffer.size() == 0) { + state = kPathStart; + } else { + if (buffer != "localhost") { + url->flags |= URL_FLAGS_HAS_HOST; + if (!ParseHost(&buffer, &url->host)) { + url->flags |= URL_FLAGS_FAILED; + return; + } } + buffer.clear(); + state = kPathStart; } - break; - case kPath: - if (ch == kEOL || - ch == '/' || - special_back_slash || - (!has_state_override && (ch == '?' 
|| ch == '#'))) { - if (IsDoubleDotSegment(buffer)) { - ShortenUrlPath(url); - if (ch != '/' && !special_back_slash) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path.push_back(""); - } - } else if (IsSingleDotSegment(buffer)) { - if (ch != '/' && !special_back_slash) { - url->flags |= URL_FLAGS_HAS_PATH; - url->path.push_back(""); - } - } else if (!IsSingleDotSegment(buffer)) { - if (url->scheme == "file:" && - url->path.empty() && - buffer.size() == 2 && - WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) { - url->flags &= ~URL_FLAGS_HAS_HOST; - buffer[1] = ':'; - } + continue; + } else { + buffer += ch; + } + break; + case kPathStart: + if (IsSpecial(url->scheme)) { + state = kPath; + if (ch != '/' && ch != '\\') { + continue; + } + } else if (!has_state_override && ch == '?') { + url->flags |= URL_FLAGS_HAS_QUERY; + url->query.clear(); + state = kQuery; + } else if (!has_state_override && ch == '#') { + url->flags |= URL_FLAGS_HAS_FRAGMENT; + url->fragment.clear(); + state = kFragment; + } else if (ch != kEOL) { + state = kPath; + if (ch != '/') { + continue; + } + } + break; + case kPath: + if (ch == kEOL || + ch == '/' || + special_back_slash || + (!has_state_override && (ch == '?' || ch == '#'))) { + if (IsDoubleDotSegment(buffer)) { + ShortenUrlPath(url); + if (ch != '/' && !special_back_slash) { url->flags |= URL_FLAGS_HAS_PATH; - std::string segment(buffer.c_str(), buffer.size()); - url->path.push_back(segment); + url->path.push_back(""); } - buffer.clear(); - if (url->scheme == "file:" && - (ch == kEOL || - ch == '?' 
|| - ch == '#')) { - while (url->path.size() > 1 && url->path[0].length() == 0) { - url->path.erase(url->path.begin()); - } + } else if (IsSingleDotSegment(buffer)) { + if (ch != '/' && !special_back_slash) { + url->flags |= URL_FLAGS_HAS_PATH; + url->path.push_back(""); } - if (ch == '?') { - url->flags |= URL_FLAGS_HAS_QUERY; - state = kQuery; - } else if (ch == '#') { - state = kFragment; + } else if (!IsSingleDotSegment(buffer)) { + if (url->scheme == "file:" && + url->path.empty() && + buffer.size() == 2 && + IsWindowsDriveLetter(buffer)) { + url->flags &= ~URL_FLAGS_HAS_HOST; + buffer[1] = ':'; } - } else { - AppendOrEscape(&buffer, ch, DefaultEncodeSet); + url->flags |= URL_FLAGS_HAS_PATH; + std::string segment(buffer.c_str(), buffer.size()); + url->path.push_back(segment); } - break; - case kCannotBeBase: - switch (ch) { - case '?': - state = kQuery; - break; - case '#': - state = kFragment; - break; - default: - if (url->path.size() == 0) - url->path.push_back(""); - if (url->path.size() > 0 && ch != kEOL) - AppendOrEscape(&url->path[0], ch, SimpleEncodeSet); + buffer.clear(); + if (url->scheme == "file:" && + (ch == kEOL || + ch == '?' 
|| + ch == '#')) { + while (url->path.size() > 1 && url->path[0].length() == 0) { + url->path.erase(url->path.begin()); + } } - break; - case kQuery: - if (ch == kEOL || (!has_state_override && ch == '#')) { + if (ch == '?') { url->flags |= URL_FLAGS_HAS_QUERY; - url->query = buffer; - buffer.clear(); - if (ch == '#') - state = kFragment; - } else { - AppendOrEscape(&buffer, ch, QueryEncodeSet); - } - break; - case kFragment: - switch (ch) { - case kEOL: - url->flags |= URL_FLAGS_HAS_FRAGMENT; - url->fragment = buffer; - break; - case 0: - break; - default: - AppendOrEscape(&buffer, ch, SimpleEncodeSet); + state = kQuery; + } else if (ch == '#') { + state = kFragment; } - break; - default: - url->flags |= URL_FLAGS_INVALID_PARSE_STATE; - return; - } - - p++; - } - } - - static inline void SetArgs(Environment* env, - Local argv[], - const struct url_data* url) { - Isolate* isolate = env->isolate(); - argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url->flags); - argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str()); - if (url->flags & URL_FLAGS_HAS_USERNAME) - argv[ARG_USERNAME] = UTF8STRING(isolate, url->username); - if (url->flags & URL_FLAGS_HAS_PASSWORD) - argv[ARG_PASSWORD] = UTF8STRING(isolate, url->password); - if (url->flags & URL_FLAGS_HAS_HOST) - argv[ARG_HOST] = UTF8STRING(isolate, url->host); - if (url->flags & URL_FLAGS_HAS_QUERY) - argv[ARG_QUERY] = UTF8STRING(isolate, url->query); - if (url->flags & URL_FLAGS_HAS_FRAGMENT) - argv[ARG_FRAGMENT] = UTF8STRING(isolate, url->fragment); - if (url->port > -1) - argv[ARG_PORT] = Integer::New(isolate, url->port); - if (url->flags & URL_FLAGS_HAS_PATH) - argv[ARG_PATH] = Copy(env, url->path); - } - - static void Parse(Environment* env, - Local recv, - const char* input, - const size_t len, - enum url_parse_state state_override, - Local base_obj, - Local context_obj, - Local cb, - Local error_cb) { - Isolate* isolate = env->isolate(); - Local context = env->context(); - HandleScope 
handle_scope(isolate); - Context::Scope context_scope(context); - - const bool has_base = base_obj->IsObject(); - - struct url_data base; - struct url_data url; - if (context_obj->IsObject()) - HarvestContext(env, &url, context_obj.As()); - if (has_base) - HarvestBase(env, &base, base_obj.As()); - - URL::Parse(input, len, state_override, &url, &base, has_base); - if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) || - ((state_override != kUnknownState) && - (url.flags & URL_FLAGS_TERMINATED))) - return; - - // Define the return value placeholders - const Local undef = Undefined(isolate); - if (!(url.flags & URL_FLAGS_FAILED)) { - Local argv[9] = { - undef, - undef, - undef, - undef, - undef, - undef, - undef, - undef, - undef, - }; - SetArgs(env, argv, &url); - (void)cb->Call(context, recv, arraysize(argv), argv); - } else if (error_cb->IsFunction()) { - Local argv[2] = { undef, undef }; - argv[ERR_ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags); - argv[ERR_ARG_INPUT] = - String::NewFromUtf8(env->isolate(), - input, - v8::NewStringType::kNormal).ToLocalChecked(); - (void)error_cb.As()->Call(context, recv, arraysize(argv), argv); - } - } - - static void Parse(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_GE(args.Length(), 5); - CHECK(args[0]->IsString()); // input - CHECK(args[2]->IsUndefined() || // base context - args[2]->IsNull() || - args[2]->IsObject()); - CHECK(args[3]->IsUndefined() || // context - args[3]->IsNull() || - args[3]->IsObject()); - CHECK(args[4]->IsFunction()); // complete callback - CHECK(args[5]->IsUndefined() || args[5]->IsFunction()); // error callback - - Utf8Value input(env->isolate(), args[0]); - enum url_parse_state state_override = kUnknownState; - if (args[1]->IsNumber()) { - state_override = static_cast( - args[1]->Uint32Value(env->context()).FromJust()); - } - - Parse(env, args.This(), - *input, input.length(), - state_override, - args[2], - args[3], - args[4].As(), - args[5]); 
- } - - static void EncodeAuthSet(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_GE(args.Length(), 1); - CHECK(args[0]->IsString()); - Utf8Value value(env->isolate(), args[0]); - std::string output; - const size_t len = value.length(); - output.reserve(len); - for (size_t n = 0; n < len; n++) { - const char ch = (*value)[n]; - AppendOrEscape(&output, ch, UserinfoEncodeSet); - } - args.GetReturnValue().Set( - String::NewFromUtf8(env->isolate(), - output.c_str(), - v8::NewStringType::kNormal).ToLocalChecked()); - } - - static void ToUSVString(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_GE(args.Length(), 2); - CHECK(args[0]->IsString()); - CHECK(args[1]->IsNumber()); - - TwoByteValue value(env->isolate(), args[0]); - const size_t n = value.length(); - - const int64_t start = args[1]->IntegerValue(env->context()).FromJust(); - CHECK_GE(start, 0); - - for (size_t i = start; i < n; i++) { - uint16_t c = value[i]; - if (!IsUnicodeSurrogate(c)) { - continue; - } else if (IsUnicodeSurrogateTrail(c) || i == n - 1) { - value[i] = UNICODE_REPLACEMENT_CHARACTER; - } else { - uint16_t d = value[i + 1]; - if (IsUnicodeTrail(d)) { - i++; } else { - value[i] = UNICODE_REPLACEMENT_CHARACTER; + AppendOrEscape(&buffer, ch, DEFAULT_ENCODE_SET); } - } - } - - args.GetReturnValue().Set( - String::NewFromTwoByte(env->isolate(), - *value, - v8::NewStringType::kNormal, - n).ToLocalChecked()); - } - - static void DomainToASCII(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_GE(args.Length(), 1); - CHECK(args[0]->IsString()); - Utf8Value value(env->isolate(), args[0]); - - url_host host{{""}, HOST_TYPE_DOMAIN}; - ParseHost(&host, *value, value.length()); - if (host.type == HOST_TYPE_FAILED) { - args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); - return; + break; + case kCannotBeBase: + switch (ch) { + case '?': + state = 
kQuery; + break; + case '#': + state = kFragment; + break; + default: + if (url->path.size() == 0) + url->path.push_back(""); + if (url->path.size() > 0 && ch != kEOL) + AppendOrEscape(&url->path[0], ch, SIMPLE_ENCODE_SET); + } + break; + case kQuery: + if (ch == kEOL || (!has_state_override && ch == '#')) { + url->flags |= URL_FLAGS_HAS_QUERY; + url->query = buffer; + buffer.clear(); + if (ch == '#') + state = kFragment; + } else { + AppendOrEscape(&buffer, ch, QUERY_ENCODE_SET); + } + break; + case kFragment: + switch (ch) { + case kEOL: + url->flags |= URL_FLAGS_HAS_FRAGMENT; + url->fragment = buffer; + break; + case 0: + break; + default: + AppendOrEscape(&buffer, ch, SIMPLE_ENCODE_SET); + } + break; + default: + url->flags |= URL_FLAGS_INVALID_PARSE_STATE; + return; } - std::string out; - WriteHost(&host, &out); - args.GetReturnValue().Set( - String::NewFromUtf8(env->isolate(), - out.c_str(), - v8::NewStringType::kNormal).ToLocalChecked()); - } - static void DomainToUnicode(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_GE(args.Length(), 1); - CHECK(args[0]->IsString()); - Utf8Value value(env->isolate(), args[0]); - - url_host host{{""}, HOST_TYPE_DOMAIN}; - ParseHost(&host, *value, value.length(), true); - if (host.type == HOST_TYPE_FAILED) { - args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); - return; - } - std::string out; - WriteHost(&host, &out); - args.GetReturnValue().Set( - String::NewFromUtf8(env->isolate(), - out.c_str(), - v8::NewStringType::kNormal).ToLocalChecked()); + p++; } - - // This function works by calling out to a JS function that creates and - // returns the JS URL object. Be mindful of the JS<->Native boundary - // crossing that is required. 
- const Local URL::ToObject(Environment* env) const { - Isolate* isolate = env->isolate(); - Local context = env->context(); - HandleScope handle_scope(isolate); - Context::Scope context_scope(context); - - const Local undef = Undefined(isolate); - - if (context_.flags & URL_FLAGS_FAILED) - return Local(); - +} // NOLINT(readability/fn_size) + +static inline void SetArgs(Environment* env, + Local argv[], + const struct url_data* url) { + Isolate* isolate = env->isolate(); + argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url->flags); + argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str()); + if (url->flags & URL_FLAGS_HAS_USERNAME) + argv[ARG_USERNAME] = UTF8STRING(isolate, url->username); + if (url->flags & URL_FLAGS_HAS_PASSWORD) + argv[ARG_PASSWORD] = UTF8STRING(isolate, url->password); + if (url->flags & URL_FLAGS_HAS_HOST) + argv[ARG_HOST] = UTF8STRING(isolate, url->host); + if (url->flags & URL_FLAGS_HAS_QUERY) + argv[ARG_QUERY] = UTF8STRING(isolate, url->query); + if (url->flags & URL_FLAGS_HAS_FRAGMENT) + argv[ARG_FRAGMENT] = UTF8STRING(isolate, url->fragment); + if (url->port > -1) + argv[ARG_PORT] = Integer::New(isolate, url->port); + if (url->flags & URL_FLAGS_HAS_PATH) + argv[ARG_PATH] = Copy(env, url->path); +} + +static void Parse(Environment* env, + Local recv, + const char* input, + const size_t len, + enum url_parse_state state_override, + Local base_obj, + Local context_obj, + Local cb, + Local error_cb) { + Isolate* isolate = env->isolate(); + Local context = env->context(); + HandleScope handle_scope(isolate); + Context::Scope context_scope(context); + + const bool has_base = base_obj->IsObject(); + + struct url_data base; + struct url_data url; + if (context_obj->IsObject()) + HarvestContext(env, &url, context_obj.As()); + if (has_base) + HarvestBase(env, &base, base_obj.As()); + + URL::Parse(input, len, state_override, &url, &base, has_base); + if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) || + ((state_override != 
kUnknownState) && + (url.flags & URL_FLAGS_TERMINATED))) + return; + + // Define the return value placeholders + const Local undef = Undefined(isolate); + if (!(url.flags & URL_FLAGS_FAILED)) { Local argv[9] = { undef, undef, @@ -1490,54 +1812,213 @@ namespace url { undef, undef, }; - SetArgs(env, argv, &context_); - - TryCatch try_catch(isolate); - - // The SetURLConstructor method must have been called already to - // set the constructor function used below. SetURLConstructor is - // called automatically when the internal/url.js module is loaded - // during the internal/bootstrap_node.js processing. - MaybeLocal ret = - env->url_constructor_function() - ->Call(env->context(), undef, 9, argv); + SetArgs(env, argv, &url); + (void)cb->Call(context, recv, arraysize(argv), argv); + } else if (error_cb->IsFunction()) { + Local argv[2] = { undef, undef }; + argv[ERR_ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags); + argv[ERR_ARG_INPUT] = + String::NewFromUtf8(env->isolate(), + input, + v8::NewStringType::kNormal).ToLocalChecked(); + (void)error_cb.As()->Call(context, recv, arraysize(argv), argv); + } +} + +static void Parse(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 5); + CHECK(args[0]->IsString()); // input + CHECK(args[2]->IsUndefined() || // base context + args[2]->IsNull() || + args[2]->IsObject()); + CHECK(args[3]->IsUndefined() || // context + args[3]->IsNull() || + args[3]->IsObject()); + CHECK(args[4]->IsFunction()); // complete callback + CHECK(args[5]->IsUndefined() || args[5]->IsFunction()); // error callback + + Utf8Value input(env->isolate(), args[0]); + enum url_parse_state state_override = kUnknownState; + if (args[1]->IsNumber()) { + state_override = static_cast( + args[1]->Uint32Value(env->context()).FromJust()); + } - if (ret.IsEmpty()) { - ClearFatalExceptionHandlers(env); - FatalException(isolate, try_catch); + Parse(env, args.This(), + *input, input.length(), + 
state_override, + args[2], + args[3], + args[4].As(), + args[5]); +} + +static void EncodeAuthSet(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 1); + CHECK(args[0]->IsString()); + Utf8Value value(env->isolate(), args[0]); + std::string output; + const size_t len = value.length(); + output.reserve(len); + for (size_t n = 0; n < len; n++) { + const char ch = (*value)[n]; + AppendOrEscape(&output, ch, USERINFO_ENCODE_SET); + } + args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), + output.c_str(), + v8::NewStringType::kNormal).ToLocalChecked()); +} + +static void ToUSVString(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 2); + CHECK(args[0]->IsString()); + CHECK(args[1]->IsNumber()); + + TwoByteValue value(env->isolate(), args[0]); + const size_t n = value.length(); + + const int64_t start = args[1]->IntegerValue(env->context()).FromJust(); + CHECK_GE(start, 0); + + for (size_t i = start; i < n; i++) { + char16_t c = value[i]; + if (!IsUnicodeSurrogate(c)) { + continue; + } else if (IsUnicodeSurrogateTrail(c) || i == n - 1) { + value[i] = kUnicodeReplacementCharacter; + } else { + char16_t d = value[i + 1]; + if (IsUnicodeTrail(d)) { + i++; + } else { + value[i] = kUnicodeReplacementCharacter; + } } - - return ret.ToLocalChecked(); } - static void SetURLConstructor(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_EQ(args.Length(), 1); - CHECK(args[0]->IsFunction()); - env->set_url_constructor_function(args[0].As()); + args.GetReturnValue().Set( + String::NewFromTwoByte(env->isolate(), + *value, + v8::NewStringType::kNormal, + n).ToLocalChecked()); +} + +static void DomainToASCII(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 1); + CHECK(args[0]->IsString()); + Utf8Value value(env->isolate(), args[0]); + + 
url_host host{{""}, HOST_TYPE_DOMAIN}; + ParseHost(&host, *value, value.length()); + if (host.type == HOST_TYPE_FAILED) { + args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); + return; + } + std::string out; + WriteHost(&host, &out); + args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), + out.c_str(), + v8::NewStringType::kNormal).ToLocalChecked()); +} + +static void DomainToUnicode(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_GE(args.Length(), 1); + CHECK(args[0]->IsString()); + Utf8Value value(env->isolate(), args[0]); + + url_host host{{""}, HOST_TYPE_DOMAIN}; + ParseHost(&host, *value, value.length(), true); + if (host.type == HOST_TYPE_FAILED) { + args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); + return; + } + std::string out; + WriteHost(&host, &out); + args.GetReturnValue().Set( + String::NewFromUtf8(env->isolate(), + out.c_str(), + v8::NewStringType::kNormal).ToLocalChecked()); +} + +// This function works by calling out to a JS function that creates and +// returns the JS URL object. Be mindful of the JS<->Native boundary +// crossing that is required. +const Local URL::ToObject(Environment* env) const { + Isolate* isolate = env->isolate(); + Local context = env->context(); + HandleScope handle_scope(isolate); + Context::Scope context_scope(context); + + const Local undef = Undefined(isolate); + + if (context_.flags & URL_FLAGS_FAILED) + return Local(); + + Local argv[9] = { + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + undef, + }; + SetArgs(env, argv, &context_); + + TryCatch try_catch(isolate); + + // The SetURLConstructor method must have been called already to + // set the constructor function used below. SetURLConstructor is + // called automatically when the internal/url.js module is loaded + // during the internal/bootstrap_node.js processing. 
+ MaybeLocal ret = + env->url_constructor_function() + ->Call(env->context(), undef, 9, argv); + + if (ret.IsEmpty()) { + ClearFatalExceptionHandlers(env); + FatalException(isolate, try_catch); } - static void Init(Local target, - Local unused, - Local context, - void* priv) { - Environment* env = Environment::GetCurrent(context); - env->SetMethod(target, "parse", Parse); - env->SetMethod(target, "encodeAuth", EncodeAuthSet); - env->SetMethod(target, "toUSVString", ToUSVString); - env->SetMethod(target, "domainToASCII", DomainToASCII); - env->SetMethod(target, "domainToUnicode", DomainToUnicode); - env->SetMethod(target, "setURLConstructor", SetURLConstructor); + return ret.ToLocalChecked(); +} + +static void SetURLConstructor(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_EQ(args.Length(), 1); + CHECK(args[0]->IsFunction()); + env->set_url_constructor_function(args[0].As()); +} + +static void Init(Local target, + Local unused, + Local context, + void* priv) { + Environment* env = Environment::GetCurrent(context); + env->SetMethod(target, "parse", Parse); + env->SetMethod(target, "encodeAuth", EncodeAuthSet); + env->SetMethod(target, "toUSVString", ToUSVString); + env->SetMethod(target, "domainToASCII", DomainToASCII); + env->SetMethod(target, "domainToUnicode", DomainToUnicode); + env->SetMethod(target, "setURLConstructor", SetURLConstructor); #define XX(name, _) NODE_DEFINE_CONSTANT(target, name); - FLAGS(XX) + FLAGS(XX) #undef XX #define XX(name) NODE_DEFINE_CONSTANT(target, name); - ARGS(XX) - PARSESTATES(XX) + PARSESTATES(XX) #undef XX - } +} } // namespace url } // namespace node diff --git a/src/node_url.h b/src/node_url.h index 5b5b65b7c27e87..49bfb264e8d987 100644 --- a/src/node_url.h +++ b/src/node_url.h @@ -16,411 +16,6 @@ using v8::Local; using v8::Value; -#define BIT_AT(a, i) \ - (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \ - (1 << ((unsigned int) (i) & 7)))) -#define TAB_AND_NEWLINE(ch) \ - (ch 
== 0x09 || ch == 0x0a || ch == 0x0d) -#define ASCII_DIGIT(ch) \ - (ch >= 0x30 && ch <= 0x39) -#define ASCII_HEX_DIGIT(ch) \ - (ASCII_DIGIT(ch) || (ch >= 0x41 && ch <= 0x46) || (ch >= 0x61 && ch <= 0x66)) -#define ASCII_ALPHA(ch) \ - ((ch >= 0x41 && ch <= 0x5a) || (ch >= 0x61 && ch <= 0x7a)) -#define ASCII_ALPHANUMERIC(ch) \ - (ASCII_DIGIT(ch) || ASCII_ALPHA(ch)) -#define TO_LOWER(ch) \ - (ASCII_ALPHA(ch) ? (ch | 0x20) : ch) -#define SCHEME_CHAR(ch) \ - (ASCII_ALPHANUMERIC(ch) || ch == '+' || ch == '-' || ch == '.') -#define WINDOWS_DRIVE_LETTER(ch, next) \ - (ASCII_ALPHA(ch) && (next == ':' || next == '|')) -#define NORMALIZED_WINDOWS_DRIVE_LETTER(str) \ - (str.length() == 2 && \ - ASCII_ALPHA(str[0]) && \ - str[1] == ':') - -static const char* hex[256] = { - "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07", - "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F", - "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17", - "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F", - "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27", - "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F", - "%30", "%31", "%32", "%33", "%34", "%35", "%36", "%37", - "%38", "%39", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F", - "%40", "%41", "%42", "%43", "%44", "%45", "%46", "%47", - "%48", "%49", "%4A", "%4B", "%4C", "%4D", "%4E", "%4F", - "%50", "%51", "%52", "%53", "%54", "%55", "%56", "%57", - "%58", "%59", "%5A", "%5B", "%5C", "%5D", "%5E", "%5F", - "%60", "%61", "%62", "%63", "%64", "%65", "%66", "%67", - "%68", "%69", "%6A", "%6B", "%6C", "%6D", "%6E", "%6F", - "%70", "%71", "%72", "%73", "%74", "%75", "%76", "%77", - "%78", "%79", "%7A", "%7B", "%7C", "%7D", "%7E", "%7F", - "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87", - "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F", - "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97", - "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F", - "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7", - "%A8", 
"%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF", - "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7", - "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF", - "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7", - "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF", - "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7", - "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF", - "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7", - "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF", - "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7", - "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF" -}; - -static const uint8_t SIMPLE_ENCODE_SET[32] = { - // 00 01 02 03 04 05 06 07 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 08 09 0A 0B 0C 0D 0E 0F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 10 11 12 13 14 15 16 17 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 18 19 1A 1B 1C 1D 1E 1F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 20 21 22 23 24 25 26 27 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 28 29 2A 2B 2C 2D 2E 2F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 30 31 32 33 34 35 36 37 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 38 39 3A 3B 3C 3D 3E 3F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 40 41 42 43 44 45 46 47 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 48 49 4A 4B 4C 4D 4E 4F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 50 51 52 53 54 55 56 57 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 58 59 5A 5B 5C 5D 5E 5F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 60 61 62 63 64 65 66 67 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 68 69 6A 6B 6C 6D 6E 6F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 70 71 72 73 74 75 76 77 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 78 79 7A 7B 7C 7D 7E 
7F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, - // 80 81 82 83 84 85 86 87 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 88 89 8A 8B 8C 8D 8E 8F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 90 91 92 93 94 95 96 97 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 98 99 9A 9B 9C 9D 9E 9F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A0 A1 A2 A3 A4 A5 A6 A7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A8 A9 AA AB AC AD AE AF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B0 B1 B2 B3 B4 B5 B6 B7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B8 B9 BA BB BC BD BE BF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C0 C1 C2 C3 C4 C5 C6 C7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C8 C9 CA CB CC CD CE CF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D0 D1 D2 D3 D4 D5 D6 D7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D8 D9 DA DB DC DD DE DF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E0 E1 E2 E3 E4 E5 E6 E7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E8 E9 EA EB EC ED EE EF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F0 F1 F2 F3 F4 F5 F6 F7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F8 F9 FA FB FC FD FE FF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 -}; - -static const uint8_t DEFAULT_ENCODE_SET[32] = { - // 00 01 02 03 04 05 06 07 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 08 09 0A 0B 0C 0D 0E 0F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 10 11 12 13 14 15 16 17 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 18 19 1A 1B 1C 1D 1E 1F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 20 21 22 23 24 25 26 27 - 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, - // 28 29 2A 2B 2C 2D 2E 2F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 
0x00, - // 30 31 32 33 34 35 36 37 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 38 39 3A 3B 3C 3D 3E 3F - 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x80, - // 40 41 42 43 44 45 46 47 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 48 49 4A 4B 4C 4D 4E 4F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 50 51 52 53 54 55 56 57 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 58 59 5A 5B 5C 5D 5E 5F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 60 61 62 63 64 65 66 67 - 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 68 69 6A 6B 6C 6D 6E 6F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 70 71 72 73 74 75 76 77 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 78 79 7A 7B 7C 7D 7E 7F - 0x00 | 0x00 | 0x00 | 0x08 | 0x00 | 0x20 | 0x00 | 0x80, - // 80 81 82 83 84 85 86 87 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 88 89 8A 8B 8C 8D 8E 8F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 90 91 92 93 94 95 96 97 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 98 99 9A 9B 9C 9D 9E 9F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A0 A1 A2 A3 A4 A5 A6 A7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A8 A9 AA AB AC AD AE AF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B0 B1 B2 B3 B4 B5 B6 B7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B8 B9 BA BB BC BD BE BF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C0 C1 C2 C3 C4 C5 C6 C7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C8 C9 CA CB CC CD CE CF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D0 D1 D2 D3 D4 D5 D6 D7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D8 D9 DA DB DC DD DE DF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E0 E1 E2 E3 E4 E5 E6 E7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E8 E9 EA 
EB EC ED EE EF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F0 F1 F2 F3 F4 F5 F6 F7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F8 F9 FA FB FC FD FE FF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 -}; - -static const uint8_t USERINFO_ENCODE_SET[32] = { - // 00 01 02 03 04 05 06 07 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 08 09 0A 0B 0C 0D 0E 0F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 10 11 12 13 14 15 16 17 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 18 19 1A 1B 1C 1D 1E 1F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 20 21 22 23 24 25 26 27 - 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, - // 28 29 2A 2B 2C 2D 2E 2F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, - // 30 31 32 33 34 35 36 37 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 38 39 3A 3B 3C 3D 3E 3F - 0x00 | 0x00 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 40 41 42 43 44 45 46 47 - 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 48 49 4A 4B 4C 4D 4E 4F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 50 51 52 53 54 55 56 57 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 58 59 5A 5B 5C 5D 5E 5F - 0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x40 | 0x00, - // 60 61 62 63 64 65 66 67 - 0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 68 69 6A 6B 6C 6D 6E 6F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 70 71 72 73 74 75 76 77 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 78 79 7A 7B 7C 7D 7E 7F - 0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x00 | 0x80, - // 80 81 82 83 84 85 86 87 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 88 89 8A 8B 8C 8D 8E 8F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 90 91 92 93 94 95 96 97 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 98 99 9A 9B 9C 9D 9E 9F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 
0x20 | 0x40 | 0x80, - // A0 A1 A2 A3 A4 A5 A6 A7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A8 A9 AA AB AC AD AE AF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B0 B1 B2 B3 B4 B5 B6 B7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B8 B9 BA BB BC BD BE BF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C0 C1 C2 C3 C4 C5 C6 C7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C8 C9 CA CB CC CD CE CF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D0 D1 D2 D3 D4 D5 D6 D7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D8 D9 DA DB DC DD DE DF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E0 E1 E2 E3 E4 E5 E6 E7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E8 E9 EA EB EC ED EE EF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F0 F1 F2 F3 F4 F5 F6 F7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F8 F9 FA FB FC FD FE FF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 -}; - -static const uint8_t QUERY_ENCODE_SET[32] = { - // 00 01 02 03 04 05 06 07 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 08 09 0A 0B 0C 0D 0E 0F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 10 11 12 13 14 15 16 17 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 18 19 1A 1B 1C 1D 1E 1F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 20 21 22 23 24 25 26 27 - 0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x00, - // 28 29 2A 2B 2C 2D 2E 2F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 30 31 32 33 34 35 36 37 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 38 39 3A 3B 3C 3D 3E 3F - 0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00, - // 40 41 42 43 44 45 46 47 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 48 49 4A 4B 4C 4D 4E 4F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 50 51 52 53 54 55 56 57 - 0x00 
| 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 58 59 5A 5B 5C 5D 5E 5F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 60 61 62 63 64 65 66 67 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 68 69 6A 6B 6C 6D 6E 6F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 70 71 72 73 74 75 76 77 - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00, - // 78 79 7A 7B 7C 7D 7E 7F - 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80, - // 80 81 82 83 84 85 86 87 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 88 89 8A 8B 8C 8D 8E 8F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 90 91 92 93 94 95 96 97 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // 98 99 9A 9B 9C 9D 9E 9F - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A0 A1 A2 A3 A4 A5 A6 A7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // A8 A9 AA AB AC AD AE AF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B0 B1 B2 B3 B4 B5 B6 B7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // B8 B9 BA BB BC BD BE BF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C0 C1 C2 C3 C4 C5 C6 C7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // C8 C9 CA CB CC CD CE CF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D0 D1 D2 D3 D4 D5 D6 D7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // D8 D9 DA DB DC DD DE DF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E0 E1 E2 E3 E4 E5 E6 E7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // E8 E9 EA EB EC ED EE EF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F0 F1 F2 F3 F4 F5 F6 F7 - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80, - // F8 F9 FA FB FC FD FE FF - 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 -}; - -// Must return true if the character is to be percent-encoded -typedef bool (*must_escape_cb)(const unsigned char ch); - -// Appends ch 
to str. If test(ch) returns true, the ch will -// be percent-encoded then appended. -static inline void AppendOrEscape(std::string* str, - const unsigned char ch, - must_escape_cb test) { - if (test(ch)) - *str += hex[ch]; - else - *str += ch; -} - -static inline bool SimpleEncodeSet(const unsigned char ch) { - return BIT_AT(SIMPLE_ENCODE_SET, ch); -} - -static inline bool DefaultEncodeSet(const unsigned char ch) { - return BIT_AT(DEFAULT_ENCODE_SET, ch); -} - -static inline bool UserinfoEncodeSet(const unsigned char ch) { - return BIT_AT(USERINFO_ENCODE_SET, ch); -} - -static inline bool QueryEncodeSet(const unsigned char ch) { - return BIT_AT(QUERY_ENCODE_SET, ch); -} - -static inline unsigned hex2bin(const char ch) { - if (ch >= '0' && ch <= '9') - return ch - '0'; - if (ch >= 'A' && ch <= 'F') - return 10 + (ch - 'A'); - if (ch >= 'a' && ch <= 'f') - return 10 + (ch - 'a'); - return static_cast(-1); -} - -static inline void PercentDecode(const char* input, - size_t len, - std::string* dest) { - if (len == 0) - return; - dest->reserve(len); - const char* pointer = input; - const char* end = input + len; - size_t remaining = pointer - end - 1; - while (pointer < end) { - const char ch = pointer[0]; - remaining = (end - pointer) + 1; - if (ch != '%' || remaining < 2 || - (ch == '%' && - (!ASCII_HEX_DIGIT(pointer[1]) || - !ASCII_HEX_DIGIT(pointer[2])))) { - *dest += ch; - pointer++; - continue; - } else { - unsigned a = hex2bin(pointer[1]); - unsigned b = hex2bin(pointer[2]); - char c = static_cast(a * 16 + b); - *dest += c; - pointer += 3; - } - } -} - -#define SPECIALS(XX) \ - XX("ftp:", 21) \ - XX("file:", -1) \ - XX("gopher:", 70) \ - XX("http:", 80) \ - XX("https:", 443) \ - XX("ws:", 80) \ - XX("wss:", 443) - #define PARSESTATES(XX) \ XX(kSchemeStart) \ XX(kScheme) \ @@ -458,23 +53,6 @@ static inline void PercentDecode(const char* input, XX(URL_FLAGS_HAS_QUERY, 0x200) \ XX(URL_FLAGS_HAS_FRAGMENT, 0x400) -#define ARGS(XX) \ - XX(ARG_FLAGS) \ - XX(ARG_PROTOCOL) 
\ - XX(ARG_USERNAME) \ - XX(ARG_PASSWORD) \ - XX(ARG_HOST) \ - XX(ARG_PORT) \ - XX(ARG_PATH) \ - XX(ARG_QUERY) \ - XX(ARG_FRAGMENT) - -#define ERR_ARGS(XX) \ - XX(ERR_ARG_FLAGS) \ - XX(ERR_ARG_INPUT) \ - -static const char kEOL = -1; - enum url_parse_state { kUnknownState = -1, #define XX(name) name, @@ -488,32 +66,6 @@ enum url_flags { #undef XX }; -enum url_cb_args { -#define XX(name) name, - ARGS(XX) -#undef XX -}; - -enum url_error_cb_args { -#define XX(name) name, - ERR_ARGS(XX) -#undef XX -} url_error_cb_args; - -static inline bool IsSpecial(std::string scheme) { -#define XX(name, _) if (scheme == name) return true; - SPECIALS(XX); -#undef XX - return false; -} - -static inline int NormalizePort(std::string scheme, int p) { -#define XX(name, port) if (scheme == name && p == port) return -1; - SPECIALS(XX); -#undef XX - return p; -} - struct url_data { int32_t flags = URL_FLAGS_NONE; int port = -1; @@ -526,25 +78,6 @@ struct url_data { std::vector path; }; -union url_host_value { - std::string domain; - uint32_t ipv4; - uint16_t ipv6[8]; - ~url_host_value() {} -}; - -enum url_host_type { - HOST_TYPE_FAILED = -1, - HOST_TYPE_DOMAIN = 0, - HOST_TYPE_IPV4 = 1, - HOST_TYPE_IPV6 = 2 -}; - -struct url_host { - url_host_value value; - enum url_host_type type; -}; - class URL { public: static void Parse(const char* input,