From e1eb30a4ec99d1178533ba02be01a6c874845558 Mon Sep 17 00:00:00 2001 From: Weijia Wang <381152119@qq.com> Date: Wed, 7 Feb 2018 11:22:51 +0800 Subject: [PATCH 1/5] url: reduce duplicated code in `autoEscapeStr` --- lib/url.js | 99 +++++++----------------------------------------------- 1 file changed, 13 insertions(+), 86 deletions(-) diff --git a/lib/url.js b/lib/url.js index cb524fd9a87347..3c9f89322c9609 100644 --- a/lib/url.js +++ b/lib/url.js @@ -442,98 +442,25 @@ function validateHostname(self, rest, hostname) { // Automatically escape all delimiters and unwise characters from RFC 2396. // Also escape single quotes in case of an XSS attack. // Return the escaped string. +const escapedCharacters = { + '\t': '%09', '\n': '%0A', '\r': '%0D', ' ': '%20', + '"': '%22', '\'': '%27', '<': '%3C', '>': '%3E', + '\\': '%5C', '^': '%5E', '`': '%60', '{': '%7B', + '|': '%7C', '}': '%7D' +}; function autoEscapeStr(rest) { var escaped = ''; var lastEscapedPos = 0; for (var i = 0; i < rest.length; ++i) { // Manual switching is faster than using a Map/Object. // `escaped` contains substring up to the last escaped character. - switch (rest.charCodeAt(i)) { - case 9: // '\t' - // Concat if there are ordinary characters in the middle. 
- if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%09'; - lastEscapedPos = i + 1; - break; - case 10: // '\n' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%0A'; - lastEscapedPos = i + 1; - break; - case 13: // '\r' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%0D'; - lastEscapedPos = i + 1; - break; - case 32: // ' ' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%20'; - lastEscapedPos = i + 1; - break; - case 34: // '"' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%22'; - lastEscapedPos = i + 1; - break; - case 39: // '\'' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%27'; - lastEscapedPos = i + 1; - break; - case 60: // '<' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%3C'; - lastEscapedPos = i + 1; - break; - case 62: // '>' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%3E'; - lastEscapedPos = i + 1; - break; - case 92: // '\\' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%5C'; - lastEscapedPos = i + 1; - break; - case 94: // '^' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%5E'; - lastEscapedPos = i + 1; - break; - case 96: // '`' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%60'; - lastEscapedPos = i + 1; - break; - case 123: // '{' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%7B'; - lastEscapedPos = i + 1; - break; - case 124: // '|' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%7C'; - lastEscapedPos = i + 1; - break; - case 125: // '}' - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += '%7D'; - lastEscapedPos = i + 1; - break; + var 
escapedChar = escapedCharacters[rest[i]]; + if (escapedChar) { + // Concat if there are ordinary characters in the middle. + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += escapedChar; + lastEscapedPos = i + 1; } } if (lastEscapedPos === 0) // Nothing has been escaped. From ba9427c9555be90122e2ea054218b0562458ce09 Mon Sep 17 00:00:00 2001 From: Weijia Wang <381152119@qq.com> Date: Wed, 7 Feb 2018 14:04:13 +0800 Subject: [PATCH 2/5] [squash]fix performance regression --- lib/url.js | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/lib/url.js b/lib/url.js index 3c9f89322c9609..b13d1c332d6a20 100644 --- a/lib/url.js +++ b/lib/url.js @@ -442,11 +442,11 @@ function validateHostname(self, rest, hostname) { // Automatically escape all delimiters and unwise characters from RFC 2396. // Also escape single quotes in case of an XSS attack. // Return the escaped string. -const escapedCharacters = { - '\t': '%09', '\n': '%0A', '\r': '%0D', ' ': '%20', - '"': '%22', '\'': '%27', '<': '%3C', '>': '%3E', - '\\': '%5C', '^': '%5E', '`': '%60', '{': '%7B', - '|': '%7C', '}': '%7D' +const escapedCharacterCodes = { + 9: '%09', 10: '%0A', 13: '%0D', 32: '%20', + 34: '%22', 39: '%27', 60: '%3C', 62: '%3E', + 92: '%5C', 94: '%5E', 96: '%60', 123: '%7B', + 124: '%7C', 125: '%7D' }; function autoEscapeStr(rest) { var escaped = ''; @@ -454,13 +454,27 @@ function autoEscapeStr(rest) { for (var i = 0; i < rest.length; ++i) { // Manual switching is faster than using a Map/Object. // `escaped` contains substring up to the last escaped character. - var escapedChar = escapedCharacters[rest[i]]; - if (escapedChar) { - // Concat if there are ordinary characters in the middle. 
- if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += escapedChar; - lastEscapedPos = i + 1; + var code = rest.charCodeAt(i); + switch (code) { + case 9: // '\t' + case 10: // '\n' + case 13: // '\r' + case 32: // ' ' + case 34: // '"' + case 39: // '\'' + case 60: // '<' + case 62: // '>' + case 92: // '\\' + case 94: // '^' + case 96: // '`' + case 123: // '{' + case 124: // '|' + case 125: // '}' + // Concat if there are ordinary characters in the middle. + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += escapedCharacterCodes[code]; + lastEscapedPos = i + 1; } } if (lastEscapedPos === 0) // Nothing has been escaped. From 943d3a91906fbcff1a5505d113f05206e5332482 Mon Sep 17 00:00:00 2001 From: Weijia Wang <381152119@qq.com> Date: Wed, 7 Feb 2018 14:04:34 +0800 Subject: [PATCH 3/5] add benchmark --- benchmark/url/url-parse.js | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 benchmark/url/url-parse.js diff --git a/benchmark/url/url-parse.js b/benchmark/url/url-parse.js new file mode 100644 index 00000000000000..83f626ccdadfe3 --- /dev/null +++ b/benchmark/url/url-parse.js @@ -0,0 +1,22 @@ +'use strict'; +const common = require('../common.js'); +const url = require('url'); + +const inputs = { + normal: 'http://foo.com/bar', + escaped: 'https://foo.bar/{}^`/abcd' +}; + +const bench = common.createBenchmark(main, { + type: Object.keys(inputs), + n: [1e7] +}); + +function main({ type, n }) { + const input = inputs[type] || ''; + + bench.start(); + for (var i = 0; i < n; i += 1) + url.parse(input); + bench.end(n); +} From 9db55c5018eac2ac481e127875163e273996afd3 Mon Sep 17 00:00:00 2001 From: Weijia Wang <381152119@qq.com> Date: Wed, 7 Feb 2018 21:25:30 +0800 Subject: [PATCH 4/5] use array --- lib/url.js | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/lib/url.js b/lib/url.js index b13d1c332d6a20..94e29b79eefe88 100644 --- 
a/lib/url.js +++ b/lib/url.js @@ -442,39 +442,29 @@ function validateHostname(self, rest, hostname) { // Automatically escape all delimiters and unwise characters from RFC 2396. // Also escape single quotes in case of an XSS attack. // Return the escaped string. -const escapedCharacterCodes = { +const escapedCodes = { 9: '%09', 10: '%0A', 13: '%0D', 32: '%20', 34: '%22', 39: '%27', 60: '%3C', 62: '%3E', 92: '%5C', 94: '%5E', 96: '%60', 123: '%7B', 124: '%7C', 125: '%7D' }; +// Using Array is faster than Object/Map +const escapedCodesArr = new Array(); +for (var key in escapedCodes) { + escapedCodesArr[key] = escapedCodes[key]; +} function autoEscapeStr(rest) { var escaped = ''; var lastEscapedPos = 0; for (var i = 0; i < rest.length; ++i) { - // Manual switching is faster than using a Map/Object. // `escaped` contains substring up to the last escaped character. - var code = rest.charCodeAt(i); - switch (code) { - case 9: // '\t' - case 10: // '\n' - case 13: // '\r' - case 32: // ' ' - case 34: // '"' - case 39: // '\'' - case 60: // '<' - case 62: // '>' - case 92: // '\\' - case 94: // '^' - case 96: // '`' - case 123: // '{' - case 124: // '|' - case 125: // '}' - // Concat if there are ordinary characters in the middle. - if (i > lastEscapedPos) - escaped += rest.slice(lastEscapedPos, i); - escaped += escapedCharacterCodes[code]; - lastEscapedPos = i + 1; + var escapedChar = escapedCodesArr[rest.charCodeAt(i)]; + if (escapedChar) { + // Concat if there are ordinary characters in the middle. + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += escapedChar; + lastEscapedPos = i + 1; } } if (lastEscapedPos === 0) // Nothing has been escaped. 
From 2b82afc28c66fc149142604d3f1913dca6393d7e Mon Sep 17 00:00:00 2001 From: Weijia Wang <381152119@qq.com> Date: Thu, 22 Feb 2018 20:26:25 +0800 Subject: [PATCH 5/5] use dense array --- lib/url.js | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/lib/url.js b/lib/url.js index 94e29b79eefe88..ab4b2b4647edd2 100644 --- a/lib/url.js +++ b/lib/url.js @@ -439,26 +439,33 @@ function validateHostname(self, rest, hostname) { } } +// Escaped characters. Use empty strings to fill up unused entries. +// Using Array is faster than Object/Map +const escapedCodes = [ + /*0 - 9*/ '', '', '', '', '', '', '', '', '', '%09', + /*10 - 19*/ '%0A', '', '', '%0D', '', '', '', '', '', '', + /*20 - 29*/ '', '', '', '', '', '', '', '', '', '', + /*30 - 39*/ '', '', '%20', '', '%22', '', '', '', '', '%27', + /*40 - 49*/ '', '', '', '', '', '', '', '', '', '', + /*50 - 59*/ '', '', '', '', '', '', '', '', '', '', + /*60 - 69*/ '%3C', '', '%3E', '', '', '', '', '', '', '', + /*70 - 79*/ '', '', '', '', '', '', '', '', '', '', + /*80 - 89*/ '', '', '', '', '', '', '', '', '', '', + /*90 - 99*/ '', '', '%5C', '', '%5E', '', '%60', '', '', '', + /*100 - 109*/ '', '', '', '', '', '', '', '', '', '', + /*110 - 119*/ '', '', '', '', '', '', '', '', '', '', + /*120 - 125*/ '', '', '', '%7B', '%7C', '%7D' +]; + // Automatically escape all delimiters and unwise characters from RFC 2396. // Also escape single quotes in case of an XSS attack. // Return the escaped string. 
-const escapedCodes = { - 9: '%09', 10: '%0A', 13: '%0D', 32: '%20', - 34: '%22', 39: '%27', 60: '%3C', 62: '%3E', - 92: '%5C', 94: '%5E', 96: '%60', 123: '%7B', - 124: '%7C', 125: '%7D' -}; -// Using Array is faster than Object/Map -const escapedCodesArr = new Array(); -for (var key in escapedCodes) { - escapedCodesArr[key] = escapedCodes[key]; -} function autoEscapeStr(rest) { var escaped = ''; var lastEscapedPos = 0; for (var i = 0; i < rest.length; ++i) { // `escaped` contains substring up to the last escaped character. - var escapedChar = escapedCodesArr[rest.charCodeAt(i)]; + var escapedChar = escapedCodes[rest.charCodeAt(i)]; if (escapedChar) { // Concat if there are ordinary characters in the middle. if (i > lastEscapedPos)