From ea1c4e12126a962c1393aa0512bd046b2c707307 Mon Sep 17 00:00:00 2001 From: joyeecheung Date: Thu, 1 Dec 2016 19:11:43 -0600 Subject: [PATCH] test,url: improve escaping in url.parse - rename variables in autoEscapeStr so they are easier to understand - comment the escaping algorithm - increase coverage for autoEscapeStr PR-URL: https://github.com/nodejs/node/pull/10083 Reviewed-By: Anna Henningsen --- lib/url.js | 136 ++++++++++++++++++++------------------ test/parallel/test-url.js | 14 ++++ 2 files changed, 85 insertions(+), 65 deletions(-) diff --git a/lib/url.js b/lib/url.js index 5f87c0c36c43e5..03a9fb03dfcca7 100644 --- a/lib/url.js +++ b/lib/url.js @@ -437,105 +437,111 @@ function validateHostname(self, rest, hostname) { } } +// Automatically escape all delimiters and unwise characters from RFC 2396. +// Also escape single quotes in case of an XSS attack. +// Return undefined if the string doesn't need escaping, +// otherwise return the escaped string. function autoEscapeStr(rest) { - var newRest = ''; - var lastPos = 0; + var escaped = ''; + var lastEscapedPos = 0; for (var i = 0; i < rest.length; ++i) { - // Automatically escape all delimiters and unwise characters from RFC 2396 - // Also escape single quotes in case of an XSS attack + // Manual switching is faster than using a Map/Object. + // `escaped` contains substring up to the last escaped cahracter. switch (rest.charCodeAt(i)) { case 9: // '\t' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%09'; - lastPos = i + 1; + // Concat if there are ordinary characters in the middle. + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%09'; + lastEscapedPos = i + 1; break; case 10: // '\n' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%0A'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%0A'; + lastEscapedPos = i + 1; break; case 13: // '\r' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%0D'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%0D'; + lastEscapedPos = i + 1; break; case 32: // ' ' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%20'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%20'; + lastEscapedPos = i + 1; break; case 34: // '"' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%22'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%22'; + lastEscapedPos = i + 1; break; case 39: // '\'' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%27'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%27'; + lastEscapedPos = i + 1; break; case 60: // '<' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%3C'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%3C'; + lastEscapedPos = i + 1; break; case 62: // '>' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%3E'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%3E'; + lastEscapedPos = i + 1; break; case 92: // '\\' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%5C'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%5C'; + lastEscapedPos = i + 1; break; case 94: // '^' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%5E'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%5E'; + lastEscapedPos = i + 1; break; case 96: // '`' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%60'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%60'; + lastEscapedPos = i + 1; break; case 123: // '{' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%7B'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%7B'; + lastEscapedPos = i + 1; break; case 124: // '|' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%7C'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%7C'; + lastEscapedPos = i + 1; break; case 125: // '}' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%7D'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%7D'; + lastEscapedPos = i + 1; break; } } - if (lastPos === 0) + if (lastEscapedPos === 0) // Nothing has been escaped. return; - if (lastPos < rest.length) - return newRest + rest.slice(lastPos); - else - return newRest; + // There are ordinary characters at the end. + if (lastEscapedPos < rest.length) + return escaped + rest.slice(lastEscapedPos); + else // The last character is escaped. + return escaped; } // format a parsed object into a url string diff --git a/test/parallel/test-url.js b/test/parallel/test-url.js index 8bbc7194732368..c97caa36429a9c 100644 --- a/test/parallel/test-url.js +++ b/test/parallel/test-url.js @@ -834,6 +834,20 @@ var parseTests = { query: '@c' }, + 'http://a.b/\tbc\ndr\ref g"hq\'j?mn\\op^q=r`99{st|uv}wz': { + protocol: 'http:', + slashes: true, + host: 'a.b', + port: null, + hostname: 'a.b', + hash: null, + pathname: '/%09bc%0Adr%0Def%20g%22hq%27j%3Ckl%3E', + path: '/%09bc%0Adr%0Def%20g%22hq%27j%3Ckl%3E?mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz', + search: '?mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz', + query: 'mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz', + href: 'http://a.b/%09bc%0Adr%0Def%20g%22hq%27j%3Ckl%3E?mn%5Cop%5Eq=r%6099%7Bst%7Cuv%7Dwz' + }, + 'http://a\r" \t\n<\'b:b@c\r\nd/e?f': { protocol: 'http:', slashes: true,