From 8233c343149d50b07b20b7e210a240fd170856b4 Mon Sep 17 00:00:00 2001 From: Rajaram Gaunker Date: Sat, 13 May 2017 18:08:48 -0700 Subject: [PATCH 1/6] url: fast path ascii domains, do not run ToASCII To match browser behavior fast path ascii only domains and do not run ToASCII on them. Fixes: https://github.com/nodejs/node/issues/12965 Refs: https://github.com/nodejs/node/pull/12966 Refs: https://github.com/whatwg/url/pull/309 --- src/node_url.cc | 25 ++++++++++++++++++--- test/fixtures/url-domains-with-hyphens.js | 27 +++++++++++++++++++++++ test/parallel/test-whatwg-url-domainto.js | 8 +++++++ 3 files changed, 57 insertions(+), 3 deletions(-) create mode 100644 test/fixtures/url-domains-with-hyphens.js diff --git a/src/node_url.cc b/src/node_url.cc index 2bdc080953f294..e784f10deccef1 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -829,6 +829,16 @@ static url_host_type ParseOpaqueHost(url_host* host, return type; } +static inline bool IsAllASCII(std::string* input) { + for (size_t n = 0; n < input->size(); n++) { + const char ch = (*input)[n]; + if (ch & 0x80) { + return false; + } + } + return true; +} + static url_host_type ParseHost(url_host* host, const char* input, size_t length, @@ -853,9 +863,18 @@ static url_host_type ParseHost(url_host* host, // First, we have to percent decode PercentDecode(input, length, &decoded); - // Then we have to punycode toASCII - if (!ToASCII(&decoded, &decoded)) - goto end; + // Match browser behavior for ASCII only domains + // and do not run them through ToASCII algorithm. + if (IsAllASCII(&decoded)) { + // Lowercase aschii domains + for (size_t n = 0; n < decoded.size(); n++) { + decoded[n] = std::tolower(decoded[n]); + } + } else { + // Then we have to punycode toASCII + if (!ToASCII(&decoded, &decoded)) + goto end; + } // If any of the following characters are still present, we have to fail for (size_t n = 0; n < decoded.size(); n++) { diff --git a/test/fixtures/url-domains-with-hyphens.js b/test/fixtures/url-domains-with-hyphens.js new file mode 100644 index 00000000000000..5bd1136d1e6809 --- /dev/null +++ b/test/fixtures/url-domains-with-hyphens.js @@ -0,0 +1,27 @@ +'use strict'; + +module.exports = { + valid: [ + // URLs with hyphen + { + ascii: 'r4---sn-a5mlrn7s.gevideo.com', + unicode: 'r4---sn-a5mlrn7s.gevideo.com' + }, + { + ascii: '-sn-a5mlrn7s.gevideo.com', + unicode: '-sn-a5mlrn7s.gevideo.com' + }, + { + ascii: 'sn-a5mlrn7s-.gevideo.com', + unicode: 'sn-a5mlrn7s-.gevideo.com' + }, + { + ascii: '-sn-a5mlrn7s-.gevideo.com', + unicode: '-sn-a5mlrn7s-.gevideo.com' + }, + { + ascii: '-sn--a5mlrn7s-.gevideo.com', + unicode: '-sn--a5mlrn7s-.gevideo.com' + } + ] +} diff --git a/test/parallel/test-whatwg-url-domainto.js b/test/parallel/test-whatwg-url-domainto.js index 13ff9968705b98..4e1bee2ab55ed8 100644 --- a/test/parallel/test-whatwg-url-domainto.js +++ b/test/parallel/test-whatwg-url-domainto.js @@ -11,6 +11,7 @@ const { domainToASCII, domainToUnicode } = require('url'); // Tests below are not from WPT. const tests = require('../fixtures/url-idna.js'); +const testsHyphenDomains = require('../fixtures/url-domains-with-hyphens.js'); { const expectedError = common.expectsError( @@ -34,6 +35,13 @@ const tests = require('../fixtures/url-idna.js'); } } +{ + for (const [i, { ascii, unicode }] of testsHyphenDomains.valid.entries()) { + assert.strictEqual(ascii, domainToASCII(unicode), + `domainToASCII(${i + 1})`); + } +} + { const convertFunc = { ascii: domainToASCII, From 35e901d4bec294d1cf8de48a26c9491099cf3885 Mon Sep 17 00:00:00 2001 From: Rajaram Gaunker Date: Sat, 13 May 2017 18:08:48 -0700 Subject: [PATCH 2/6] url: fast path ascii domains, do not run ToASCII To match browser behavior fast path ascii only domains and do not run ToASCII on them. Fixes: https://github.com/nodejs/node/issues/12965 Refs: https://github.com/nodejs/node/pull/12966 Refs: https://github.com/whatwg/url/pull/309 --- src/node_url.cc | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/node_url.cc b/src/node_url.cc index e784f10deccef1..ed9b7dd89f26c4 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -130,6 +130,9 @@ enum url_error_cb_args { return str.length() >= 2 && name(str[0], str[1]); \ } +// https://infra.spec.whatwg.org/#ascii-code-point +CHAR_TEST(8, IsASCIICodePoint, (ch >= '\0' && ch <= '\x7f')) + // https://infra.spec.whatwg.org/#ascii-tab-or-newline CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r')) @@ -829,10 +832,10 @@ static url_host_type ParseOpaqueHost(url_host* host, return type; } -static inline bool IsAllASCII(std::string* input) { - for (size_t n = 0; n < input->size(); n++) { - const char ch = (*input)[n]; - if (ch & 0x80) { +static inline bool IsAllASCII(const std::string& input) { + for (size_t n = 0; n < input.size(); n++) { + const char ch = input[n]; + if (!IsASCIICodePoint(ch)) { return false; } } @@ -865,13 +868,13 @@ static url_host_type ParseHost(url_host* host, // Match browser behavior for ASCII only domains // and do not run them through ToASCII algorithm. - if (IsAllASCII(&decoded)) { - // Lowercase aschii domains + if (IsAllASCII(decoded)) { + // Lowercase ASCII domains for (size_t n = 0; n < decoded.size(); n++) { - decoded[n] = std::tolower(decoded[n]); + decoded[n] = ASCIILowercase(decoded[n]); } } else { - // Then we have to punycode toASCII + // Then we have to Unicode IDNA toASCII if (!ToASCII(&decoded, &decoded)) goto end; } From a13c377af82b6d1ba8daa3c6864bf9278dffe5c7 Mon Sep 17 00:00:00 2001 From: Rajaram Gaunker Date: Sat, 13 May 2017 18:08:48 -0700 Subject: [PATCH 3/6] url: fast path ascii domains, do not run ToASCII To match browser behavior fast path ascii only domains and do not run ToASCII on them. Fixes: https://github.com/nodejs/node/issues/12965 Refs: https://github.com/nodejs/node/pull/12966 Refs: https://github.com/whatwg/url/pull/309 --- test/fixtures/url-domains-with-hyphens.js | 27 ----------------------- test/parallel/test-whatwg-url-domainto.js | 15 ++++++++----- 2 files changed, 10 insertions(+), 32 deletions(-) delete mode 100644 test/fixtures/url-domains-with-hyphens.js diff --git a/test/fixtures/url-domains-with-hyphens.js b/test/fixtures/url-domains-with-hyphens.js deleted file mode 100644 index 5bd1136d1e6809..00000000000000 --- a/test/fixtures/url-domains-with-hyphens.js +++ /dev/null @@ -1,27 +0,0 @@ -'use strict'; - -module.exports = { - valid: [ - // URLs with hyphen - { - ascii: 'r4---sn-a5mlrn7s.gevideo.com', - unicode: 'r4---sn-a5mlrn7s.gevideo.com' - }, - { - ascii: '-sn-a5mlrn7s.gevideo.com', - unicode: '-sn-a5mlrn7s.gevideo.com' - }, - { - ascii: 'sn-a5mlrn7s-.gevideo.com', - unicode: 'sn-a5mlrn7s-.gevideo.com' - }, - { - ascii: '-sn-a5mlrn7s-.gevideo.com', - unicode: '-sn-a5mlrn7s-.gevideo.com' - }, - { - ascii: '-sn--a5mlrn7s-.gevideo.com', - unicode: '-sn--a5mlrn7s-.gevideo.com' - } - ] -} diff --git a/test/parallel/test-whatwg-url-domainto.js b/test/parallel/test-whatwg-url-domainto.js index 4e1bee2ab55ed8..9811e3c9975f7a 100644 --- a/test/parallel/test-whatwg-url-domainto.js +++ b/test/parallel/test-whatwg-url-domainto.js @@ -11,7 +11,6 @@ const { domainToASCII, domainToUnicode } = require('url'); // Tests below are not from WPT. const tests = require('../fixtures/url-idna.js'); -const testsHyphenDomains = require('../fixtures/url-domains-with-hyphens.js'); { const expectedError = common.expectsError( @@ -36,10 +35,16 @@ const testsHyphenDomains = require('../fixtures/url-domains-with-hyphens.js'); } { - for (const [i, { ascii, unicode }] of testsHyphenDomains.valid.entries()) { - assert.strictEqual(ascii, domainToASCII(unicode), - `domainToASCII(${i + 1})`); - } + [ + 'r4---sn-a5mlrn7s.gevideo.com', + '-sn-a5mlrn7s.gevideo.com', + 'sn-a5mlrn7s-.gevideo.com', + '-sn-a5mlrn7s-.gevideo.com', + '-sn--a5mlrn7s-.gevideo.com' + ].forEach((domain) => { + assert.strictEqual(domain, domainToASCII(domain), + `domainToASCII(${domain})`); + }) } { From d816988c8ec5d0c3988c368af50a37006ed35150 Mon Sep 17 00:00:00 2001 From: Rajaram Gaunker Date: Sat, 13 May 2017 18:08:48 -0700 Subject: [PATCH 4/6] url: fast path ascii domains, do not run ToASCII To match browser behavior fast path ascii only domains and do not run ToASCII on them. Fixes: https://github.com/nodejs/node/issues/12965 Refs: https://github.com/nodejs/node/pull/12966 Refs: https://github.com/whatwg/url/pull/309 --- src/node_url.cc | 6 ++++- test/fixtures/url-domains-with-hyphens.js | 27 +++++++++++++++++++++++ test/parallel/test-whatwg-url-domainto.js | 15 +++++-------- 3 files changed, 37 insertions(+), 11 deletions(-) create mode 100644 test/fixtures/url-domains-with-hyphens.js diff --git a/src/node_url.cc b/src/node_url.cc index ed9b7dd89f26c4..3353ba7d899046 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -133,6 +133,8 @@ enum url_error_cb_args { // https://infra.spec.whatwg.org/#ascii-code-point CHAR_TEST(8, IsASCIICodePoint, (ch >= '\0' && ch <= '\x7f')) +CHAR_TEST(8, IsLowerCaseASCII, (ch >='a' && ch <= 'z')) + // https://infra.spec.whatwg.org/#ascii-tab-or-newline CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r')) @@ -871,7 +873,9 @@ static url_host_type ParseHost(url_host* host, if (IsAllASCII(decoded)) { // Lowercase ASCII domains for (size_t n = 0; n < decoded.size(); n++) { - decoded[n] = ASCIILowercase(decoded[n]); + if (!IsLowerCaseASCII(decoded[n])) { + decoded[n] = ASCIILowercase(decoded[n]); + } } } else { // Then we have to Unicode IDNA toASCII diff --git a/test/fixtures/url-domains-with-hyphens.js b/test/fixtures/url-domains-with-hyphens.js new file mode 100644 index 00000000000000..5bd1136d1e6809 --- /dev/null +++ b/test/fixtures/url-domains-with-hyphens.js @@ -0,0 +1,27 @@ +'use strict'; + +module.exports = { + valid: [ + // URLs with hyphen + { + ascii: 'r4---sn-a5mlrn7s.gevideo.com', + unicode: 'r4---sn-a5mlrn7s.gevideo.com' + }, + { + ascii: '-sn-a5mlrn7s.gevideo.com', + unicode: '-sn-a5mlrn7s.gevideo.com' + }, + { + ascii: 'sn-a5mlrn7s-.gevideo.com', + unicode: 'sn-a5mlrn7s-.gevideo.com' + }, + { + ascii: '-sn-a5mlrn7s-.gevideo.com', + unicode: '-sn-a5mlrn7s-.gevideo.com' + }, + { + ascii: '-sn--a5mlrn7s-.gevideo.com', + unicode: '-sn--a5mlrn7s-.gevideo.com' + } + ] +} diff --git a/test/parallel/test-whatwg-url-domainto.js b/test/parallel/test-whatwg-url-domainto.js index 9811e3c9975f7a..4e1bee2ab55ed8 100644 --- a/test/parallel/test-whatwg-url-domainto.js +++ b/test/parallel/test-whatwg-url-domainto.js @@ -11,6 +11,7 @@ const { domainToASCII, domainToUnicode } = require('url'); // Tests below are not from WPT. const tests = require('../fixtures/url-idna.js'); +const testsHyphenDomains = require('../fixtures/url-domains-with-hyphens.js'); { const expectedError = common.expectsError( @@ -35,16 +36,10 @@ const tests = require('../fixtures/url-idna.js'); } { - [ - 'r4---sn-a5mlrn7s.gevideo.com', - '-sn-a5mlrn7s.gevideo.com', - 'sn-a5mlrn7s-.gevideo.com', - '-sn-a5mlrn7s-.gevideo.com', - '-sn--a5mlrn7s-.gevideo.com' - ].forEach((domain) => { - assert.strictEqual(domain, domainToASCII(domain), - `domainToASCII(${domain})`); - }) + for (const [i, { ascii, unicode }] of testsHyphenDomains.valid.entries()) { + assert.strictEqual(ascii, domainToASCII(unicode), + `domainToASCII(${i + 1})`); + } } { From e86297c7804388320833d098427f0a12bbbb586f Mon Sep 17 00:00:00 2001 From: Rajaram Gaunker Date: Sat, 13 May 2017 18:08:48 -0700 Subject: [PATCH 5/6] url: fast path ascii domains, do not run ToASCII To match browser behavior fast path ascii only domains and do not run ToASCII on them. Fixes: https://github.com/nodejs/node/issues/12965 Refs: https://github.com/nodejs/node/pull/12966 Refs: https://github.com/whatwg/url/pull/309 --- node.gyp | 4 ++++ src/node_url.cc | 18 ++++---------- src/string_bytes.cc | 57 ++------------------------------------------- src/string_utils.cc | 55 +++++++++++++++++++++++++++++++++++++++++++ src/string_utils.h | 15 ++++++++++++ 5 files changed, 81 insertions(+), 68 deletions(-) create mode 100644 src/string_utils.cc create mode 100644 src/string_utils.h diff --git a/node.gyp b/node.gyp index f50aea3475598d..3fa29f80a8189f 100644 --- a/node.gyp +++ b/node.gyp @@ -192,6 +192,7 @@ 'src/spawn_sync.cc', 'src/string_bytes.cc', 'src/string_search.cc', + 'src/string_utils.cc', 'src/stream_base.cc', 'src/stream_wrap.cc', 'src/tcp_wrap.cc', @@ -235,6 +236,8 @@ 'src/req-wrap.h', 'src/req-wrap-inl.h', 'src/string_bytes.h', + 'src/string_search.h', + 'src/string_utils.h', 'src/stream_base.h', 'src/stream_base-inl.h', 'src/stream_wrap.h', @@ -626,6 +629,7 @@ '<(OBJ_PATH)<(OBJ_SEPARATOR)util.<(OBJ_SUFFIX)', '<(OBJ_PATH)<(OBJ_SEPARATOR)string_bytes.<(OBJ_SUFFIX)', '<(OBJ_PATH)<(OBJ_SEPARATOR)string_search.<(OBJ_SUFFIX)', + '<(OBJ_PATH)<(OBJ_SEPARATOR)string_utils.<(OBJ_SUFFIX)', '<(OBJ_PATH)<(OBJ_SEPARATOR)stream_base.<(OBJ_SUFFIX)', '<(OBJ_PATH)<(OBJ_SEPARATOR)node_constants.<(OBJ_SUFFIX)', '<(OBJ_PATH)<(OBJ_SEPARATOR)node_revert.<(OBJ_SUFFIX)', diff --git a/src/node_url.cc b/src/node_url.cc index 3353ba7d899046..4e135ce3d81c70 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -9,6 +9,7 @@ #include "base-object.h" #include "base-object-inl.h" #include "node_i18n.h" +#include "string_utils.h" #include #include @@ -130,8 +131,7 @@ enum url_error_cb_args { return str.length() >= 2 && name(str[0], str[1]); \ } -// https://infra.spec.whatwg.org/#ascii-code-point -CHAR_TEST(8, IsASCIICodePoint, (ch >= '\0' && ch <= '\x7f')) +CHAR_TEST(8, IsLowerCaseASCII, (ch >='a' && ch <= 'z')) CHAR_TEST(8, IsLowerCaseASCII, (ch >='a' && ch <= 'z')) @@ -834,16 +834,6 @@ static url_host_type ParseOpaqueHost(url_host* host, return type; } -static inline bool IsAllASCII(const std::string& input) { - for (size_t n = 0; n < input.size(); n++) { - const char ch = input[n]; - if (!IsASCIICodePoint(ch)) { - return false; - } - } - return true; -} - static url_host_type ParseHost(url_host* host, const char* input, size_t length, @@ -852,6 +842,7 @@ static url_host_type ParseHost(url_host* host, url_host_type type = HOST_TYPE_FAILED; const char* pointer = input; std::string decoded; + const char *buf = NULL; if (length == 0) goto end; @@ -870,7 +861,8 @@ static url_host_type ParseHost(url_host* host, // Match browser behavior for ASCII only domains // and do not run them through ToASCII algorithm. - if (IsAllASCII(decoded)) { + buf = decoded.c_str(); + if (!stringutils::ContainsNonAscii(buf, strlen(buf))) { // Lowercase ASCII domains for (size_t n = 0; n < decoded.size(); n++) { if (!IsLowerCaseASCII(decoded[n])) { diff --git a/src/string_bytes.cc b/src/string_bytes.cc index 636d7da25d3291..2d33b1bbdf2f00 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -25,6 +25,7 @@ #include "node.h" #include "node_buffer.h" #include "v8.h" +#include "string_utils.h" #include #include // memcpy @@ -550,60 +551,6 @@ size_t StringBytes::Size(Isolate* isolate, return data_size; } - - - -static bool contains_non_ascii_slow(const char* buf, size_t len) { - for (size_t i = 0; i < len; ++i) { - if (buf[i] & 0x80) - return true; - } - return false; -} - - -static bool contains_non_ascii(const char* src, size_t len) { - if (len < 16) { - return contains_non_ascii_slow(src, len); - } - - const unsigned bytes_per_word = sizeof(uintptr_t); - const unsigned align_mask = bytes_per_word - 1; - const unsigned unaligned = reinterpret_cast(src) & align_mask; - - if (unaligned > 0) { - const unsigned n = bytes_per_word - unaligned; - if (contains_non_ascii_slow(src, n)) - return true; - src += n; - len -= n; - } - - -#if defined(_WIN64) || defined(_LP64) - const uintptr_t mask = 0x8080808080808080ll; -#else - const uintptr_t mask = 0x80808080l; -#endif - - const uintptr_t* srcw = reinterpret_cast(src); - - for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) { - if (srcw[i] & mask) - return true; - } - - const unsigned remainder = len & align_mask; - if (remainder > 0) { - const size_t offset = len - remainder; - if (contains_non_ascii_slow(src + offset, remainder)) - return true; - } - - return false; -} - - static void force_ascii_slow(const char* src, char* dst, size_t len) { for (size_t i = 0; i < len; ++i) { dst[i] = src[i] & 0x7f; @@ -709,7 +656,7 @@ MaybeLocal StringBytes::Encode(Isolate* isolate, } case ASCII: - if (contains_non_ascii(buf, buflen)) { + if (stringutils::ContainsNonAscii(buf, buflen)) { char* out = node::UncheckedMalloc(buflen); if (out == nullptr) { *error = SB_MALLOC_FAILED_ERROR; diff --git a/src/string_utils.cc b/src/string_utils.cc new file mode 100644 index 00000000000000..27cf2f3e9ce40a --- /dev/null +++ b/src/string_utils.cc @@ -0,0 +1,55 @@ +#include "string_utils.h" + +namespace node { +namespace stringutils { + + static bool contains_non_ascii_slow(const char* buf, size_t len) { + for (size_t i = 0; i < len; ++i) { + if (buf[i] & 0x80) + return true; + } + return false; + } + + bool ContainsNonAscii(const char* src, size_t len) { + if (len < 16) { + return contains_non_ascii_slow(src, len); + } + + const unsigned bytes_per_word = sizeof(uintptr_t); + const unsigned align_mask = bytes_per_word - 1; + const unsigned unaligned = reinterpret_cast(src) & align_mask; + + if (unaligned > 0) { + const unsigned n = bytes_per_word - unaligned; + if (contains_non_ascii_slow(src, n)) + return true; + src += n; + len -= n; + } + + + #if defined(_WIN64) || defined(_LP64) + const uintptr_t mask = 0x8080808080808080ll; + #else + const uintptr_t mask = 0x80808080l; + #endif + + const uintptr_t* srcw = reinterpret_cast(src); + + for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) { + if (srcw[i] & mask) + return true; + } + + const unsigned remainder = len & align_mask; + if (remainder > 0) { + const size_t offset = len - remainder; + if (contains_non_ascii_slow(src + offset, remainder)) + return true; + } + + return false; + } +} // namespace stringutils +} // namespace node diff --git a/src/string_utils.h b/src/string_utils.h new file mode 100644 index 00000000000000..c086becdb77fb7 --- /dev/null +++ b/src/string_utils.h @@ -0,0 +1,15 @@ + +#ifndef SRC_STRING_UTILS_H_ +#define SRC_STRING_UTILS_H_ + +#include "env.h" +#include "env-inl.h" +#include "util.h" + +namespace node { +namespace stringutils { + bool ContainsNonAscii(const char* src, size_t len); +} // namespace stringutils +} // namespace node + +#endif // SRC_STRING_UTILS_H_ From 861604f2b1353481e4645f248a7f7e7866055772 Mon Sep 17 00:00:00 2001 From: Rajaram Gaunker Date: Sat, 13 May 2017 18:08:48 -0700 Subject: [PATCH 6/6] url: fast path ascii domains, do not run ToASCII To match browser behavior fast path ascii only domains and do not run ToASCII on them. Fixes: https://github.com/nodejs/node/issues/12965 Refs: https://github.com/nodejs/node/pull/12966 Refs: https://github.com/whatwg/url/pull/309 --- node.gyp | 2 -- src/node_url.cc | 8 +------ src/string_utils.cc | 55 --------------------------------------------- src/string_utils.h | 54 ++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 51 insertions(+), 68 deletions(-) delete mode 100644 src/string_utils.cc diff --git a/node.gyp b/node.gyp index 3fa29f80a8189f..05063223baf0fa 100644 --- a/node.gyp +++ b/node.gyp @@ -192,7 +192,6 @@ 'src/spawn_sync.cc', 'src/string_bytes.cc', 'src/string_search.cc', - 'src/string_utils.cc', 'src/stream_base.cc', 'src/stream_wrap.cc', 'src/tcp_wrap.cc', @@ -629,7 +628,6 @@ '<(OBJ_PATH)<(OBJ_SEPARATOR)util.<(OBJ_SUFFIX)', '<(OBJ_PATH)<(OBJ_SEPARATOR)string_bytes.<(OBJ_SUFFIX)', '<(OBJ_PATH)<(OBJ_SEPARATOR)string_search.<(OBJ_SUFFIX)', - '<(OBJ_PATH)<(OBJ_SEPARATOR)string_utils.<(OBJ_SUFFIX)', '<(OBJ_PATH)<(OBJ_SEPARATOR)stream_base.<(OBJ_SUFFIX)', '<(OBJ_PATH)<(OBJ_SEPARATOR)node_constants.<(OBJ_SUFFIX)', '<(OBJ_PATH)<(OBJ_SEPARATOR)node_revert.<(OBJ_SUFFIX)', diff --git a/src/node_url.cc b/src/node_url.cc index 4e135ce3d81c70..95f7a4ff1e2bfa 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -131,10 +131,6 @@ enum url_error_cb_args { return str.length() >= 2 && name(str[0], str[1]); \ } -CHAR_TEST(8, IsLowerCaseASCII, (ch >='a' && ch <= 'z')) - -CHAR_TEST(8, IsLowerCaseASCII, (ch >='a' && ch <= 'z')) - // https://infra.spec.whatwg.org/#ascii-tab-or-newline CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r')) @@ -865,9 +861,7 @@ static url_host_type ParseHost(url_host* host, if (!stringutils::ContainsNonAscii(buf, strlen(buf))) { // Lowercase ASCII domains for (size_t n = 0; n < decoded.size(); n++) { - if (!IsLowerCaseASCII(decoded[n])) { - decoded[n] = ASCIILowercase(decoded[n]); - } + decoded[n] = ASCIILowercase(decoded[n]); } } else { // Then we have to Unicode IDNA toASCII diff --git a/src/string_utils.cc b/src/string_utils.cc deleted file mode 100644 index 27cf2f3e9ce40a..00000000000000 --- a/src/string_utils.cc +++ /dev/null @@ -1,55 +0,0 @@ -#include "string_utils.h" - -namespace node { -namespace stringutils { - - static bool contains_non_ascii_slow(const char* buf, size_t len) { - for (size_t i = 0; i < len; ++i) { - if (buf[i] & 0x80) - return true; - } - return false; - } - - bool ContainsNonAscii(const char* src, size_t len) { - if (len < 16) { - return contains_non_ascii_slow(src, len); - } - - const unsigned bytes_per_word = sizeof(uintptr_t); - const unsigned align_mask = bytes_per_word - 1; - const unsigned unaligned = reinterpret_cast(src) & align_mask; - - if (unaligned > 0) { - const unsigned n = bytes_per_word - unaligned; - if (contains_non_ascii_slow(src, n)) - return true; - src += n; - len -= n; - } - - - #if defined(_WIN64) || defined(_LP64) - const uintptr_t mask = 0x8080808080808080ll; - #else - const uintptr_t mask = 0x80808080l; - #endif - - const uintptr_t* srcw = reinterpret_cast(src); - - for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) { - if (srcw[i] & mask) - return true; - } - - const unsigned remainder = len & align_mask; - if (remainder > 0) { - const size_t offset = len - remainder; - if (contains_non_ascii_slow(src + offset, remainder)) - return true; - } - - return false; - } -} // namespace stringutils -} // namespace node diff --git a/src/string_utils.h b/src/string_utils.h index c086becdb77fb7..a6f9978bf4621a 100644 --- a/src/string_utils.h +++ b/src/string_utils.h @@ -2,13 +2,59 @@ #ifndef SRC_STRING_UTILS_H_ #define SRC_STRING_UTILS_H_ -#include "env.h" -#include "env-inl.h" -#include "util.h" +#include +#include namespace node { namespace stringutils { - bool ContainsNonAscii(const char* src, size_t len); + inline static bool contains_non_ascii_slow(const char* buf, size_t len) { + for (size_t i = 0; i < len; ++i) { + if (buf[i] & 0x80) + return true; + } + return false; + } + + inline bool ContainsNonAscii(const char* src, size_t len) { + if (len < 16) { + return contains_non_ascii_slow(src, len); + } + + const unsigned bytes_per_word = sizeof(uintptr_t); + const unsigned align_mask = bytes_per_word - 1; + const unsigned unaligned = reinterpret_cast(src) & align_mask; + + if (unaligned > 0) { + const unsigned n = bytes_per_word - unaligned; + if (contains_non_ascii_slow(src, n)) + return true; + src += n; + len -= n; + } + + + #if defined(_WIN64) || defined(_LP64) + const uintptr_t mask = 0x8080808080808080ll; + #else + const uintptr_t mask = 0x80808080l; + #endif + + const uintptr_t* srcw = reinterpret_cast(src); + + for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) { + if (srcw[i] & mask) + return true; + } + + const unsigned remainder = len & align_mask; + if (remainder > 0) { + const size_t offset = len - remainder; + if (contains_non_ascii_slow(src + offset, remainder)) + return true; + } + + return false; + } } // namespace stringutils } // namespace node