Skip to content

Commit

Permalink
url: fast path ascii domains, do not run ToASCII
Browse files Browse the repository at this point in the history
To match browser behavior, fast-path ASCII-only domains and
do not run ToASCII on them.

Fixes: nodejs#12965
Refs: nodejs#12966
Refs: whatwg/url#309
  • Loading branch information
zimbabao committed May 16, 2017
1 parent e86297c commit 861604f
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 68 deletions.
2 changes: 0 additions & 2 deletions node.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,6 @@
'src/spawn_sync.cc',
'src/string_bytes.cc',
'src/string_search.cc',
'src/string_utils.cc',
'src/stream_base.cc',
'src/stream_wrap.cc',
'src/tcp_wrap.cc',
Expand Down Expand Up @@ -629,7 +628,6 @@
'<(OBJ_PATH)<(OBJ_SEPARATOR)util.<(OBJ_SUFFIX)',
'<(OBJ_PATH)<(OBJ_SEPARATOR)string_bytes.<(OBJ_SUFFIX)',
'<(OBJ_PATH)<(OBJ_SEPARATOR)string_search.<(OBJ_SUFFIX)',
'<(OBJ_PATH)<(OBJ_SEPARATOR)string_utils.<(OBJ_SUFFIX)',
'<(OBJ_PATH)<(OBJ_SEPARATOR)stream_base.<(OBJ_SUFFIX)',
'<(OBJ_PATH)<(OBJ_SEPARATOR)node_constants.<(OBJ_SUFFIX)',
'<(OBJ_PATH)<(OBJ_SEPARATOR)node_revert.<(OBJ_SUFFIX)',
Expand Down
8 changes: 1 addition & 7 deletions src/node_url.cc
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,6 @@ enum url_error_cb_args {
return str.length() >= 2 && name(str[0], str[1]); \
}

CHAR_TEST(8, IsLowerCaseASCII, (ch >='a' && ch <= 'z'))

CHAR_TEST(8, IsLowerCaseASCII, (ch >='a' && ch <= 'z'))

// https://infra.spec.whatwg.org/#ascii-tab-or-newline
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))

Expand Down Expand Up @@ -865,9 +861,7 @@ static url_host_type ParseHost(url_host* host,
if (!stringutils::ContainsNonAscii(buf, strlen(buf))) {
// Lowercase ASCII domains
for (size_t n = 0; n < decoded.size(); n++) {
if (!IsLowerCaseASCII(decoded[n])) {
decoded[n] = ASCIILowercase(decoded[n]);
}
decoded[n] = ASCIILowercase(decoded[n]);
}
} else {
// Then we have to Unicode IDNA toASCII
Expand Down
55 changes: 0 additions & 55 deletions src/string_utils.cc

This file was deleted.

54 changes: 50 additions & 4 deletions src/string_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,59 @@
#ifndef SRC_STRING_UTILS_H_
#define SRC_STRING_UTILS_H_

#include "env.h"
#include "env-inl.h"
#include "util.h"
#include <cstddef>
#include <cstdint>

namespace node {
namespace stringutils {

// Byte-at-a-time scan. Returns true if any of the first `len` bytes of `buf`
// has its high bit (0x80) set, i.e. lies outside the 7-bit ASCII range.
// Returns false for len == 0.
inline static bool contains_non_ascii_slow(const char* buf, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    if (buf[i] & 0x80)
      return true;
  }
  return false;
}

// Returns true if any of the `len` bytes starting at `src` has its high bit
// set (is not 7-bit ASCII), false otherwise (including len == 0).
//
// Inputs shorter than 16 bytes are scanned byte by byte. Longer inputs are
// scanned in three phases: a byte-wise prefix up to the next word-aligned
// address, a word-at-a-time pass over the aligned middle, and a byte-wise
// pass over whatever is left after the last full word.
inline bool ContainsNonAscii(const char* src, size_t len) {
  if (len < 16) {
    return contains_non_ascii_slow(src, len);
  }

  const unsigned bytes_per_word = sizeof(uintptr_t);
  const unsigned align_mask = bytes_per_word - 1;
  const unsigned unaligned = reinterpret_cast<uintptr_t>(src) & align_mask;

  if (unaligned > 0) {
    // Consume the leading bytes so that `src` becomes word-aligned.
    // len >= 16 here and n < sizeof(uintptr_t), so `len -= n` cannot wrap.
    const unsigned n = bytes_per_word - unaligned;
    if (contains_non_ascii_slow(src, n))
      return true;
    src += n;
    len -= n;
  }

  // 0x80 replicated into every byte of a word. Derived from the real width
  // of uintptr_t (all-ones / 0xFF == 0x0101...01) instead of platform macros,
  // so no byte of the word can be left out of the test on a target that
  // defines neither _WIN64 nor _LP64.
  const uintptr_t mask = ~static_cast<uintptr_t>(0) / 0xFF * 0x80;

  // NOTE(review): the buffer is read through a uintptr_t lvalue; `src` is
  // aligned at this point, but confirm the build's aliasing assumptions
  // permit this access pattern.
  const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);

  for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
    if (srcw[i] & mask)
      return true;
  }

  // Trailing bytes that do not fill a whole word.
  const unsigned remainder = len & align_mask;
  if (remainder > 0) {
    const size_t offset = len - remainder;
    if (contains_non_ascii_slow(src + offset, remainder))
      return true;
  }

  return false;
}

}  // namespace stringutils
}  // namespace node

Expand Down

0 comments on commit 861604f

Please sign in to comment.