nodejs · zimbabao · May 14, 2017 · May 14, 2017 · May 14, 2017 · May 14, 2017
diff --git a/node.gyp b/node.gyp
@@ -235,6 +235,8 @@
         'src/req-wrap.h',
         'src/req-wrap-inl.h',
         'src/string_bytes.h',
+        'src/string_search.h',
+        'src/string_utils.h',
         'src/stream_base.h',
         'src/stream_base-inl.h',
         'src/stream_wrap.h',

diff --git a/src/node_url.cc b/src/node_url.cc
@@ -9,6 +9,7 @@
 #include "base-object.h"
 #include "base-object-inl.h"
 #include "node_i18n.h"
+#include "string_utils.h"
 
 #include <string>
 #include <vector>
@@ -837,6 +838,7 @@ static url_host_type ParseHost(url_host* host,
   url_host_type type = HOST_TYPE_FAILED;
   const char* pointer = input;
   std::string decoded;
+  const char* buf = nullptr;
 
   if (length == 0)
     goto end;
@@ -853,9 +855,19 @@ static url_host_type ParseHost(url_host* host,
   // First, we have to percent decode
   PercentDecode(input, length, &decoded);
 
-  // Then we have to punycode toASCII
-  if (!ToASCII(&decoded, &decoded))
-    goto end;
+  // Match browser behavior: ASCII-only domains are lowercased but are not
+  // run through the ToASCII algorithm. Scan decoded.size() bytes rather than
+  // strlen(buf) so bytes after an embedded NUL in decoded are still examined.
+  buf = decoded.c_str();
+  if (!stringutils::ContainsNonAscii(buf, decoded.size())) {
+    for (size_t n = 0; n < decoded.size(); n++) {
+      decoded[n] = ASCIILowercase(decoded[n]);
+    }
+  } else {
+    // Anything containing non-ASCII bytes goes through Unicode IDNA ToASCII.
+    if (!ToASCII(&decoded, &decoded))
+      goto end;
+  }
 
   // If any of the following characters are still present, we have to fail
   for (size_t n = 0; n < decoded.size(); n++) {

diff --git a/src/string_bytes.cc b/src/string_bytes.cc
@@ -25,6 +25,7 @@
 #include "node.h"
 #include "node_buffer.h"
 #include "v8.h"
+#include "string_utils.h"
 
 #include <limits.h>
 #include <string.h>  // memcpy
@@ -550,60 +551,6 @@ size_t StringBytes::Size(Isolate* isolate,
   return data_size;
 }
 
-
-
-
-static bool contains_non_ascii_slow(const char* buf, size_t len) {
-  for (size_t i = 0; i < len; ++i) {
-    if (buf[i] & 0x80)
-      return true;
-  }
-  return false;
-}
-
-
-static bool contains_non_ascii(const char* src, size_t len) {
-  if (len < 16) {
-    return contains_non_ascii_slow(src, len);
-  }
-
-  const unsigned bytes_per_word = sizeof(uintptr_t);
-  const unsigned align_mask = bytes_per_word - 1;
-  const unsigned unaligned = reinterpret_cast<uintptr_t>(src) & align_mask;
-
-  if (unaligned > 0) {
-    const unsigned n = bytes_per_word - unaligned;
-    if (contains_non_ascii_slow(src, n))
-      return true;
-    src += n;
-    len -= n;
-  }
-
-
-#if defined(_WIN64) || defined(_LP64)
-  const uintptr_t mask = 0x8080808080808080ll;
-#else
-  const uintptr_t mask = 0x80808080l;
-#endif
-
-  const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);
-
-  for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
-    if (srcw[i] & mask)
-      return true;
-  }
-
-  const unsigned remainder = len & align_mask;
-  if (remainder > 0) {
-    const size_t offset = len - remainder;
-    if (contains_non_ascii_slow(src + offset, remainder))
-      return true;
-  }
-
-  return false;
-}
-
-
 static void force_ascii_slow(const char* src, char* dst, size_t len) {
   for (size_t i = 0; i < len; ++i) {
     dst[i] = src[i] & 0x7f;
@@ -709,7 +656,7 @@ MaybeLocal<Value> StringBytes::Encode(Isolate* isolate,
       }
 
     case ASCII:
-      if (contains_non_ascii(buf, buflen)) {
+      if (stringutils::ContainsNonAscii(buf, buflen)) {
         char* out = node::UncheckedMalloc(buflen);
         if (out == nullptr) {
           *error = SB_MALLOC_FAILED_ERROR;

diff --git a/src/string_utils.h b/src/string_utils.h
@@ -0,0 +1,61 @@
+
+#ifndef SRC_STRING_UTILS_H_
+#define SRC_STRING_UTILS_H_
+
+#include <cstddef>
+#include <cstdint>
+
+namespace node {
+namespace stringutils {
+// Returns true if any of the first `len` bytes of `buf` has its high bit
+// set, i.e. is not 7-bit ASCII. Checks one byte at a time.
+// Deliberately not `static`: an internal-linkage helper would make the
+// external-linkage inline ContainsNonAscii() differ per translation unit.
+inline bool contains_non_ascii_slow(const char* buf, size_t len) {
+  for (size_t i = 0; i < len; ++i) {
+    if (buf[i] & 0x80)
+      return true;
+  }
+  return false;
+}
+
+// Returns true if any of the first `len` bytes of `src` is not 7-bit ASCII.
+// Inputs of 16 bytes or more are scanned one machine word at a time.
+inline bool ContainsNonAscii(const char* src, size_t len) {
+  if (len < 16)
+    return contains_non_ascii_slow(src, len);
+
+  const unsigned bytes_per_word = sizeof(uintptr_t);
+  const unsigned align_mask = bytes_per_word - 1;
+  const unsigned unaligned = reinterpret_cast<uintptr_t>(src) & align_mask;
+
+  // Check the leading bytes up to the first word-aligned address.
+  // len >= 16 here, so skipping fewer than bytes_per_word cannot underflow.
+  if (unaligned > 0) {
+    const unsigned n = bytes_per_word - unaligned;
+    if (contains_non_ascii_slow(src, n))
+      return true;
+    src += n;
+    len -= n;
+  }
+
+  // 0x80 repeated in every byte of a word, derived from the word width so
+  // no platform macro (_WIN64/_LP64) is needed to pick the right constant.
+  const uintptr_t mask = UINTPTR_MAX / 0xFF * 0x80;
+  const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);
+
+  for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
+    if (srcw[i] & mask)
+      return true;
+  }
+
+  // Check the trailing bytes that do not fill a whole word.
+  const size_t remainder = len & align_mask;
+  return remainder > 0 &&
+         contains_non_ascii_slow(src + len - remainder, remainder);
+}
+
+}  // namespace stringutils
+}  // namespace node
+
+#endif  // SRC_STRING_UTILS_H_
diff --git a/test/fixtures/url-domains-with-hyphens.js b/test/fixtures/url-domains-with-hyphens.js
@@ -0,0 +1,27 @@
+'use strict';
+
+module.exports = {
+  valid: [
+    // ASCII-only domains with hyphens in unusual label positions
+    {
+      ascii: 'r4---sn-a5mlrn7s.gevideo.com',
+      unicode: 'r4---sn-a5mlrn7s.gevideo.com'
+    },
+    {
+      ascii: '-sn-a5mlrn7s.gevideo.com',
+      unicode: '-sn-a5mlrn7s.gevideo.com'
+    },
+    {
+      ascii: 'sn-a5mlrn7s-.gevideo.com',
+      unicode: 'sn-a5mlrn7s-.gevideo.com'
+    },
+    {
+      ascii: '-sn-a5mlrn7s-.gevideo.com',
+      unicode: '-sn-a5mlrn7s-.gevideo.com'
+    },
+    {
+      ascii: '-sn--a5mlrn7s-.gevideo.com',
+      unicode: '-sn--a5mlrn7s-.gevideo.com'
+    }
+  ]
+};
diff --git a/test/parallel/test-whatwg-url-domainto.js b/test/parallel/test-whatwg-url-domainto.js
@@ -11,6 +11,7 @@ const { domainToASCII, domainToUnicode } = require('url');
 
 // Tests below are not from WPT.
 const tests = require('../fixtures/url-idna.js');
+const testsHyphenDomains = require('../fixtures/url-domains-with-hyphens.js');
 
 {
   const expectedError = common.expectsError(
@@ -34,6 +35,13 @@ const tests = require('../fixtures/url-idna.js');
   }
 }
 
+// ASCII-only domains are not run through ToASCII, so labels with leading,
+// trailing or consecutive hyphens must come back unchanged.
+testsHyphenDomains.valid.forEach(({ ascii, unicode }, i) => {
+  assert.strictEqual(ascii, domainToASCII(unicode),
+                     `domainToASCII(${i + 1})`);
+});
+
 {
   const convertFunc = {
     ascii: domainToASCII,