From 5e68a13d5332f1b0649051cee2769fd21dcd78e4 Mon Sep 17 00:00:00 2001 From: James M Snell Date: Wed, 5 Feb 2020 09:17:22 -0800 Subject: [PATCH] src: various minor improvements to node_url Went hunting for possible performance improvements. Didn't find anything significant but did manage to make a number of style improvements that bring more in line with style guidelines and good pratice. PR-URL: https://github.com/nodejs/node/pull/31651 Reviewed-By: Anna Henningsen Reviewed-By: Colin Ihrig --- src/env.h | 7 ++ src/node_url.cc | 245 +++++++++++++++++++++++++----------------------- src/node_url.h | 4 +- 3 files changed, 138 insertions(+), 118 deletions(-) diff --git a/src/env.h b/src/env.h index d249dc25f5d3b3..f6f68a37681ff6 100644 --- a/src/env.h +++ b/src/env.h @@ -371,6 +371,13 @@ constexpr size_t kFsStatsBufferLength = V(type_string, "type") \ V(uid_string, "uid") \ V(unknown_string, "") \ + V(url_special_ftp_string, "ftp:") \ + V(url_special_file_string, "file:") \ + V(url_special_gopher_string, "gopher:") \ + V(url_special_http_string, "http:") \ + V(url_special_https_string, "https:") \ + V(url_special_ws_string, "ws:") \ + V(url_special_wss_string, "wss:") \ V(url_string, "url") \ V(username_string, "username") \ V(valid_from_string, "valid_from") \ diff --git a/src/node_url.cc b/src/node_url.cc index 6565688d02e5e0..d3513f095c8294 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -30,7 +30,7 @@ using v8::String; using v8::Undefined; using v8::Value; -inline Local Utf8String(Isolate* isolate, const std::string& str) { +Local Utf8String(Isolate* isolate, const std::string& str) { return String::NewFromUtf8(isolate, str.data(), NewStringType::kNormal, @@ -42,10 +42,10 @@ namespace url { namespace { // https://url.spec.whatwg.org/#eof-code-point -const char kEOL = -1; +constexpr char kEOL = -1; // Used in ToUSVString(). -const char16_t kUnicodeReplacementCharacter = 0xFFFD; +constexpr char16_t kUnicodeReplacementCharacter = 0xFFFD; // https://url.spec.whatwg.org/#concept-host class URLHost { @@ -60,7 +60,7 @@ class URLHost { bool is_special, bool unicode = false); - inline bool ParsingFailed() const { return type_ == HostType::H_FAILED; } + bool ParsingFailed() const { return type_ == HostType::H_FAILED; } std::string ToString() const; // Like ToString(), but avoids a copy in exchange for invalidating `*this`. std::string ToStringMove(); @@ -86,7 +86,7 @@ class URLHost { Value value_; HostType type_ = HostType::H_FAILED; - inline void Reset() { + void Reset() { using string = std::string; switch (type_) { case HostType::H_DOMAIN: @@ -107,13 +107,13 @@ class URLHost { // internals too much. // These helpers are the easiest solution but we might want to consider // just not forcing strings into an union. - inline void SetOpaque(std::string&& string) { + void SetOpaque(std::string&& string) { Reset(); type_ = HostType::H_OPAQUE; new(&value_.domain_or_opaque) std::string(std::move(string)); } - inline void SetDomain(std::string&& string) { + void SetDomain(std::string&& string) { Reset(); type_ = HostType::H_DOMAIN; new(&value_.domain_or_opaque) std::string(std::move(string)); @@ -154,7 +154,7 @@ enum url_error_cb_args { #define CHAR_TEST(bits, name, expr) \ template \ - inline bool name(const T ch) { \ + bool name(const T ch) { \ static_assert(sizeof(ch) >= (bits) / 8, \ "Character must be wider than " #bits " bits"); \ return (expr); \ @@ -162,13 +162,13 @@ enum url_error_cb_args { #define TWO_CHAR_STRING_TEST(bits, name, expr) \ template \ - inline bool name(const T ch1, const T ch2) { \ + bool name(const T ch1, const T ch2) { \ static_assert(sizeof(ch1) >= (bits) / 8, \ "Character must be wider than " #bits " bits"); \ return (expr); \ } \ template \ - inline bool name(const std::basic_string& str) { \ + bool name(const std::basic_string& str) { \ static_assert(sizeof(str[0]) >= (bits) / 8, \ "Character must be wider than " #bits " bits"); \ return str.length() >= 2 && name(str[0], str[1]); \ @@ -197,7 +197,7 @@ CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch))) // https://infra.spec.whatwg.org/#ascii-lowercase template -inline T ASCIILowercase(T ch) { +T ASCIILowercase(T ch) { return IsASCIIAlpha(ch) ? (ch | 0x20) : ch; } @@ -667,13 +667,13 @@ const uint8_t QUERY_ENCODE_SET_SPECIAL[32] = { 0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80 }; -inline bool BitAt(const uint8_t a[], const uint8_t i) { +bool BitAt(const uint8_t a[], const uint8_t i) { return !!(a[i >> 3] & (1 << (i & 7))); } // Appends ch to str. If ch position in encode_set is set, the ch will // be percent-encoded then appended. -inline void AppendOrEscape(std::string* str, +void AppendOrEscape(std::string* str, const unsigned char ch, const uint8_t encode_set[]) { if (BitAt(encode_set, ch)) @@ -683,7 +683,7 @@ inline void AppendOrEscape(std::string* str, } template -inline unsigned hex2bin(const T ch) { +unsigned hex2bin(const T ch) { if (ch >= '0' && ch <= '9') return ch - '0'; if (ch >= 'A' && ch <= 'F') @@ -693,7 +693,7 @@ inline unsigned hex2bin(const T ch) { return static_cast(-1); } -inline std::string PercentDecode(const char* input, size_t len) { +std::string PercentDecode(const char* input, size_t len) { std::string dest; if (len == 0) return dest; @@ -703,7 +703,7 @@ inline std::string PercentDecode(const char* input, size_t len) { while (pointer < end) { const char ch = pointer[0]; - const size_t remaining = end - pointer - 1; + size_t remaining = end - pointer - 1; if (ch != '%' || remaining < 2 || (ch == '%' && (!IsASCIIHexDigit(pointer[1]) || @@ -723,24 +723,39 @@ inline std::string PercentDecode(const char* input, size_t len) { } #define SPECIALS(XX) \ - XX("ftp:", 21) \ - XX("file:", -1) \ - XX("gopher:", 70) \ - XX("http:", 80) \ - XX("https:", 443) \ - XX("ws:", 80) \ - XX("wss:", 443) - -inline bool IsSpecial(const std::string& scheme) { -#define XX(name, _) if (scheme == name) return true; - SPECIALS(XX); -#undef XX + XX(ftp, 21, "ftp:") \ + XX(file, -1, "file:") \ + XX(gopher, 70, "gopher:") \ + XX(http, 80, "http:") \ + XX(https, 443, "https:") \ + XX(ws, 80, "ws:") \ + XX(wss, 443, "wss:") + +bool IsSpecial(const std::string& scheme) { +#define V(_, __, name) if (scheme == name) return true; + SPECIALS(V); +#undef V return false; } +Local GetSpecial(Environment* env, const std::string& scheme) { +#define V(key, _, name) if (scheme == name) \ + return env->url_special_##key##_string(); + SPECIALS(V) +#undef V + UNREACHABLE(); +} + +int NormalizePort(const std::string& scheme, int p) { +#define V(_, port, name) if (scheme == name && p == port) return -1; + SPECIALS(V); +#undef V + return p; +} + // https://url.spec.whatwg.org/#start-with-a-windows-drive-letter -inline bool StartsWithWindowsDriveLetter(const char* p, const char* end) { - const size_t length = end - p; +bool StartsWithWindowsDriveLetter(const char* p, const char* end) { + size_t length = end - p; return length >= 2 && IsWindowsDriveLetter(p[0], p[1]) && (length == 2 || @@ -750,15 +765,8 @@ inline bool StartsWithWindowsDriveLetter(const char* p, const char* end) { p[2] == '#'); } -inline int NormalizePort(const std::string& scheme, int p) { -#define XX(name, port) if (scheme == name && p == port) return -1; - SPECIALS(XX); -#undef XX - return p; -} - #if defined(NODE_HAVE_I18N_SUPPORT) -inline bool ToUnicode(const std::string& input, std::string* output) { +bool ToUnicode(const std::string& input, std::string* output) { MaybeStackBuffer buf; if (i18n::ToUnicode(&buf, input.c_str(), input.length()) < 0) return false; @@ -766,7 +774,7 @@ inline bool ToUnicode(const std::string& input, std::string* output) { return true; } -inline bool ToASCII(const std::string& input, std::string* output) { +bool ToASCII(const std::string& input, std::string* output) { MaybeStackBuffer buf; if (i18n::ToASCII(&buf, input.c_str(), input.length()) < 0) return false; @@ -775,12 +783,12 @@ inline bool ToASCII(const std::string& input, std::string* output) { } #else // Intentional non-ops if ICU is not present. -inline bool ToUnicode(const std::string& input, std::string* output) { +bool ToUnicode(const std::string& input, std::string* output) { *output = input; return true; } -inline bool ToASCII(const std::string& input, std::string* output) { +bool ToASCII(const std::string& input, std::string* output) { *output = input; return true; } @@ -902,7 +910,7 @@ void URLHost::ParseIPv6Host(const char* input, size_t length) { type_ = HostType::H_IPV6; } -inline int64_t ParseNumber(const char* start, const char* end) { +int64_t ParseNumber(const char* start, const char* end) { unsigned R = 10; if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') { start += 2; @@ -952,7 +960,7 @@ void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) { while (pointer <= end) { const char ch = pointer < end ? pointer[0] : kEOL; - const int remaining = end - pointer - 1; + int remaining = end - pointer - 1; if (ch == '.' || ch == kEOL) { if (++parts > static_cast(arraysize(numbers))) return; @@ -1061,7 +1069,7 @@ void URLHost::ParseHost(const char* input, // Locates the longest sequence of 0 segments in an IPv6 address // in order to use the :: compression when serializing template -inline T* FindLongestZeroSequence(T* values, size_t len) { +T* FindLongestZeroSequence(T* values, size_t len) { T* start = values; T* end = start + len; T* result = nullptr; @@ -1173,14 +1181,12 @@ bool ParseHost(const std::string& input, return true; } -inline std::vector FromJSStringArray(Environment* env, - Local array) { +std::vector FromJSStringArray(Environment* env, + Local array) { std::vector vec; - const int32_t len = array->Length(); - if (len == 0) - return vec; // nothing to copy - vec.reserve(len); - for (int32_t n = 0; n < len; n++) { + if (array->Length() > 0) + vec.reserve(array->Length()); + for (size_t n = 0; n < array->Length(); n++) { Local val = array->Get(env->context(), n).ToLocalChecked(); if (val->IsString()) { Utf8Value value(env->isolate(), val.As()); @@ -1190,13 +1196,19 @@ inline std::vector FromJSStringArray(Environment* env, return vec; } -inline url_data HarvestBase(Environment* env, Local base_obj) { +url_data HarvestBase(Environment* env, Local base_obj) { url_data base; Local context = env->context(); + Local flags = base_obj->Get(env->context(), env->flags_string()).ToLocalChecked(); if (flags->IsInt32()) - base.flags = flags->Int32Value(context).FromJust(); + flags->Int32Value(context).To(&base.flags); + + Local port = + base_obj->Get(env->context(), env->port_string()).ToLocalChecked(); + if (port->IsInt32()) + port->Int32Value(context).To(&base.port); Local scheme = base_obj->Get(env->context(), env->scheme_string()).ToLocalChecked(); @@ -1230,11 +1242,6 @@ inline url_data HarvestBase(Environment* env, Local base_obj) { env->fragment_string(), true); - Local port = - base_obj->Get(env->context(), env->port_string()).ToLocalChecked(); - if (port->IsInt32()) - base.port = port.As()->Value(); - Local path = base_obj->Get(env->context(), env->path_string()).ToLocalChecked(); if (path->IsArray()) { @@ -1244,18 +1251,18 @@ inline url_data HarvestBase(Environment* env, Local base_obj) { return base; } -inline url_data HarvestContext(Environment* env, Local context_obj) { +url_data HarvestContext(Environment* env, Local context_obj) { url_data context; Local flags = context_obj->Get(env->context(), env->flags_string()).ToLocalChecked(); if (flags->IsInt32()) { - static const int32_t copy_flags_mask = + static constexpr int32_t kCopyFlagsMask = URL_FLAGS_SPECIAL | URL_FLAGS_CANNOT_BE_BASE | URL_FLAGS_HAS_USERNAME | URL_FLAGS_HAS_PASSWORD | URL_FLAGS_HAS_HOST; - context.flags |= flags.As()->Value() & copy_flags_mask; + context.flags |= flags.As()->Value() & kCopyFlagsMask; } Local scheme = context_obj->Get(env->context(), env->scheme_string()).ToLocalChecked(); @@ -1294,7 +1301,7 @@ inline url_data HarvestContext(Environment* env, Local context_obj) { } // Single dot segment can be ".", "%2e", or "%2E" -inline bool IsSingleDotSegment(const std::string& str) { +bool IsSingleDotSegment(const std::string& str) { switch (str.size()) { case 1: return str == "."; @@ -1310,7 +1317,7 @@ inline bool IsSingleDotSegment(const std::string& str) { // Double dot segment can be: // "..", ".%2e", ".%2E", "%2e.", "%2E.", // "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e" -inline bool IsDoubleDotSegment(const std::string& str) { +bool IsDoubleDotSegment(const std::string& str) { switch (str.size()) { case 2: return str == ".."; @@ -1337,7 +1344,7 @@ inline bool IsDoubleDotSegment(const std::string& str) { } } -inline void ShortenUrlPath(struct url_data* url) { +void ShortenUrlPath(struct url_data* url) { if (url->path.empty()) return; if (url->path.size() == 1 && url->scheme == "file:" && IsNormalizedWindowsDriveLetter(url->path[0])) return; @@ -1707,7 +1714,7 @@ void URL::Parse(const char* input, buffer.insert(0, "%40"); } atflag = true; - const size_t blen = buffer.size(); + size_t blen = buffer.size(); if (blen > 0 && buffer[0] != ':') { url->flags |= URL_FLAGS_HAS_USERNAME; } @@ -2091,12 +2098,16 @@ void URL::Parse(const char* input, } } // NOLINT(readability/fn_size) -static inline void SetArgs(Environment* env, - Local argv[ARG_COUNT], - const struct url_data& url) { +namespace { +void SetArgs(Environment* env, + Local argv[ARG_COUNT], + const struct url_data& url) { Isolate* isolate = env->isolate(); argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags); - argv[ARG_PROTOCOL] = OneByteString(isolate, url.scheme.c_str()); + argv[ARG_PROTOCOL] = + url.flags & URL_FLAGS_SPECIAL ? + GetSpecial(env, url.scheme) : + OneByteString(isolate, url.scheme.c_str()); if (url.flags & URL_FLAGS_HAS_USERNAME) argv[ARG_USERNAME] = Utf8String(isolate, url.username); if (url.flags & URL_FLAGS_HAS_PASSWORD) @@ -2113,15 +2124,15 @@ static inline void SetArgs(Environment* env, argv[ARG_PATH] = ToV8Value(env->context(), url.path).ToLocalChecked(); } -static void Parse(Environment* env, - Local recv, - const char* input, - const size_t len, - enum url_parse_state state_override, - Local base_obj, - Local context_obj, - Local cb, - Local error_cb) { +void Parse(Environment* env, + Local recv, + const char* input, + size_t len, + enum url_parse_state state_override, + Local base_obj, + Local context_obj, + Local cb, + Local error_cb) { Isolate* isolate = env->isolate(); Local context = env->context(); HandleScope handle_scope(isolate); @@ -2172,7 +2183,7 @@ static void Parse(Environment* env, } } -static void Parse(const FunctionCallbackInfo& args) { +void Parse(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); CHECK_GE(args.Length(), 5); CHECK(args[0]->IsString()); // input @@ -2201,13 +2212,13 @@ static void Parse(const FunctionCallbackInfo& args) { args[5]); } -static void EncodeAuthSet(const FunctionCallbackInfo& args) { +void EncodeAuthSet(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); CHECK_GE(args.Length(), 1); CHECK(args[0]->IsString()); Utf8Value value(env->isolate(), args[0]); std::string output; - const size_t len = value.length(); + size_t len = value.length(); output.reserve(len); for (size_t n = 0; n < len; n++) { const char ch = (*value)[n]; @@ -2219,23 +2230,23 @@ static void EncodeAuthSet(const FunctionCallbackInfo& args) { NewStringType::kNormal).ToLocalChecked()); } -static void ToUSVString(const FunctionCallbackInfo& args) { +void ToUSVString(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); CHECK_GE(args.Length(), 2); CHECK(args[0]->IsString()); CHECK(args[1]->IsNumber()); TwoByteValue value(env->isolate(), args[0]); - const size_t n = value.length(); - const int64_t start = args[1]->IntegerValue(env->context()).FromJust(); + int64_t start; + args[1]->IntegerValue(env->context()).To(&start); CHECK_GE(start, 0); - for (size_t i = start; i < n; i++) { + for (size_t i = start; i < value.length(); i++) { char16_t c = value[i]; if (!IsUnicodeSurrogate(c)) { continue; - } else if (IsUnicodeSurrogateTrail(c) || i == n - 1) { + } else if (IsUnicodeSurrogateTrail(c) || i == value.length() - 1) { value[i] = kUnicodeReplacementCharacter; } else { char16_t d = value[i + 1]; @@ -2251,10 +2262,10 @@ static void ToUSVString(const FunctionCallbackInfo& args) { String::NewFromTwoByte(env->isolate(), *value, NewStringType::kNormal, - n).ToLocalChecked()); + value.length()).ToLocalChecked()); } -static void DomainToASCII(const FunctionCallbackInfo& args) { +void DomainToASCII(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); CHECK_GE(args.Length(), 1); CHECK(args[0]->IsString()); @@ -2274,7 +2285,7 @@ static void DomainToASCII(const FunctionCallbackInfo& args) { NewStringType::kNormal).ToLocalChecked()); } -static void DomainToUnicode(const FunctionCallbackInfo& args) { +void DomainToUnicode(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); CHECK_GE(args.Length(), 1); CHECK(args[0]->IsString()); @@ -2294,6 +2305,35 @@ static void DomainToUnicode(const FunctionCallbackInfo& args) { NewStringType::kNormal).ToLocalChecked()); } +void SetURLConstructor(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK_EQ(args.Length(), 1); + CHECK(args[0]->IsFunction()); + env->set_url_constructor_function(args[0].As()); +} + +void Initialize(Local target, + Local unused, + Local context, + void* priv) { + Environment* env = Environment::GetCurrent(context); + env->SetMethod(target, "parse", Parse); + env->SetMethodNoSideEffect(target, "encodeAuth", EncodeAuthSet); + env->SetMethodNoSideEffect(target, "toUSVString", ToUSVString); + env->SetMethodNoSideEffect(target, "domainToASCII", DomainToASCII); + env->SetMethodNoSideEffect(target, "domainToUnicode", DomainToUnicode); + env->SetMethod(target, "setURLConstructor", SetURLConstructor); + +#define XX(name, _) NODE_DEFINE_CONSTANT(target, name); + FLAGS(XX) +#undef XX + +#define XX(name) NODE_DEFINE_CONSTANT(target, name); + PARSESTATES(XX) +#undef XX +} +} // namespace + std::string URL::ToFilePath() const { if (context_.scheme != "file:") { return ""; @@ -2411,33 +2451,6 @@ MaybeLocal URL::ToObject(Environment* env) const { return ret; } -static void SetURLConstructor(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_EQ(args.Length(), 1); - CHECK(args[0]->IsFunction()); - env->set_url_constructor_function(args[0].As()); -} - -static void Initialize(Local target, - Local unused, - Local context, - void* priv) { - Environment* env = Environment::GetCurrent(context); - env->SetMethod(target, "parse", Parse); - env->SetMethodNoSideEffect(target, "encodeAuth", EncodeAuthSet); - env->SetMethodNoSideEffect(target, "toUSVString", ToUSVString); - env->SetMethodNoSideEffect(target, "domainToASCII", DomainToASCII); - env->SetMethodNoSideEffect(target, "domainToUnicode", DomainToUnicode); - env->SetMethod(target, "setURLConstructor", SetURLConstructor); - -#define XX(name, _) NODE_DEFINE_CONSTANT(target, name); - FLAGS(XX) -#undef XX - -#define XX(name) NODE_DEFINE_CONSTANT(target, name); - PARSESTATES(XX) -#undef XX -} } // namespace url } // namespace node diff --git a/src/node_url.h b/src/node_url.h index 963273f988c983..6439a4a087be77 100644 --- a/src/node_url.h +++ b/src/node_url.h @@ -120,11 +120,11 @@ class URL { URL(const std::string& input, const std::string& base) : URL(input.c_str(), input.length(), base.c_str(), base.length()) {} - int32_t flags() { + int32_t flags() const { return context_.flags; } - int port() { + int port() const { return context_.port; }