diff --git a/src/env.h b/src/env.h index 75c530af454572..a688b069242160 100644 --- a/src/env.h +++ b/src/env.h @@ -165,11 +165,13 @@ struct PackageConfig { V(fingerprint_string, "fingerprint") \ V(fingerprint256_string, "fingerprint256") \ V(flags_string, "flags") \ + V(fragment_string, "fragment") \ V(get_data_clone_error_string, "_getDataCloneError") \ V(get_shared_array_buffer_id_string, "_getSharedArrayBufferId") \ V(gid_string, "gid") \ V(handle_string, "handle") \ V(homedir_string, "homedir") \ + V(host_string, "host") \ V(hostmaster_string, "hostmaster") \ V(ignore_string, "ignore") \ V(infoaccess_string, "infoAccess") \ @@ -226,6 +228,7 @@ struct PackageConfig { V(order_string, "order") \ V(owner_string, "owner") \ V(parse_error_string, "Parse Error") \ + V(password_string, "password") \ V(path_string, "path") \ V(pending_handle_string, "pendingHandle") \ V(pbkdf2_error_string, "PBKDF2 Error") \ @@ -239,6 +242,7 @@ struct PackageConfig { V(produce_cached_data_string, "produceCachedData") \ V(promise_string, "promise") \ V(pubkey_string, "pubkey") \ + V(query_string, "query") \ V(raw_string, "raw") \ V(read_host_object_string, "_readHostObject") \ V(readable_string, "readable") \ @@ -247,6 +251,7 @@ struct PackageConfig { V(rename_string, "rename") \ V(replacement_string, "replacement") \ V(retry_string, "retry") \ + V(scheme_string, "scheme") \ V(serial_string, "serial") \ V(scopeid_string, "scopeid") \ V(serial_number_string, "serialNumber") \ diff --git a/src/node_url.cc b/src/node_url.cc index 901fa0761a4f00..09199afb141e3f 100644 --- a/src/node_url.cc +++ b/src/node_url.cc @@ -15,6 +15,7 @@ using v8::Context; using v8::Function; using v8::FunctionCallbackInfo; using v8::HandleScope; +using v8::Int32; using v8::Integer; using v8::Isolate; using v8::Local; @@ -26,23 +27,12 @@ using v8::TryCatch; using v8::Undefined; using v8::Value; -#define GET(env, obj, name) \ - obj->Get(env->context(), \ - OneByteString(env->isolate(), name)).ToLocalChecked() - -#define GET_AND_SET(env, obj, name, data, flag) \ - { \ - Local val = GET(env, obj, #name); \ - if (val->IsString()) { \ - Utf8Value value(env->isolate(), val.As()); \ - data->name = *value; \ - data->flags |= flag; \ - } \ - } - -#define UTF8STRING(isolate, str) \ - String::NewFromUtf8(isolate, str.c_str(), v8::NewStringType::kNormal) \ - .ToLocalChecked() +inline Local Utf8String(Isolate* isolate, const std::string& str) { + return String::NewFromUtf8(isolate, + str.data(), + v8::NewStringType::kNormal, + str.length()).ToLocalChecked(); +} namespace url { @@ -69,6 +59,8 @@ class URLHost { inline bool ParsingFailed() const { return type_ == HostType::H_FAILED; } std::string ToString() const; + // Like ToString(), but avoids a copy in exchange for invalidating `*this`. + std::string ToStringMove(); private: enum class HostType { @@ -80,10 +72,9 @@ class URLHost { }; union Value { - std::string domain; + std::string domain_or_opaque; uint32_t ipv4; uint16_t ipv6[8]; - std::string opaque; ~Value() {} Value() : ipv4(0) {} @@ -95,9 +86,12 @@ class URLHost { inline void Reset() { using string = std::string; switch (type_) { - case HostType::H_DOMAIN: value_.domain.~string(); break; - case HostType::H_OPAQUE: value_.opaque.~string(); break; - default: break; + case HostType::H_DOMAIN: + case HostType::H_OPAQUE: + value_.domain_or_opaque.~string(); + break; + default: + break; } type_ = HostType::H_FAILED; } @@ -113,13 +107,13 @@ class URLHost { inline void SetOpaque(std::string&& string) { Reset(); type_ = HostType::H_OPAQUE; - new(&value_.opaque) std::string(std::move(string)); + new(&value_.domain_or_opaque) std::string(std::move(string)); } inline void SetDomain(std::string&& string) { Reset(); type_ = HostType::H_DOMAIN; - new(&value_.domain) std::string(std::move(string)); + new(&value_.domain_or_opaque) std::string(std::move(string)); } }; @@ -136,7 +130,8 @@ URLHost::~URLHost() { XX(ARG_PORT) \ XX(ARG_PATH) \ XX(ARG_QUERY) \ - XX(ARG_FRAGMENT) + XX(ARG_FRAGMENT) \ + XX(ARG_COUNT) // This one has to be last. #define ERR_ARGS(XX) \ XX(ERR_ARG_FLAGS) \ @@ -665,7 +660,7 @@ inline std::string PercentDecode(const char* input, size_t len) { XX("ws:", 80) \ XX("wss:", 443) -inline bool IsSpecial(std::string scheme) { +inline bool IsSpecial(const std::string& scheme) { #define XX(name, _) if (scheme == name) return true; SPECIALS(XX); #undef XX @@ -684,7 +679,7 @@ inline bool StartsWithWindowsDriveLetter(const char* p, const char* end) { p[2] == '#'); } -inline int NormalizePort(std::string scheme, int p) { +inline int NormalizePort(const std::string& scheme, int p) { #define XX(name, port) if (scheme == name && p == port) return -1; SPECIALS(XX); #undef XX @@ -930,7 +925,7 @@ void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) { void URLHost::ParseOpaqueHost(const char* input, size_t length) { CHECK_EQ(type_, HostType::H_FAILED); std::string output; - output.reserve(length * 3); + output.reserve(length); for (size_t i = 0; i < length; i++) { const char ch = input[i]; if (ch != '%' && IsForbiddenHostCodePoint(ch)) { @@ -1022,14 +1017,27 @@ inline T* FindLongestZeroSequence(T* values, size_t len) { return result; } +std::string URLHost::ToStringMove() { + std::string return_value; + switch (type_) { + case HostType::H_DOMAIN: + case HostType::H_OPAQUE: + return_value = std::move(value_.domain_or_opaque); + break; + default: + return_value = ToString(); + break; + } + Reset(); + return return_value; +} + std::string URLHost::ToString() const { std::string dest; switch (type_) { case HostType::H_DOMAIN: - return value_.domain; - break; case HostType::H_OPAQUE: - return value_.opaque; + return value_.domain_or_opaque; break; case HostType::H_IPV4: { dest.reserve(15); @@ -1089,103 +1097,125 @@ bool ParseHost(const std::string& input, host.ParseHost(input.c_str(), input.length(), is_special, unicode); if (host.ParsingFailed()) return false; - *output = host.ToString(); + *output = host.ToStringMove(); return true; } -inline void Copy(Environment* env, - Local ary, - std::vector* vec) { - const int32_t len = ary->Length(); +inline std::vector FromJSStringArray(Environment* env, + Local array) { + std::vector vec; + const int32_t len = array->Length(); if (len == 0) - return; // nothing to copy - vec->reserve(len); + return vec; // nothing to copy + vec.reserve(len); for (int32_t n = 0; n < len; n++) { - Local val = ary->Get(env->context(), n).ToLocalChecked(); + Local val = array->Get(env->context(), n).ToLocalChecked(); if (val->IsString()) { Utf8Value value(env->isolate(), val.As()); - vec->push_back(std::string(*value, value.length())); + vec.emplace_back(*value, value.length()); } } + return vec; } -inline Local Copy(Environment* env, - const std::vector& vec) { +inline Local ToJSStringArray(Environment* env, + const std::vector& vec) { Isolate* isolate = env->isolate(); - Local ary = Array::New(isolate, vec.size()); + Local array = Array::New(isolate, vec.size()); for (size_t n = 0; n < vec.size(); n++) - ary->Set(env->context(), n, UTF8STRING(isolate, vec[n])).FromJust(); - return ary; + array->Set(env->context(), n, Utf8String(isolate, vec[n])).FromJust(); + return array; } -inline void HarvestBase(Environment* env, - struct url_data* base, - Local base_obj) { +inline url_data HarvestBase(Environment* env, Local base_obj) { + url_data base; Local context = env->context(); - Local flags = GET(env, base_obj, "flags"); + Local flags = + base_obj->Get(env->context(), env->flags_string()).ToLocalChecked(); if (flags->IsInt32()) - base->flags = flags->Int32Value(context).FromJust(); - - Local scheme = GET(env, base_obj, "scheme"); - base->scheme = Utf8Value(env->isolate(), scheme).out(); - - GET_AND_SET(env, base_obj, username, base, URL_FLAGS_HAS_USERNAME); - GET_AND_SET(env, base_obj, password, base, URL_FLAGS_HAS_PASSWORD); - GET_AND_SET(env, base_obj, host, base, URL_FLAGS_HAS_HOST); - GET_AND_SET(env, base_obj, query, base, URL_FLAGS_HAS_QUERY); - GET_AND_SET(env, base_obj, fragment, base, URL_FLAGS_HAS_FRAGMENT); - Local port = GET(env, base_obj, "port"); + base.flags = flags->Int32Value(context).FromJust(); + + Local scheme = + base_obj->Get(env->context(), env->scheme_string()).ToLocalChecked(); + base.scheme = Utf8Value(env->isolate(), scheme).out(); + + auto GetStr = [&](std::string url_data::* member, + int flag, + Local name) { + Local value = base_obj->Get(env->context(), name).ToLocalChecked(); + if (value->IsString()) { + Utf8Value utf8value(env->isolate(), value.As()); + (base.*member).assign(*utf8value, utf8value.length()); + base.flags |= flag; + } + }; + GetStr(&url_data::username, URL_FLAGS_HAS_USERNAME, env->username_string()); + GetStr(&url_data::password, URL_FLAGS_HAS_PASSWORD, env->password_string()); + GetStr(&url_data::host, URL_FLAGS_HAS_HOST, env->host_string()); + GetStr(&url_data::query, URL_FLAGS_HAS_QUERY, env->query_string()); + GetStr(&url_data::fragment, URL_FLAGS_HAS_FRAGMENT, env->fragment_string()); + + Local port = + base_obj->Get(env->context(), env->port_string()).ToLocalChecked(); if (port->IsInt32()) - base->port = port->Int32Value(context).FromJust(); - Local path = GET(env, base_obj, "path"); + base.port = port.As()->Value(); + + Local + path = base_obj->Get(env->context(), env->path_string()).ToLocalChecked(); if (path->IsArray()) { - base->flags |= URL_FLAGS_HAS_PATH; - Copy(env, path.As(), &(base->path)); + base.flags |= URL_FLAGS_HAS_PATH; + base.path = FromJSStringArray(env, path.As()); } + return base; } -inline void HarvestContext(Environment* env, - struct url_data* context, - Local context_obj) { - Local flags = GET(env, context_obj, "flags"); +inline url_data HarvestContext(Environment* env, Local context_obj) { + url_data context; + Local flags = + context_obj->Get(env->context(), env->flags_string()).ToLocalChecked(); if (flags->IsInt32()) { - int32_t _flags = flags->Int32Value(env->context()).FromJust(); - if (_flags & URL_FLAGS_SPECIAL) - context->flags |= URL_FLAGS_SPECIAL; - if (_flags & URL_FLAGS_CANNOT_BE_BASE) - context->flags |= URL_FLAGS_CANNOT_BE_BASE; - if (_flags & URL_FLAGS_HAS_USERNAME) - context->flags |= URL_FLAGS_HAS_USERNAME; - if (_flags & URL_FLAGS_HAS_PASSWORD) - context->flags |= URL_FLAGS_HAS_PASSWORD; - if (_flags & URL_FLAGS_HAS_HOST) - context->flags |= URL_FLAGS_HAS_HOST; + static const int32_t copy_flags_mask = + URL_FLAGS_SPECIAL | + URL_FLAGS_CANNOT_BE_BASE | + URL_FLAGS_HAS_USERNAME | + URL_FLAGS_HAS_PASSWORD | + URL_FLAGS_HAS_HOST; + context.flags |= flags.As()->Value() & copy_flags_mask; } - Local scheme = GET(env, context_obj, "scheme"); + Local scheme = + context_obj->Get(env->context(), env->scheme_string()).ToLocalChecked(); if (scheme->IsString()) { Utf8Value value(env->isolate(), scheme); - context->scheme.assign(*value, value.length()); + context.scheme.assign(*value, value.length()); } - Local port = GET(env, context_obj, "port"); + Local port = + context_obj->Get(env->context(), env->port_string()).ToLocalChecked(); if (port->IsInt32()) - context->port = port->Int32Value(env->context()).FromJust(); - if (context->flags & URL_FLAGS_HAS_USERNAME) { - Local username = GET(env, context_obj, "username"); + context.port = port.As()->Value(); + if (context.flags & URL_FLAGS_HAS_USERNAME) { + Local username = + context_obj->Get(env->context(), + env->username_string()).ToLocalChecked(); CHECK(username->IsString()); Utf8Value value(env->isolate(), username); - context->username.assign(*value, value.length()); + context.username.assign(*value, value.length()); } - if (context->flags & URL_FLAGS_HAS_PASSWORD) { - Local password = GET(env, context_obj, "password"); + if (context.flags & URL_FLAGS_HAS_PASSWORD) { + Local password = + context_obj->Get(env->context(), + env->password_string()).ToLocalChecked(); CHECK(password->IsString()); Utf8Value value(env->isolate(), password); - context->password.assign(*value, value.length()); + context.password.assign(*value, value.length()); } - Local host = GET(env, context_obj, "host"); + Local host = + context_obj->Get(env->context(), + env->host_string()).ToLocalChecked(); if (host->IsString()) { Utf8Value value(env->isolate(), host); - context->host.assign(*value, value.length()); + context.host.assign(*value, value.length()); } + return context; } // Single dot segment can be ".", "%2e", or "%2E" @@ -1267,30 +1297,37 @@ void URL::Parse(const char* input, len = end - p; } + // The spec says we should strip out any ASCII tabs or newlines. + // In those cases, we create another std::string instance with the filtered + // contents, but in the general case we avoid the overhead. std::string whitespace_stripped; - whitespace_stripped.reserve(len); - for (const char* ptr = p; ptr < end; ptr++) + for (const char* ptr = p; ptr < end; ptr++) { if (!IsASCIITabOrNewline(*ptr)) - whitespace_stripped += *ptr; + continue; + // Hit tab or newline. Allocate storage, copy what we have until now, + // and then iterate and filter all similar characters out. + whitespace_stripped.reserve(len - 1); + whitespace_stripped.assign(p, ptr - p); + // 'ptr + 1' skips the current char, which we know to be tab or newline. + for (ptr = ptr + 1; ptr < end; ptr++) { + if (!IsASCIITabOrNewline(*ptr)) + whitespace_stripped += *ptr; + } - input = whitespace_stripped.c_str(); - len = whitespace_stripped.size(); - p = input; - end = input + len; + // Update variables like they should have looked like if the string + // had been stripped of whitespace to begin with. + input = whitespace_stripped.c_str(); + len = whitespace_stripped.size(); + p = input; + end = input + len; + break; + } - bool atflag = false; - bool sbflag = false; - bool uflag = false; + bool atflag = false; // Set when @ has been seen. + bool square_bracket_flag = false; // Set inside of [...] + bool password_token_seen_flag = false; // Set after a : after an username. std::string buffer; - url->scheme.reserve(len); - url->username.reserve(len); - url->password.reserve(len); - url->host.reserve(len); - url->path.reserve(len); - url->query.reserve(len); - url->fragment.reserve(len); - buffer.reserve(len); // Set the initial parse state. const bool has_state_override = state_override != kUnknownState; @@ -1347,7 +1384,7 @@ void URL::Parse(const char* input, // as it can be done before even entering C++ binding. } - url->scheme = buffer; + url->scheme = std::move(buffer); url->port = NormalizePort(url->scheme, url->port); if (new_is_special) { url->flags |= URL_FLAGS_SPECIAL; @@ -1373,7 +1410,7 @@ void URL::Parse(const char* input, } else { url->flags |= URL_FLAGS_CANNOT_BE_BASE; url->flags |= URL_FLAGS_HAS_PATH; - url->path.push_back(""); + url->path.emplace_back(""); state = kCannotBeBase; } } else if (!has_state_override) { @@ -1602,12 +1639,12 @@ void URL::Parse(const char* input, const char bch = buffer[n]; if (bch == ':') { url->flags |= URL_FLAGS_HAS_PASSWORD; - if (!uflag) { - uflag = true; + if (!password_token_seen_flag) { + password_token_seen_flag = true; continue; } } - if (uflag) { + if (password_token_seen_flag) { AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET); } else { AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET); @@ -1635,7 +1672,7 @@ void URL::Parse(const char* input, if (has_state_override && url->scheme == "file:") { state = kFileHost; continue; - } else if (ch == ':' && !sbflag) { + } else if (ch == ':' && !square_bracket_flag) { if (buffer.size() == 0) { url->flags |= URL_FLAGS_FAILED; return; @@ -1679,9 +1716,9 @@ void URL::Parse(const char* input, } } else { if (ch == '[') - sbflag = true; + square_bracket_flag = true; if (ch == ']') - sbflag = false; + square_bracket_flag = false; buffer += ch; } break; @@ -1888,12 +1925,12 @@ void URL::Parse(const char* input, ShortenUrlPath(url); if (ch != '/' && !special_back_slash) { url->flags |= URL_FLAGS_HAS_PATH; - url->path.push_back(""); + url->path.emplace_back(""); } } else if (IsSingleDotSegment(buffer) && ch != '/' && !special_back_slash) { url->flags |= URL_FLAGS_HAS_PATH; - url->path.push_back(""); + url->path.emplace_back(""); } else if (!IsSingleDotSegment(buffer)) { if (url->scheme == "file:" && url->path.empty() && @@ -1907,8 +1944,7 @@ void URL::Parse(const char* input, buffer[1] = ':'; } url->flags |= URL_FLAGS_HAS_PATH; - std::string segment(buffer.c_str(), buffer.size()); - url->path.push_back(segment); + url->path.emplace_back(std::move(buffer)); } buffer.clear(); if (url->scheme == "file:" && @@ -1947,7 +1983,7 @@ void URL::Parse(const char* input, case kQuery: if (ch == kEOL || (!has_state_override && ch == '#')) { url->flags |= URL_FLAGS_HAS_QUERY; - url->query = buffer; + url->query = std::move(buffer); buffer.clear(); if (ch == '#') state = kFragment; @@ -1959,7 +1995,7 @@ void URL::Parse(const char* input, switch (ch) { case kEOL: url->flags |= URL_FLAGS_HAS_FRAGMENT; - url->fragment = buffer; + url->fragment = std::move(buffer); break; case 0: break; @@ -1977,25 +2013,25 @@ void URL::Parse(const char* input, } // NOLINT(readability/fn_size) static inline void SetArgs(Environment* env, - Local argv[], - const struct url_data* url) { + Local argv[ARG_COUNT], + const struct url_data& url) { Isolate* isolate = env->isolate(); - argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url->flags); - argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str()); - if (url->flags & URL_FLAGS_HAS_USERNAME) - argv[ARG_USERNAME] = UTF8STRING(isolate, url->username); - if (url->flags & URL_FLAGS_HAS_PASSWORD) - argv[ARG_PASSWORD] = UTF8STRING(isolate, url->password); - if (url->flags & URL_FLAGS_HAS_HOST) - argv[ARG_HOST] = UTF8STRING(isolate, url->host); - if (url->flags & URL_FLAGS_HAS_QUERY) - argv[ARG_QUERY] = UTF8STRING(isolate, url->query); - if (url->flags & URL_FLAGS_HAS_FRAGMENT) - argv[ARG_FRAGMENT] = UTF8STRING(isolate, url->fragment); - if (url->port > -1) - argv[ARG_PORT] = Integer::New(isolate, url->port); - if (url->flags & URL_FLAGS_HAS_PATH) - argv[ARG_PATH] = Copy(env, url->path); + argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags); + argv[ARG_PROTOCOL] = OneByteString(isolate, url.scheme.c_str()); + if (url.flags & URL_FLAGS_HAS_USERNAME) + argv[ARG_USERNAME] = Utf8String(isolate, url.username); + if (url.flags & URL_FLAGS_HAS_PASSWORD) + argv[ARG_PASSWORD] = Utf8String(isolate, url.password); + if (url.flags & URL_FLAGS_HAS_HOST) + argv[ARG_HOST] = Utf8String(isolate, url.host); + if (url.flags & URL_FLAGS_HAS_QUERY) + argv[ARG_QUERY] = Utf8String(isolate, url.query); + if (url.flags & URL_FLAGS_HAS_FRAGMENT) + argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment); + if (url.port > -1) + argv[ARG_PORT] = Integer::New(isolate, url.port); + if (url.flags & URL_FLAGS_HAS_PATH) + argv[ARG_PATH] = ToJSStringArray(env, url.path); } static void Parse(Environment* env, @@ -2015,12 +2051,12 @@ static void Parse(Environment* env, const bool has_context = context_obj->IsObject(); const bool has_base = base_obj->IsObject(); - struct url_data base; - struct url_data url; + url_data base; + url_data url; if (has_context) - HarvestContext(env, &url, context_obj.As()); + url = HarvestContext(env, context_obj.As()); if (has_base) - HarvestBase(env, &base, base_obj.As()); + base = HarvestBase(env, base_obj.As()); URL::Parse(input, len, state_override, &url, has_context, &base, has_base); if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) || @@ -2032,7 +2068,7 @@ static void Parse(Environment* env, const Local undef = Undefined(isolate); const Local null = Null(isolate); if (!(url.flags & URL_FLAGS_FAILED)) { - Local argv[9] = { + Local argv[] = { undef, undef, undef, @@ -2043,7 +2079,7 @@ static void Parse(Environment* env, null, // query defaults to null null, // fragment defaults to null }; - SetArgs(env, argv, &url); + SetArgs(env, argv, url); cb->Call(context, recv, arraysize(argv), argv).FromMaybe(Local()); } else if (error_cb->IsFunction()) { Local argv[2] = { undef, undef }; @@ -2152,7 +2188,7 @@ static void DomainToASCII(const FunctionCallbackInfo& args) { args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); return; } - std::string out = host.ToString(); + std::string out = host.ToStringMove(); args.GetReturnValue().Set( String::NewFromUtf8(env->isolate(), out.c_str(), @@ -2172,7 +2208,7 @@ static void DomainToUnicode(const FunctionCallbackInfo& args) { args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), "")); return; } - std::string out = host.ToString(); + std::string out = host.ToStringMove(); args.GetReturnValue().Set( String::NewFromUtf8(env->isolate(), out.c_str(), @@ -2255,7 +2291,7 @@ const Local URL::ToObject(Environment* env) const { if (context_.flags & URL_FLAGS_FAILED) return Local(); - Local argv[9] = { + Local argv[] = { undef, undef, undef, @@ -2266,7 +2302,7 @@ const Local URL::ToObject(Environment* env) const { null, // query defaults to null null, // fragment defaults to null }; - SetArgs(env, argv, &context_); + SetArgs(env, argv, context_); MaybeLocal ret; {