-
Notifications
You must be signed in to change notification settings - Fork 29.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
src, url: WHATWG URL C++ parser cleanup #11917
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -494,7 +494,9 @@ namespace url { | |
if (flags->IsInt32()) | ||
base->flags = flags->Int32Value(context).FromJust(); | ||
|
||
GET_AND_SET(env, base_obj, scheme, base, URL_FLAGS_HAS_SCHEME); | ||
Local<Value> scheme = GET(env, base_obj, "scheme"); | ||
base->scheme = Utf8Value(env->isolate(), scheme).out(); | ||
|
||
GET_AND_SET(env, base_obj, username, base, URL_FLAGS_HAS_USERNAME); | ||
GET_AND_SET(env, base_obj, password, base, URL_FLAGS_HAS_PASSWORD); | ||
GET_AND_SET(env, base_obj, host, base, URL_FLAGS_HAS_HOST); | ||
|
@@ -644,7 +646,7 @@ namespace url { | |
state = kNoScheme; | ||
continue; | ||
} else { | ||
url->flags |= URL_FLAGS_TERMINATED; | ||
url->flags |= URL_FLAGS_FAILED; | ||
return; | ||
} | ||
break; | ||
|
@@ -654,10 +656,12 @@ namespace url { | |
p++; | ||
continue; | ||
} else if (ch == ':' || (has_state_override && ch == kEOL)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That is unfortunately true. We already deviate from the spec by storing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As long as we're still API compliant and passing all the tests, such deviation is fine. It would be helpful, however, to document the deviations in code comments. |
||
buffer += ':'; | ||
if (buffer.size() > 0) { | ||
url->flags |= URL_FLAGS_HAS_SCHEME; | ||
buffer += ':'; | ||
url->scheme = buffer; | ||
} else if (has_state_override) { | ||
url->flags |= URL_FLAGS_TERMINATED; | ||
return; | ||
} | ||
if (IsSpecial(url->scheme)) { | ||
url->flags |= URL_FLAGS_SPECIAL; | ||
|
@@ -672,7 +676,6 @@ namespace url { | |
state = kFile; | ||
} else if (special && | ||
has_base && | ||
base->flags & URL_FLAGS_HAS_SCHEME && | ||
url->scheme == base->scheme) { | ||
state = kSpecialRelativeOrAuthority; | ||
} else if (special) { | ||
|
@@ -692,7 +695,7 @@ namespace url { | |
p = input; | ||
continue; | ||
} else { | ||
url->flags |= URL_FLAGS_TERMINATED; | ||
url->flags |= URL_FLAGS_FAILED; | ||
return; | ||
} | ||
break; | ||
|
@@ -702,7 +705,6 @@ namespace url { | |
url->flags |= URL_FLAGS_FAILED; | ||
return; | ||
} else if (cannot_be_base && ch == '#') { | ||
url->flags |= URL_FLAGS_HAS_SCHEME; | ||
url->scheme = base->scheme; | ||
if (IsSpecial(url->scheme)) { | ||
url->flags |= URL_FLAGS_SPECIAL; | ||
|
@@ -725,12 +727,10 @@ namespace url { | |
url->flags |= URL_FLAGS_CANNOT_BE_BASE; | ||
state = kFragment; | ||
} else if (has_base && | ||
base->flags & URL_FLAGS_HAS_SCHEME && | ||
base->scheme != "file:") { | ||
state = kRelative; | ||
continue; | ||
} else { | ||
url->flags |= URL_FLAGS_HAS_SCHEME; | ||
url->scheme = "file:"; | ||
url->flags |= URL_FLAGS_SPECIAL; | ||
special = true; | ||
|
@@ -756,7 +756,6 @@ namespace url { | |
} | ||
break; | ||
case kRelative: | ||
url->flags |= URL_FLAGS_HAS_SCHEME; | ||
url->scheme = base->scheme; | ||
if (IsSpecial(url->scheme)) { | ||
url->flags |= URL_FLAGS_SPECIAL; | ||
|
@@ -951,7 +950,6 @@ namespace url { | |
buffer.clear(); | ||
state = kPort; | ||
if (state_override == kHostname) { | ||
url->flags |= URL_FLAGS_TERMINATED; | ||
return; | ||
} | ||
} else if (ch == kEOL || | ||
|
@@ -972,7 +970,6 @@ namespace url { | |
buffer.clear(); | ||
state = kPathStart; | ||
if (has_state_override) { | ||
url->flags |= URL_FLAGS_TERMINATED; | ||
return; | ||
} | ||
} else { | ||
|
@@ -996,13 +993,26 @@ namespace url { | |
int port = 0; | ||
for (size_t i = 0; i < buffer.size(); i++) | ||
port = port * 10 + buffer[i] - '0'; | ||
if (port >= 0 && port <= 0xffff) { | ||
url->port = NormalizePort(url->scheme, port); | ||
} else if (!has_state_override) { | ||
url->flags |= URL_FLAGS_FAILED; | ||
if (port < 0 || port > 0xffff) { | ||
// TODO(TimothyGu): This hack is currently needed for the host | ||
// setter since it needs access to hostname if it is valid, and | ||
// if the FAILED flag is set the entire response to JS layer | ||
// will be empty. | ||
if (state_override == kHost) | ||
url->port = -1; | ||
else | ||
url->flags |= URL_FLAGS_FAILED; | ||
return; | ||
} | ||
url->port = NormalizePort(url->scheme, port); | ||
buffer.clear(); | ||
} else if (has_state_override) { | ||
// TODO(TimothyGu): Similar case as above. | ||
if (state_override == kHost) | ||
url->port = -1; | ||
else | ||
url->flags |= URL_FLAGS_TERMINATED; | ||
return; | ||
} | ||
state = kPathStart; | ||
continue; | ||
|
@@ -1014,7 +1024,6 @@ namespace url { | |
case kFile: | ||
base_is_file = ( | ||
has_base && | ||
base->flags & URL_FLAGS_HAS_SCHEME && | ||
base->scheme == "file:"); | ||
switch (ch) { | ||
case kEOL: | ||
|
@@ -1097,7 +1106,6 @@ namespace url { | |
state = kFileHost; | ||
} else { | ||
if (has_base && | ||
base->flags & URL_FLAGS_HAS_SCHEME && | ||
base->scheme == "file:" && | ||
base->flags & URL_FLAGS_HAS_PATH && | ||
base->path.size() > 0 && | ||
|
@@ -1158,8 +1166,7 @@ namespace url { | |
url->path.push_back(""); | ||
} | ||
} else { | ||
if (url->flags & URL_FLAGS_HAS_SCHEME && | ||
url->scheme == "file:" && | ||
if (url->scheme == "file:" && | ||
url->path.empty() && | ||
buffer.size() == 2 && | ||
WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) { | ||
|
@@ -1233,8 +1240,7 @@ namespace url { | |
const struct url_data* url) { | ||
Isolate* isolate = env->isolate(); | ||
argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url->flags); | ||
if (url->flags & URL_FLAGS_HAS_SCHEME) | ||
argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str()); | ||
argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str()); | ||
if (url->flags & URL_FLAGS_HAS_USERNAME) | ||
argv[ARG_USERNAME] = UTF8STRING(isolate, url->username); | ||
if (url->flags & URL_FLAGS_HAS_PASSWORD) | ||
|
@@ -1275,7 +1281,9 @@ namespace url { | |
HarvestBase(env, &base, base_obj.As<Object>()); | ||
|
||
URL::Parse(input, len, state_override, &url, &base, has_base); | ||
if (url.flags & URL_FLAGS_INVALID_PARSE_STATE) | ||
if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) || | ||
((state_override != kUnknownState) && | ||
(url.flags & URL_FLAGS_TERMINATED))) | ||
return; | ||
|
||
// Define the return value placeholders | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -451,13 +451,12 @@ static inline void PercentDecode(const char* input, | |
XX(URL_FLAGS_INVALID_PARSE_STATE, 0x04) \ | ||
XX(URL_FLAGS_TERMINATED, 0x08) \ | ||
XX(URL_FLAGS_SPECIAL, 0x10) \ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Likely worthwhile to decrement each of the remaining flags accordingly so that there's not a gap. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. I was planning on some more refactoring, but that didn't happen. |
||
XX(URL_FLAGS_HAS_SCHEME, 0x20) \ | ||
XX(URL_FLAGS_HAS_USERNAME, 0x40) \ | ||
XX(URL_FLAGS_HAS_PASSWORD, 0x80) \ | ||
XX(URL_FLAGS_HAS_HOST, 0x100) \ | ||
XX(URL_FLAGS_HAS_PATH, 0x200) \ | ||
XX(URL_FLAGS_HAS_QUERY, 0x400) \ | ||
XX(URL_FLAGS_HAS_FRAGMENT, 0x800) | ||
XX(URL_FLAGS_HAS_USERNAME, 0x20) \ | ||
XX(URL_FLAGS_HAS_PASSWORD, 0x40) \ | ||
XX(URL_FLAGS_HAS_HOST, 0x80) \ | ||
XX(URL_FLAGS_HAS_PATH, 0x100) \ | ||
XX(URL_FLAGS_HAS_QUERY, 0x200) \ | ||
XX(URL_FLAGS_HAS_FRAGMENT, 0x400) | ||
|
||
#define ARGS(XX) \ | ||
XX(ARG_FLAGS) \ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Eventually it would likely be better just to inline this and get rid of the macro