Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

src, url: WHATWG URL C++ parser cleanup #11917

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 31 additions & 23 deletions src/node_url.cc
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,9 @@ namespace url {
if (flags->IsInt32())
base->flags = flags->Int32Value(context).FromJust();

GET_AND_SET(env, base_obj, scheme, base, URL_FLAGS_HAS_SCHEME);
Local<Value> scheme = GET(env, base_obj, "scheme");
base->scheme = Utf8Value(env->isolate(), scheme).out();

GET_AND_SET(env, base_obj, username, base, URL_FLAGS_HAS_USERNAME);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Eventually it would likely be better just to inline this and get rid of the macro

GET_AND_SET(env, base_obj, password, base, URL_FLAGS_HAS_PASSWORD);
GET_AND_SET(env, base_obj, host, base, URL_FLAGS_HAS_HOST);
Expand Down Expand Up @@ -644,7 +646,7 @@ namespace url {
state = kNoScheme;
continue;
} else {
url->flags |= URL_FLAGS_TERMINATED;
url->flags |= URL_FLAGS_FAILED;
return;
}
break;
Expand All @@ -654,10 +656,12 @@ namespace url {
p++;
continue;
} else if (ch == ':' || (has_state_override && ch == kEOL)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The scheme state handling seems to deviate quite a bit from the spec now?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is unfortunately true. We already deviate from the spec by storing : as part of the scheme, coupled by the fact that we delegate some responsibilities to the JS layer, it's hard to follow the spec word-for-word.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As long as we're still API compliant and passing all the tests, such deviation is fine. It would be helpful, however, to document the deviations in code comments.

buffer += ':';
if (buffer.size() > 0) {
url->flags |= URL_FLAGS_HAS_SCHEME;
buffer += ':';
url->scheme = buffer;
} else if (has_state_override) {
url->flags |= URL_FLAGS_TERMINATED;
return;
}
if (IsSpecial(url->scheme)) {
url->flags |= URL_FLAGS_SPECIAL;
Expand All @@ -672,7 +676,6 @@ namespace url {
state = kFile;
} else if (special &&
has_base &&
base->flags & URL_FLAGS_HAS_SCHEME &&
url->scheme == base->scheme) {
state = kSpecialRelativeOrAuthority;
} else if (special) {
Expand All @@ -692,7 +695,7 @@ namespace url {
p = input;
continue;
} else {
url->flags |= URL_FLAGS_TERMINATED;
url->flags |= URL_FLAGS_FAILED;
return;
}
break;
Expand All @@ -702,7 +705,6 @@ namespace url {
url->flags |= URL_FLAGS_FAILED;
return;
} else if (cannot_be_base && ch == '#') {
url->flags |= URL_FLAGS_HAS_SCHEME;
url->scheme = base->scheme;
if (IsSpecial(url->scheme)) {
url->flags |= URL_FLAGS_SPECIAL;
Expand All @@ -725,12 +727,10 @@ namespace url {
url->flags |= URL_FLAGS_CANNOT_BE_BASE;
state = kFragment;
} else if (has_base &&
base->flags & URL_FLAGS_HAS_SCHEME &&
base->scheme != "file:") {
state = kRelative;
continue;
} else {
url->flags |= URL_FLAGS_HAS_SCHEME;
url->scheme = "file:";
url->flags |= URL_FLAGS_SPECIAL;
special = true;
Expand All @@ -756,7 +756,6 @@ namespace url {
}
break;
case kRelative:
url->flags |= URL_FLAGS_HAS_SCHEME;
url->scheme = base->scheme;
if (IsSpecial(url->scheme)) {
url->flags |= URL_FLAGS_SPECIAL;
Expand Down Expand Up @@ -951,7 +950,6 @@ namespace url {
buffer.clear();
state = kPort;
if (state_override == kHostname) {
url->flags |= URL_FLAGS_TERMINATED;
return;
}
} else if (ch == kEOL ||
Expand All @@ -972,7 +970,6 @@ namespace url {
buffer.clear();
state = kPathStart;
if (has_state_override) {
url->flags |= URL_FLAGS_TERMINATED;
return;
}
} else {
Expand All @@ -996,13 +993,26 @@ namespace url {
int port = 0;
for (size_t i = 0; i < buffer.size(); i++)
port = port * 10 + buffer[i] - '0';
if (port >= 0 && port <= 0xffff) {
url->port = NormalizePort(url->scheme, port);
} else if (!has_state_override) {
url->flags |= URL_FLAGS_FAILED;
if (port < 0 || port > 0xffff) {
// TODO(TimothyGu): This hack is currently needed for the host
// setter since it needs access to hostname if it is valid, and
// if the FAILED flag is set the entire response to JS layer
// will be empty.
if (state_override == kHost)
url->port = -1;
else
url->flags |= URL_FLAGS_FAILED;
return;
}
url->port = NormalizePort(url->scheme, port);
buffer.clear();
} else if (has_state_override) {
// TODO(TimothyGu): Similar case as above.
if (state_override == kHost)
url->port = -1;
else
url->flags |= URL_FLAGS_TERMINATED;
return;
}
state = kPathStart;
continue;
Expand All @@ -1014,7 +1024,6 @@ namespace url {
case kFile:
base_is_file = (
has_base &&
base->flags & URL_FLAGS_HAS_SCHEME &&
base->scheme == "file:");
switch (ch) {
case kEOL:
Expand Down Expand Up @@ -1097,7 +1106,6 @@ namespace url {
state = kFileHost;
} else {
if (has_base &&
base->flags & URL_FLAGS_HAS_SCHEME &&
base->scheme == "file:" &&
base->flags & URL_FLAGS_HAS_PATH &&
base->path.size() > 0 &&
Expand Down Expand Up @@ -1158,8 +1166,7 @@ namespace url {
url->path.push_back("");
}
} else {
if (url->flags & URL_FLAGS_HAS_SCHEME &&
url->scheme == "file:" &&
if (url->scheme == "file:" &&
url->path.empty() &&
buffer.size() == 2 &&
WINDOWS_DRIVE_LETTER(buffer[0], buffer[1])) {
Expand Down Expand Up @@ -1233,8 +1240,7 @@ namespace url {
const struct url_data* url) {
Isolate* isolate = env->isolate();
argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url->flags);
if (url->flags & URL_FLAGS_HAS_SCHEME)
argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str());
argv[ARG_PROTOCOL] = OneByteString(isolate, url->scheme.c_str());
if (url->flags & URL_FLAGS_HAS_USERNAME)
argv[ARG_USERNAME] = UTF8STRING(isolate, url->username);
if (url->flags & URL_FLAGS_HAS_PASSWORD)
Expand Down Expand Up @@ -1275,7 +1281,9 @@ namespace url {
HarvestBase(env, &base, base_obj.As<Object>());

URL::Parse(input, len, state_override, &url, &base, has_base);
if (url.flags & URL_FLAGS_INVALID_PARSE_STATE)
if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
((state_override != kUnknownState) &&
(url.flags & URL_FLAGS_TERMINATED)))
return;

// Define the return value placeholders
Expand Down
13 changes: 6 additions & 7 deletions src/node_url.h
Original file line number Diff line number Diff line change
Expand Up @@ -451,13 +451,12 @@ static inline void PercentDecode(const char* input,
XX(URL_FLAGS_INVALID_PARSE_STATE, 0x04) \
XX(URL_FLAGS_TERMINATED, 0x08) \
XX(URL_FLAGS_SPECIAL, 0x10) \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Likely worthwhile to decrement each of the remaining flags accordingly so that there's not a gap.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed. I was planning on some more refactoring, but that didn't happen.

XX(URL_FLAGS_HAS_SCHEME, 0x20) \
XX(URL_FLAGS_HAS_USERNAME, 0x40) \
XX(URL_FLAGS_HAS_PASSWORD, 0x80) \
XX(URL_FLAGS_HAS_HOST, 0x100) \
XX(URL_FLAGS_HAS_PATH, 0x200) \
XX(URL_FLAGS_HAS_QUERY, 0x400) \
XX(URL_FLAGS_HAS_FRAGMENT, 0x800)
XX(URL_FLAGS_HAS_USERNAME, 0x20) \
XX(URL_FLAGS_HAS_PASSWORD, 0x40) \
XX(URL_FLAGS_HAS_HOST, 0x80) \
XX(URL_FLAGS_HAS_PATH, 0x100) \
XX(URL_FLAGS_HAS_QUERY, 0x200) \
XX(URL_FLAGS_HAS_FRAGMENT, 0x400)

#define ARGS(XX) \
XX(ARG_FLAGS) \
Expand Down