Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

src: remove explicit UTF-8 validity check in url #11859

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 0 additions & 30 deletions src/node_url.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,6 @@
#include <stdio.h>
#include <cmath>

#if defined(NODE_HAVE_I18N_SUPPORT)
#include <unicode/utf8.h>
#include <unicode/utf.h>
#endif

#define UNICODE_REPLACEMENT_CHARACTER 0xFFFD

namespace node {
Expand Down Expand Up @@ -113,21 +108,6 @@ namespace url {
output->assign(*buf, buf.length());
return true;
}

// Unfortunately there's not really a better way to do this.
// Iterate through each encoded codepoint and verify that
// it is a valid unicode codepoint.
static bool IsValidUTF8(std::string* input) {
const char* p = input->c_str();
int32_t len = input->length();
for (int32_t i = 0; i < len;) {
UChar32 c;
U8_NEXT_UNSAFE(p, i, c);
if (!U_IS_UNICODE_CHAR(c))
return false;
}
return true;
}
#else
// Intentional no-ops if ICU is not present.
static bool ToUnicode(std::string* input, std::string* output) {
Expand All @@ -139,10 +119,6 @@ namespace url {
*output = *input;
return true;
}

// Fallback used when ICU is unavailable: no validation is performed and
// every input is reported as valid.
static bool IsValidUTF8(std::string* input) {
  static_cast<void>(input);  // intentionally unused in this build
  return true;
}
#endif

// If a UTF-16 character is a low/trailing surrogate.
Expand Down Expand Up @@ -395,12 +371,6 @@ namespace url {
if (PercentDecode(input, length, &decoded) < 0)
goto end;

// If there are any invalid UTF8 byte sequences, we have to fail.
// Unfortunately this means iterating through the string and checking
// each decoded codepoint.
if (!IsValidUTF8(&decoded))
goto end;

// Then we have to punycode toASCII
if (!ToASCII(&decoded, &decoded))
goto end;
Expand Down