-
Notifications
You must be signed in to change notification settings - Fork 30.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
src: do not ignore IDNA conversion error #11549
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -410,7 +410,8 @@ bool InitializeICUDirectory(const std::string& path) { | |
|
||
int32_t ToUnicode(MaybeStackBuffer<char>* buf, | ||
const char* input, | ||
size_t length) { | ||
size_t length, | ||
bool lenient) { | ||
UErrorCode status = U_ZERO_ERROR; | ||
uint32_t options = UIDNA_DEFAULT; | ||
options |= UIDNA_NONTRANSITIONAL_TO_UNICODE; | ||
|
@@ -435,7 +436,7 @@ int32_t ToUnicode(MaybeStackBuffer<char>* buf, | |
&status); | ||
} | ||
|
||
if (U_FAILURE(status)) { | ||
if (U_FAILURE(status) || (!lenient && info.errors != 0)) { | ||
len = -1; | ||
buf->SetLength(0); | ||
} else { | ||
|
@@ -448,7 +449,8 @@ int32_t ToUnicode(MaybeStackBuffer<char>* buf, | |
|
||
int32_t ToASCII(MaybeStackBuffer<char>* buf, | ||
const char* input, | ||
size_t length) { | ||
size_t length, | ||
bool lenient) { | ||
UErrorCode status = U_ZERO_ERROR; | ||
uint32_t options = UIDNA_DEFAULT; | ||
options |= UIDNA_NONTRANSITIONAL_TO_ASCII; | ||
|
@@ -473,7 +475,7 @@ int32_t ToASCII(MaybeStackBuffer<char>* buf, | |
&status); | ||
} | ||
|
||
if (U_FAILURE(status)) { | ||
if (U_FAILURE(status) || (!lenient && info.errors != 0)) { | ||
len = -1; | ||
buf->SetLength(0); | ||
} else { | ||
|
@@ -489,8 +491,11 @@ static void ToUnicode(const FunctionCallbackInfo<Value>& args) { | |
CHECK_GE(args.Length(), 1); | ||
CHECK(args[0]->IsString()); | ||
Utf8Value val(env->isolate(), args[0]); | ||
// optional arg | ||
bool lenient = args[1]->BooleanValue(env->context()).FromJust(); | ||
|
||
MaybeStackBuffer<char> buf; | ||
int32_t len = ToUnicode(&buf, *val, val.length()); | ||
int32_t len = ToUnicode(&buf, *val, val.length(), lenient); | ||
|
||
if (len < 0) { | ||
return env->ThrowError("Cannot convert name to Unicode"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this error part of any non-experimental API? Could we change it to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No; in fact the /cc @jasnell There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If it's not used, it can be removed. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Remove which function specifically? The `i18n::ToUnicode' function is definitely used. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jasnell, the exposed |
||
|
@@ -508,8 +513,11 @@ static void ToASCII(const FunctionCallbackInfo<Value>& args) { | |
CHECK_GE(args.Length(), 1); | ||
CHECK(args[0]->IsString()); | ||
Utf8Value val(env->isolate(), args[0]); | ||
// optional arg | ||
bool lenient = args[1]->BooleanValue(env->context()).FromJust(); | ||
|
||
MaybeStackBuffer<char> buf; | ||
int32_t len = ToASCII(&buf, *val, val.length()); | ||
int32_t len = ToASCII(&buf, *val, val.length(), lenient); | ||
|
||
if (len < 0) { | ||
return env->ThrowError("Cannot convert name to ASCII"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this error part of any non-experimental API? Could we change it to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes for > url.parse(`http://${'é'.repeat(230)}.com/`)
Error: Cannot convert name to ASCII |
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,217 @@ | ||
'use strict'; | ||
|
||
// Credit for list: http://www.i18nguy.com/markup/idna-examples.html | ||
module.exports = { | ||
valid: [ | ||
{ ascii: 'xn--mgbaal8b0b9b2b.icom.museum', | ||
unicode: 'افغانستا.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--lgbbat1ad8j.icom.museum', | ||
unicode: 'الجزائر.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--sterreich-z7a.icom.museum', | ||
unicode: 'österreich.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--54b6eqazv8bc7e.icom.museum', | ||
unicode: 'বাংলাদেশ.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--80abmy0agn7e.icom.museum', | ||
unicode: 'беларусь.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--belgi-rsa.icom.museum', | ||
unicode: 'belgië.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--80abgvm6a7d2b.icom.museum', | ||
unicode: 'българия.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--mgbfqim.icom.museum', | ||
unicode: 'تشادر.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--fiqs8s.icom.museum', | ||
unicode: '中国.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--mgbu4chg.icom.museum', | ||
unicode: 'القمر.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--vxakcego.icom.museum', | ||
unicode: 'κυπρος.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--eskrepublika-ebb62d.icom.museum', | ||
unicode: 'českárepublika.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--wgbh1c.icom.museum', | ||
unicode: 'مصر.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--hxakic4aa.icom.museum', | ||
unicode: 'ελλάδα.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--magyarorszg-t7a.icom.museum', | ||
unicode: 'magyarország.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--sland-ysa.icom.museum', | ||
unicode: 'ísland.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--h2brj9c.icom.museum', | ||
unicode: 'भारत.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--mgba3a4fra.icom.museum', | ||
unicode: 'ايران.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--ire-9la.icom.museum', | ||
unicode: 'éire.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--4dbklr2c8d.xn--4dbrk0ce.museum', | ||
unicode: 'איקו״ם.ישראל.museum' | ||
}, | ||
{ | ||
ascii: 'xn--wgv71a.icom.museum', | ||
unicode: '日本.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--igbhzh7gpa.icom.museum', | ||
unicode: 'الأردن.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--80aaa0a6awh12ed.icom.museum', | ||
unicode: 'қазақстан.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--3e0b707e.icom.museum', | ||
unicode: '한국.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--80afmksoji0fc.icom.museum', | ||
unicode: 'кыргызстан.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--q7ce6a.icom.museum', | ||
unicode: 'ລາວ.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--mgbb7fjb.icom.museum', | ||
unicode: 'لبنان.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--80aaldqjmmi6x.icom.museum', | ||
unicode: 'македонија.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--mgbah1a3hjkrd.icom.museum', | ||
unicode: 'موريتانيا.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--mxico-bsa.icom.museum', | ||
unicode: 'méxico.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--c1aqabffc0aq.icom.museum', | ||
unicode: 'монголулс.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--mgbc0a9azcg.icom.museum', | ||
unicode: 'المغرب.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--l2bey1c2b.icom.museum', | ||
unicode: 'नेपाल.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--mgb9awbf.icom.museum', | ||
unicode: 'عمان.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--wgbl6a.icom.museum', | ||
unicode: 'قطر.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--romnia-yta.icom.museum', | ||
unicode: 'românia.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--h1alffa9f.xn--h1aegh.museum', | ||
unicode: 'россия.иком.museum' | ||
}, | ||
{ | ||
ascii: 'xn--80aaabm1ab4blmeec9e7n.xn--h1aegh.museum', | ||
unicode: 'србијаицрнагора.иком.museum' | ||
}, | ||
{ | ||
ascii: 'xn--xkc2al3hye2a.icom.museum', | ||
unicode: 'இலங்கை.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--espaa-rta.icom.museum', | ||
unicode: 'españa.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--o3cw4h.icom.museum', | ||
unicode: 'ไทย.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--pgbs0dh.icom.museum', | ||
unicode: 'تونس.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--trkiye-3ya.icom.museum', | ||
unicode: 'türkiye.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--80aaxgrpt.icom.museum', | ||
unicode: 'украина.icom.museum' | ||
}, | ||
{ | ||
ascii: 'xn--vitnam-jk8b.icom.museum', | ||
unicode: 'việtnam.icom.museum' | ||
}, | ||
// long URL | ||
{ | ||
ascii: `${`${'a'.repeat(63)}.`.repeat(3)}com`, | ||
unicode: `${`${'a'.repeat(63)}.`.repeat(3)}com` | ||
} | ||
], | ||
invalid: [ | ||
// long label | ||
{ | ||
url: `${'a'.repeat(64)}.com`, | ||
mode: 'ascii' | ||
}, | ||
// long URL | ||
{ | ||
url: `${`${'a'.repeat(63)}.`.repeat(4)}com`, | ||
mode: 'ascii' | ||
}, | ||
// invalid character | ||
{ | ||
url: '\ufffd.com', | ||
mode: 'ascii' | ||
}, | ||
{ | ||
url: '\ufffd.com', | ||
mode: 'unicode' | ||
}, | ||
// invalid Punycode | ||
{ | ||
url: 'xn---abc.com', | ||
mode: 'unicode' | ||
} | ||
] | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Btw, should this be
deserialization
, and mention that it is the inverse ofdomainToASCII
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is serialization, since the domain is fully parsed and subsequently serialized from the parsed form. It's just that it uses a different algorithm for
deserialization.