From 89c96ade532921fada1fe403bb1251d4f6997841 Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Fri, 18 Oct 2024 12:47:50 -0400 Subject: [PATCH] src: remove icu based `ToASCII` and `ToUnicode` PR-URL: https://github.com/nodejs/node/pull/55156 Reviewed-By: James M Snell Reviewed-By: Matthew Aitken Reviewed-By: Daniel Lemire Reviewed-By: Richard Lau --- src/node_i18n.cc | 172 +----------------------- src/node_i18n.h | 13 -- test/fixtures/icu-punycode-toascii.json | 149 -------------------- test/parallel/test-icu-punycode.js | 57 -------- 4 files changed, 2 insertions(+), 389 deletions(-) delete mode 100644 test/fixtures/icu-punycode-toascii.json delete mode 100644 test/parallel/test-icu-punycode.js diff --git a/src/node_i18n.cc b/src/node_i18n.cc index 43bb68351bf0a6..0bcf10a0b35acc 100644 --- a/src/node_i18n.cc +++ b/src/node_i18n.cc @@ -60,19 +60,17 @@ #include #include #include -#include -#include #include #include -#include #include -#include #include #include #include #include "nbytes.h" #ifdef NODE_HAVE_SMALL_ICU +#include + /* if this is defined, we have a 'secondary' entry point. compare following to utypes.h defs for U_ICUDATA_ENTRY_POINT */ #define SMALL_ICUDATA_ENTRY_POINT \ @@ -96,7 +94,6 @@ using v8::Int32; using v8::Isolate; using v8::Local; using v8::MaybeLocal; -using v8::NewStringType; using v8::Object; using v8::ObjectTemplate; using v8::String; @@ -583,167 +580,6 @@ void SetDefaultTimeZone(const char* tzid) { CHECK(U_SUCCESS(status)); } -int32_t ToUnicode(MaybeStackBuffer* buf, - const char* input, - size_t length) { - UErrorCode status = U_ZERO_ERROR; - uint32_t options = UIDNA_NONTRANSITIONAL_TO_UNICODE; - UIDNA* uidna = uidna_openUTS46(options, &status); - if (U_FAILURE(status)) - return -1; - UIDNAInfo info = UIDNA_INFO_INITIALIZER; - - int32_t len = uidna_nameToUnicodeUTF8(uidna, - input, length, - **buf, buf->capacity(), - &info, - &status); - - // Do not check info.errors like we do with ToASCII since ToUnicode always - // returns a string, despite any possible errors that may have occurred. - - if (status == U_BUFFER_OVERFLOW_ERROR) { - status = U_ZERO_ERROR; - buf->AllocateSufficientStorage(len); - len = uidna_nameToUnicodeUTF8(uidna, - input, length, - **buf, buf->capacity(), - &info, - &status); - } - - // info.errors is ignored as UTS #46 ToUnicode always produces a Unicode - // string, regardless of whether an error occurred. - - if (U_FAILURE(status)) { - len = -1; - buf->SetLength(0); - } else { - buf->SetLength(len); - } - - uidna_close(uidna); - return len; -} - -int32_t ToASCII(MaybeStackBuffer* buf, - const char* input, - size_t length, - idna_mode mode) { - UErrorCode status = U_ZERO_ERROR; - uint32_t options = // CheckHyphens = false; handled later - UIDNA_CHECK_BIDI | // CheckBidi = true - UIDNA_CHECK_CONTEXTJ | // CheckJoiners = true - UIDNA_NONTRANSITIONAL_TO_ASCII; // Nontransitional_Processing - if (mode == idna_mode::kStrict) { - options |= UIDNA_USE_STD3_RULES; // UseSTD3ASCIIRules = beStrict - // VerifyDnsLength = beStrict; - // handled later - } - - UIDNA* uidna = uidna_openUTS46(options, &status); - if (U_FAILURE(status)) - return -1; - UIDNAInfo info = UIDNA_INFO_INITIALIZER; - - int32_t len = uidna_nameToASCII_UTF8(uidna, - input, length, - **buf, buf->capacity(), - &info, - &status); - - if (status == U_BUFFER_OVERFLOW_ERROR) { - status = U_ZERO_ERROR; - buf->AllocateSufficientStorage(len); - len = uidna_nameToASCII_UTF8(uidna, - input, length, - **buf, buf->capacity(), - &info, - &status); - } - - // In UTS #46 which specifies ToASCII, certain error conditions are - // configurable through options, and the WHATWG URL Standard promptly elects - // to disable some of them to accommodate for real-world use cases. - // Unfortunately, ICU4C's IDNA module does not support disabling some of - // these options through `options` above, and thus continues throwing - // unnecessary errors. To counter this situation, we just filter out the - // errors that may have happened afterwards, before deciding whether to - // return an error from this function. - - // CheckHyphens = false - // (Specified in the current UTS #46 draft rev. 18.) - // Refs: - // - https://github.com/whatwg/url/issues/53 - // - https://github.com/whatwg/url/pull/309 - // - http://www.unicode.org/review/pri317/ - // - http://www.unicode.org/reports/tr46/tr46-18.html - // - https://www.icann.org/news/announcement-2000-01-07-en - info.errors &= ~UIDNA_ERROR_HYPHEN_3_4; - info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN; - info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN; - - if (mode != idna_mode::kStrict) { - // VerifyDnsLength = beStrict - info.errors &= ~UIDNA_ERROR_EMPTY_LABEL; - info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG; - info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG; - } - - if (U_FAILURE(status) || (mode != idna_mode::kLenient && info.errors != 0)) { - len = -1; - buf->SetLength(0); - } else { - buf->SetLength(len); - } - - uidna_close(uidna); - return len; -} - -static void ToUnicode(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_GE(args.Length(), 1); - CHECK(args[0]->IsString()); - Utf8Value val(env->isolate(), args[0]); - - MaybeStackBuffer buf; - int32_t len = ToUnicode(&buf, *val, val.length()); - - if (len < 0) { - return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to Unicode"); - } - - args.GetReturnValue().Set( - String::NewFromUtf8(env->isolate(), - *buf, - NewStringType::kNormal, - len).ToLocalChecked()); -} - -static void ToASCII(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - CHECK_GE(args.Length(), 1); - CHECK(args[0]->IsString()); - Utf8Value val(env->isolate(), args[0]); - // optional arg - bool lenient = args[1]->BooleanValue(env->isolate()); - idna_mode mode = lenient ? idna_mode::kLenient : idna_mode::kDefault; - - MaybeStackBuffer buf; - int32_t len = ToASCII(&buf, *val, val.length(), mode); - - if (len < 0) { - return THROW_ERR_INVALID_ARG_VALUE(env, "Cannot convert name to ASCII"); - } - - args.GetReturnValue().Set( - String::NewFromUtf8(env->isolate(), - *buf, - NewStringType::kNormal, - len).ToLocalChecked()); -} - // This is similar to wcwidth except that it takes the current unicode // character properties database into consideration, allowing it to // correctly calculate the column widths of things like emoji's and @@ -850,8 +686,6 @@ static void CreatePerIsolateProperties(IsolateData* isolate_data, Local target) { Isolate* isolate = isolate_data->isolate(); - SetMethod(isolate, target, "toUnicode", ToUnicode); - SetMethod(isolate, target, "toASCII", ToASCII); SetMethod(isolate, target, "getStringWidth", GetStringWidth); // One-shot converters @@ -880,8 +714,6 @@ void CreatePerContextProperties(Local target, void* priv) {} void RegisterExternalReferences(ExternalReferenceRegistry* registry) { - registry->Register(ToUnicode); - registry->Register(ToASCII); registry->Register(GetStringWidth); registry->Register(ICUErrorName); registry->Register(Transcode); diff --git a/src/node_i18n.h b/src/node_i18n.h index e516282865fb18..6ba3c41fc4f056 100644 --- a/src/node_i18n.h +++ b/src/node_i18n.h @@ -53,19 +53,6 @@ enum class idna_mode { kStrict }; -// Implements the WHATWG URL Standard "domain to ASCII" algorithm. -// https://url.spec.whatwg.org/#concept-domain-to-ascii -int32_t ToASCII(MaybeStackBuffer* buf, - const char* input, - size_t length, - idna_mode mode = idna_mode::kDefault); - -// Implements the WHATWG URL Standard "domain to Unicode" algorithm. -// https://url.spec.whatwg.org/#concept-domain-to-unicode -int32_t ToUnicode(MaybeStackBuffer* buf, - const char* input, - size_t length); - struct ConverterDeleter { void operator()(UConverter* pointer) const { ucnv_close(pointer); } }; diff --git a/test/fixtures/icu-punycode-toascii.json b/test/fixtures/icu-punycode-toascii.json deleted file mode 100644 index 814f06e794866d..00000000000000 --- a/test/fixtures/icu-punycode-toascii.json +++ /dev/null @@ -1,149 +0,0 @@ -[ - "This resource is focused on highlighting issues with UTS #46 ToASCII", - { - "comment": "Label with hyphens in 3rd and 4th position", - "input": "aa--", - "output": "aa--" - }, - { - "input": "a†--", - "output": "xn--a---kp0a" - }, - { - "input": "ab--c", - "output": "ab--c" - }, - { - "comment": "Label with leading hyphen", - "input": "-x", - "output": "-x" - }, - { - "input": "-†", - "output": "xn----xhn" - }, - { - "input": "-x.xn--nxa", - "output": "-x.xn--nxa" - }, - { - "input": "-x.β", - "output": "-x.xn--nxa" - }, - { - "comment": "Label with trailing hyphen", - "input": "x-.xn--nxa", - "output": "x-.xn--nxa" - }, - { - "input": "x-.β", - "output": "x-.xn--nxa" - }, - { - "comment": "Empty labels", - "input": "x..xn--nxa", - "output": "x..xn--nxa" - }, - { - "input": "x..β", - "output": "x..xn--nxa" - }, - { - "comment": "Invalid Punycode", - "input": "xn--a", - "output": null - }, - { - "input": "xn--a.xn--nxa", - "output": null - }, - { - "input": "xn--a.β", - "output": null - }, - { - "comment": "Valid Punycode", - "input": "xn--nxa.xn--nxa", - "output": "xn--nxa.xn--nxa" - }, - { - "comment": "Mixed", - "input": "xn--nxa.β", - "output": "xn--nxa.xn--nxa" - }, - { - "input": "ab--c.xn--nxa", - "output": "ab--c.xn--nxa" - }, - { - "input": "ab--c.β", - "output": "ab--c.xn--nxa" - }, - { - "comment": "CheckJoiners is true", - "input": "\u200D.example", - "output": null - }, - { - "input": "xn--1ug.example", - "output": null - }, - { - "comment": "CheckBidi is true", - "input": "يa", - "output": null - }, - { - "input": "xn--a-yoc", - "output": null - }, - { - "comment": "processing_option is Nontransitional_Processing", - "input": "ශ්‍රී", - "output": "xn--10cl1a0b660p" - }, - { - "input": "نامه‌ای", - "output": "xn--mgba3gch31f060k" - }, - { - "comment": "U+FFFD", - "input": "\uFFFD.com", - "output": null - }, - { - "comment": "U+FFFD character encoded in Punycode", - "input": "xn--zn7c.com", - "output": null - }, - { - "comment": "Label longer than 63 code points", - "input": "x01234567890123456789012345678901234567890123456789012345678901x", - "output": "x01234567890123456789012345678901234567890123456789012345678901x" - }, - { - "input": "x01234567890123456789012345678901234567890123456789012345678901†", - "output": "xn--x01234567890123456789012345678901234567890123456789012345678901-6963b" - }, - { - "input": "x01234567890123456789012345678901234567890123456789012345678901x.xn--nxa", - "output": "x01234567890123456789012345678901234567890123456789012345678901x.xn--nxa" - }, - { - "input": "x01234567890123456789012345678901234567890123456789012345678901x.β", - "output": "x01234567890123456789012345678901234567890123456789012345678901x.xn--nxa" - }, - { - "comment": "Domain excluding TLD longer than 253 code points", - "input": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x", - "output": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.x" - }, - { - "input": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.xn--nxa", - "output": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.xn--nxa" - }, - { - "input": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.β", - "output": "01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.01234567890123456789012345678901234567890123456789.0123456789012345678901234567890123456789012345678.xn--nxa" - } -] diff --git a/test/parallel/test-icu-punycode.js b/test/parallel/test-icu-punycode.js deleted file mode 100644 index 29e88f9b9a6262..00000000000000 --- a/test/parallel/test-icu-punycode.js +++ /dev/null @@ -1,57 +0,0 @@ -'use strict'; -// Flags: --expose-internals -const common = require('../common'); - -if (!common.hasIntl) - common.skip('missing Intl'); - -const { internalBinding } = require('internal/test/binding'); -const icu = internalBinding('icu'); -const assert = require('assert'); - -// Test hasConverter method -assert(icu.hasConverter('utf-8'), - 'hasConverter should report converter exists for utf-8'); -assert(!icu.hasConverter('x'), - 'hasConverter should report converter does not exist for x'); - -const tests = require('../fixtures/url-idna.js'); -const fixtures = require('../fixtures/icu-punycode-toascii.json'); - -{ - for (const [i, { ascii, unicode }] of tests.entries()) { - assert.strictEqual(ascii, icu.toASCII(unicode), `toASCII(${i + 1})`); - assert.strictEqual(unicode, icu.toUnicode(ascii), `toUnicode(${i + 1})`); - assert.strictEqual(ascii, icu.toASCII(icu.toUnicode(ascii)), - `toASCII(toUnicode(${i + 1}))`); - assert.strictEqual(unicode, icu.toUnicode(icu.toASCII(unicode)), - `toUnicode(toASCII(${i + 1}))`); - } -} - -{ - for (const [i, test] of fixtures.entries()) { - if (typeof test === 'string') - continue; // skip comments - const { comment, input, output } = test; - let caseComment = `case ${i + 1}`; - if (comment) - caseComment += ` (${comment})`; - if (output === null) { - assert.throws( - () => icu.toASCII(input), - { - code: 'ERR_INVALID_ARG_VALUE', - name: 'TypeError', - message: 'Cannot convert name to ASCII' - } - ); - icu.toASCII(input, true); // Should not throw. - } else { - assert.strictEqual(icu.toASCII(input), output, `ToASCII ${caseComment}`); - assert.strictEqual(icu.toASCII(input, true), output, - `ToASCII ${caseComment} in lenient mode`); - } - icu.toUnicode(input); // Should not throw. - } -}