diff --git a/LICENSE b/LICENSE index 306e57a23decdf..dad0dccc0cc56d 100644 --- a/LICENSE +++ b/LICENSE @@ -109,9 +109,22 @@ The externally maintained libraries used by Node.js are: - ICU, located at deps/icu-small, is licensed as follows: """ - COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later) + UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE - Copyright © 1991-2020 Unicode, Inc. All rights reserved. + See Terms of Use + for definitions of Unicode Inc.’s Data Files and Software. + + NOTICE TO USER: Carefully read the following legal agreement. + BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S + DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), + YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE + TERMS AND CONDITIONS OF THIS AGREEMENT. + IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE + THE DATA FILES OR SOFTWARE. + + COPYRIGHT AND PERMISSION NOTICE + + Copyright © 1991-2022 Unicode, Inc. All rights reserved. Distributed under the Terms of Use in https://www.unicode.org/copyright.html. Permission is hereby granted, free of charge, to any person obtaining @@ -143,7 +156,7 @@ The externally maintained libraries used by Node.js are: use or other dealings in these Data Files or Software without prior written authorization of the copyright holder. - --------------------- + ---------------------------------------------------------------------- Third-Party Software Licenses @@ -151,7 +164,9 @@ The externally maintained libraries used by Node.js are: terms for licensed third-party software components included within ICU libraries. - 1. ICU License - ICU 1.8.1 to ICU 57.1 + ---------------------------------------------------------------------- + + ICU License - ICU 1.8.1 to ICU 57.1 COPYRIGHT AND PERMISSION NOTICE @@ -186,7 +201,9 @@ The externally maintained libraries used by Node.js are: All trademarks and registered trademarks mentioned herein are the property of their respective owners. - 2. 
Chinese/Japanese Word Break Dictionary Data (cjdict.txt) + ---------------------------------------------------------------------- + + Chinese/Japanese Word Break Dictionary Data (cjdict.txt) # The Google Chrome software developed by Google is licensed under # the BSD license. Other software included in this distribution is @@ -390,7 +407,9 @@ The externally maintained libraries used by Node.js are: # # ---------------COPYING.ipadic-----END---------------------------------- - 3. Lao Word Break Dictionary Data (laodict.txt) + ---------------------------------------------------------------------- + + Lao Word Break Dictionary Data (laodict.txt) # Copyright (C) 2016 and later: Unicode, Inc. and others. # License & terms of use: http://www.unicode.org/copyright.html @@ -430,7 +449,9 @@ The externally maintained libraries used by Node.js are: # OF THE POSSIBILITY OF SUCH DAMAGE. # -------------------------------------------------------------------------- - 4. Burmese Word Break Dictionary Data (burmesedict.txt) + ---------------------------------------------------------------------- + + Burmese Word Break Dictionary Data (burmesedict.txt) # Copyright (c) 2014 International Business Machines Corporation # and others. All Rights Reserved. @@ -470,7 +491,9 @@ The externally maintained libraries used by Node.js are: # SUCH DAMAGE. # -------------------------------------------------------------------------- - 5. Time Zone Database + ---------------------------------------------------------------------- + + Time Zone Database ICU uses the public domain data and code derived from Time Zone Database for its time zone support. The ownership of the TZ database @@ -493,7 +516,9 @@ The externally maintained libraries used by Node.js are: # making a contribution to the database or code waives all rights to # future claims in that contribution or in the TZ Database. - 6. 
Google double-conversion + ---------------------------------------------------------------------- + + Google double-conversion Copyright 2006-2011, the V8 project authors. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -521,6 +546,83 @@ The externally maintained libraries used by Node.js are: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ---------------------------------------------------------------------- + + File: aclocal.m4 (only for ICU4C) + Section: pkg.m4 - Macros to locate and utilise pkg-config. + + Copyright © 2004 Scott James Remnant . + Copyright © 2012-2015 Dan Nicholson + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + 02111-1307, USA. + + As a special exception to the GNU General Public License, if you + distribute this file as part of a program that contains a + configuration script generated by Autoconf, you may include it under + the same distribution terms that you use for the rest of that + program. + + (The condition for the exception is fulfilled because + ICU4C includes a configuration script generated by Autoconf, + namely the `configure` script.) 
+ + ---------------------------------------------------------------------- + + File: config.guess (only for ICU4C) + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + As a special exception to the GNU General Public License, if you + distribute this file as part of a program that contains a + configuration script generated by Autoconf, you may include it under + the same distribution terms that you use for the rest of that + program. This Exception is an additional permission under section 7 + of the GNU General Public License, version 3 ("GPLv3"). + + (The condition for the exception is fulfilled because + ICU4C includes a configuration script generated by Autoconf, + namely the `configure` script.) + + ---------------------------------------------------------------------- + + File: install-sh (only for ICU4C) + + Copyright 1991 by the Massachusetts Institute of Technology + + Permission to use, copy, modify, distribute, and sell this software and its + documentation for any purpose is hereby granted without fee, provided that + the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting + documentation, and that the name of M.I.T. not be used in advertising or + publicity pertaining to distribution of the software without specific, + written prior permission. M.I.T. makes no representations about the + suitability of this software for any purpose.
It is provided "as is" + without express or implied warranty. """ - libuv, located at deps/uv, is licensed as follows: diff --git a/deps/icu-small/LICENSE b/deps/icu-small/LICENSE index 970ae074cbf555..80b587723a67f7 100644 --- a/deps/icu-small/LICENSE +++ b/deps/icu-small/LICENSE @@ -1,6 +1,19 @@ -COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later) +UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE -Copyright © 1991-2020 Unicode, Inc. All rights reserved. +See Terms of Use +for definitions of Unicode Inc.’s Data Files and Software. + +NOTICE TO USER: Carefully read the following legal agreement. +BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S +DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), +YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE +TERMS AND CONDITIONS OF THIS AGREEMENT. +IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE +THE DATA FILES OR SOFTWARE. + +COPYRIGHT AND PERMISSION NOTICE + +Copyright © 1991-2022 Unicode, Inc. All rights reserved. Distributed under the Terms of Use in https://www.unicode.org/copyright.html. Permission is hereby granted, free of charge, to any person obtaining @@ -32,7 +45,7 @@ shall not be used in advertising or otherwise to promote the sale, use or other dealings in these Data Files or Software without prior written authorization of the copyright holder. ---------------------- +---------------------------------------------------------------------- Third-Party Software Licenses @@ -40,7 +53,9 @@ This section contains third-party software notices and/or additional terms for licensed third-party software components included within ICU libraries. -1. ICU License - ICU 1.8.1 to ICU 57.1 +---------------------------------------------------------------------- + +ICU License - ICU 1.8.1 to ICU 57.1 COPYRIGHT AND PERMISSION NOTICE @@ -75,7 +90,9 @@ of the copyright holder. 
All trademarks and registered trademarks mentioned herein are the property of their respective owners. -2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt) +---------------------------------------------------------------------- + +Chinese/Japanese Word Break Dictionary Data (cjdict.txt) # The Google Chrome software developed by Google is licensed under # the BSD license. Other software included in this distribution is @@ -279,7 +296,9 @@ property of their respective owners. # # ---------------COPYING.ipadic-----END---------------------------------- -3. Lao Word Break Dictionary Data (laodict.txt) +---------------------------------------------------------------------- + +Lao Word Break Dictionary Data (laodict.txt) # Copyright (C) 2016 and later: Unicode, Inc. and others. # License & terms of use: http://www.unicode.org/copyright.html @@ -319,7 +338,9 @@ property of their respective owners. # OF THE POSSIBILITY OF SUCH DAMAGE. # -------------------------------------------------------------------------- -4. Burmese Word Break Dictionary Data (burmesedict.txt) +---------------------------------------------------------------------- + +Burmese Word Break Dictionary Data (burmesedict.txt) # Copyright (c) 2014 International Business Machines Corporation # and others. All Rights Reserved. @@ -359,7 +380,9 @@ property of their respective owners. # SUCH DAMAGE. # -------------------------------------------------------------------------- -5. Time Zone Database +---------------------------------------------------------------------- + +Time Zone Database ICU uses the public domain data and code derived from Time Zone Database for its time zone support. The ownership of the TZ database @@ -382,7 +405,9 @@ Database section 7. # making a contribution to the database or code waives all rights to # future claims in that contribution or in the TZ Database. -6. 
Google double-conversion +---------------------------------------------------------------------- + +Google double-conversion Copyright 2006-2011, the V8 project authors. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -410,3 +435,85 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +---------------------------------------------------------------------- + +File: aclocal.m4 (only for ICU4C) +Section: pkg.m4 - Macros to locate and utilise pkg-config. + + +Copyright © 2004 Scott James Remnant . +Copyright © 2012-2015 Dan Nicholson + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. + +As a special exception to the GNU General Public License, if you +distribute this file as part of a program that contains a +configuration script generated by Autoconf, you may include it under +the same distribution terms that you use for the rest of that +program. + + +(The condition for the exception is fulfilled because +ICU4C includes a configuration script generated by Autoconf, +namely the `configure` script.) 
+ +---------------------------------------------------------------------- + +File: config.guess (only for ICU4C) + + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, see <http://www.gnu.org/licenses/>. + +As a special exception to the GNU General Public License, if you +distribute this file as part of a program that contains a +configuration script generated by Autoconf, you may include it under +the same distribution terms that you use for the rest of that +program. This Exception is an additional permission under section 7 +of the GNU General Public License, version 3 ("GPLv3"). + + +(The condition for the exception is fulfilled because +ICU4C includes a configuration script generated by Autoconf, +namely the `configure` script.) + +---------------------------------------------------------------------- + +File: install-sh (only for ICU4C) + + +Copyright 1991 by the Massachusetts Institute of Technology + +Permission to use, copy, modify, distribute, and sell this software and its +documentation for any purpose is hereby granted without fee, provided that +the above copyright notice appear in all copies and that both that +copyright notice and this permission notice appear in supporting +documentation, and that the name of M.I.T. not be used in advertising or +publicity pertaining to distribution of the software without specific, +written prior permission. M.I.T. makes no representations about the +suitability of this software for any purpose.
It is provided "as is" +without express or implied warranty. diff --git a/deps/icu-small/README-FULL-ICU.txt b/deps/icu-small/README-FULL-ICU.txt index a6afcf4952945e..0c33485768901c 100644 --- a/deps/icu-small/README-FULL-ICU.txt +++ b/deps/icu-small/README-FULL-ICU.txt @@ -1,8 +1,8 @@ ICU sources - auto generated by shrink-icu-src.py This directory contains the ICU subset used by --with-intl=full-icu -It is a strict subset of ICU 70 source files with the following exception(s): -* deps/icu-small/source/data/in/icudt70l.dat.bz2 : compressed data file +It is a strict subset of ICU 71 source files with the following exception(s): +* deps/icu-small/source/data/in/icudt71l.dat.bz2 : compressed data file To rebuild this directory, see ../../tools/icu/README.md diff --git a/deps/icu-small/source/common/BUILD b/deps/icu-small/source/common/BUILD.bazel similarity index 100% rename from deps/icu-small/source/common/BUILD rename to deps/icu-small/source/common/BUILD.bazel diff --git a/deps/icu-small/source/common/brkeng.cpp b/deps/icu-small/source/common/brkeng.cpp index 52e9c53621dca2..dc9fb99bf1972d 100644 --- a/deps/icu-small/source/common/brkeng.cpp +++ b/deps/icu-small/source/common/brkeng.cpp @@ -79,6 +79,7 @@ UnhandledEngine::findBreaks( UText *text, int32_t /* startPos */, int32_t endPos, UVector32 &/*foundBreaks*/, + UBool /* isPhraseBreaking */, UErrorCode &status) const { if (U_FAILURE(status)) return 0; UChar32 c = utext_current32(text); diff --git a/deps/icu-small/source/common/brkeng.h b/deps/icu-small/source/common/brkeng.h index 6843f1cc953511..127ba59e186f23 100644 --- a/deps/icu-small/source/common/brkeng.h +++ b/deps/icu-small/source/common/brkeng.h @@ -75,6 +75,7 @@ class LanguageBreakEngine : public UMemory { int32_t startPos, int32_t endPos, UVector32 &foundBreaks, + UBool isPhraseBreaking, UErrorCode &status) const = 0; }; @@ -194,6 +195,7 @@ class UnhandledEngine : public LanguageBreakEngine { int32_t startPos, int32_t endPos, UVector32 &foundBreaks, 
+ UBool isPhraseBreaking, UErrorCode &status) const override; /** diff --git a/deps/icu-small/source/common/brkiter.cpp b/deps/icu-small/source/common/brkiter.cpp index 8b228acf2c384c..8a1915880ee229 100644 --- a/deps/icu-small/source/common/brkiter.cpp +++ b/deps/icu-small/source/common/brkiter.cpp @@ -30,6 +30,7 @@ #include "unicode/ures.h" #include "unicode/ustring.h" #include "unicode/filteredbrk.h" +#include "bytesinkutil.h" #include "ucln_cmn.h" #include "cstring.h" #include "umutex.h" @@ -115,7 +116,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st } // Create a RuleBasedBreakIterator - result = new RuleBasedBreakIterator(file, status); + result = new RuleBasedBreakIterator(file, uprv_strstr(type, "phrase") != NULL, status); // If there is a result, set the valid locale and actual locale, and the kind if (U_SUCCESS(status) && result != NULL) { @@ -408,7 +409,6 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) if (U_FAILURE(status)) { return NULL; } - char lbType[kKeyValueLenMax]; BreakIterator *result = NULL; switch (kind) { @@ -428,18 +428,29 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) break; case UBRK_LINE: { + char lb_lw[kKeyValueLenMax]; UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE); - uprv_strcpy(lbType, "line"); - char lbKeyValue[kKeyValueLenMax] = {0}; + uprv_strcpy(lb_lw, "line"); UErrorCode kvStatus = U_ZERO_ERROR; - int32_t kLen = loc.getKeywordValue("lb", lbKeyValue, kKeyValueLenMax, kvStatus); - if (U_SUCCESS(kvStatus) && kLen > 0 && (uprv_strcmp(lbKeyValue,"strict")==0 || uprv_strcmp(lbKeyValue,"normal")==0 || uprv_strcmp(lbKeyValue,"loose")==0)) { - uprv_strcat(lbType, "_"); - uprv_strcat(lbType, lbKeyValue); + CharString value; + CharStringByteSink valueSink(&value); + loc.getKeywordValue("lb", valueSink, kvStatus); + if (U_SUCCESS(kvStatus) && (value == "strict" || value == "normal" || value == "loose")) { + uprv_strcat(lb_lw, "_"); + 
uprv_strcat(lb_lw, value.data()); } - result = BreakIterator::buildInstance(loc, lbType, status); + // lw=phrase is only supported in Japanese. + if (uprv_strcmp(loc.getLanguage(), "ja") == 0) { + value.clear(); + loc.getKeywordValue("lw", valueSink, kvStatus); + if (U_SUCCESS(kvStatus) && value == "phrase") { + uprv_strcat(lb_lw, "_"); + uprv_strcat(lb_lw, value.data()); + } + } + result = BreakIterator::buildInstance(loc, lb_lw, status); - UTRACE_DATA1(UTRACE_INFO, "lb=%s", lbKeyValue); + UTRACE_DATA1(UTRACE_INFO, "lb_lw=%s", lb_lw); UTRACE_EXIT_STATUS(status); } break; diff --git a/deps/icu-small/source/common/dictbe.cpp b/deps/icu-small/source/common/dictbe.cpp index 4d158e3226db28..4fdbdf2760f1c6 100644 --- a/deps/icu-small/source/common/dictbe.cpp +++ b/deps/icu-small/source/common/dictbe.cpp @@ -17,7 +17,10 @@ #include "dictbe.h" #include "unicode/uniset.h" #include "unicode/chariter.h" +#include "unicode/resbund.h" #include "unicode/ubrk.h" +#include "unicode/usetiter.h" +#include "ubrkimpl.h" #include "utracimp.h" #include "uvectr32.h" #include "uvector.h" @@ -48,6 +51,7 @@ DictionaryBreakEngine::findBreaks( UText *text, int32_t startPos, int32_t endPos, UVector32 &foundBreaks, + UBool isPhraseBreaking, UErrorCode& status) const { if (U_FAILURE(status)) return 0; (void)startPos; // TODO: remove this param? 
@@ -68,7 +72,7 @@ DictionaryBreakEngine::findBreaks( UText *text, } rangeStart = start; rangeEnd = current; - result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks, status); + result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks, isPhraseBreaking, status); utext_setNativeIndex(text, current); return result; @@ -199,13 +203,13 @@ ThaiBreakEngine::ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode { UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE); UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Thai"); - fThaiWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]]"), status); + UnicodeSet thaiWordSet(UnicodeString(u"[[:Thai:]&[:LineBreak=SA:]]"), status); if (U_SUCCESS(status)) { - setCharacters(fThaiWordSet); + setCharacters(thaiWordSet); } - fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status); + fMarkSet.applyPattern(UnicodeString(u"[[:Thai:]&[:LineBreak=SA:]&[:M:]]"), status); fMarkSet.add(0x0020); - fEndWordSet = fThaiWordSet; + fEndWordSet = thaiWordSet; fEndWordSet.remove(0x0E31); // MAI HAN-AKAT fEndWordSet.remove(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI fBeginWordSet.add(0x0E01, 0x0E2E); // KO KAI through HO NOKHUK @@ -230,6 +234,7 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text, int32_t rangeStart, int32_t rangeEnd, UVector32 &foundBreaks, + UBool /* isPhraseBreaking */, UErrorCode& status) const { if (U_FAILURE(status)) return 0; utext_setNativeIndex(text, rangeStart); @@ -441,13 +446,13 @@ LaoBreakEngine::LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &s { UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE); UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Laoo"); - fLaoWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]]"), status); + UnicodeSet laoWordSet(UnicodeString(u"[[:Laoo:]&[:LineBreak=SA:]]"), status); if (U_SUCCESS(status)) { - setCharacters(fLaoWordSet); + setCharacters(laoWordSet); } - 
fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Laoo:]&[:LineBreak=SA:]&[:M:]]"), status); + fMarkSet.applyPattern(UnicodeString(u"[[:Laoo:]&[:LineBreak=SA:]&[:M:]]"), status); fMarkSet.add(0x0020); - fEndWordSet = fLaoWordSet; + fEndWordSet = laoWordSet; fEndWordSet.remove(0x0EC0, 0x0EC4); // prefix vowels fBeginWordSet.add(0x0E81, 0x0EAE); // basic consonants (including holes for corresponding Thai characters) fBeginWordSet.add(0x0EDC, 0x0EDD); // digraph consonants (no Thai equivalent) @@ -469,6 +474,7 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text, int32_t rangeStart, int32_t rangeEnd, UVector32 &foundBreaks, + UBool /* isPhraseBreaking */, UErrorCode& status) const { if (U_FAILURE(status)) return 0; if ((rangeEnd - rangeStart) < LAO_MIN_WORD_SPAN) { @@ -637,14 +643,13 @@ BurmeseBreakEngine::BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErro { UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE); UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Mymr"); - fBurmeseWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]]"), status); + fBeginWordSet.add(0x1000, 0x102A); // basic consonants and independent vowels + fEndWordSet.applyPattern(UnicodeString(u"[[:Mymr:]&[:LineBreak=SA:]]"), status); + fMarkSet.applyPattern(UnicodeString(u"[[:Mymr:]&[:LineBreak=SA:]&[:M:]]"), status); + fMarkSet.add(0x0020); if (U_SUCCESS(status)) { - setCharacters(fBurmeseWordSet); + setCharacters(fEndWordSet); } - fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Mymr:]&[:LineBreak=SA:]&[:M:]]"), status); - fMarkSet.add(0x0020); - fEndWordSet = fBurmeseWordSet; - fBeginWordSet.add(0x1000, 0x102A); // basic consonants and independent vowels // Compact for caching. 
fMarkSet.compact(); @@ -662,6 +667,7 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text, int32_t rangeStart, int32_t rangeEnd, UVector32 &foundBreaks, + UBool /* isPhraseBreaking */, UErrorCode& status ) const { if (U_FAILURE(status)) return 0; if ((rangeEnd - rangeStart) < BURMESE_MIN_WORD_SPAN) { @@ -830,13 +836,13 @@ KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCod { UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE); UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Khmr"); - fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status); + UnicodeSet khmerWordSet(UnicodeString(u"[[:Khmr:]&[:LineBreak=SA:]]"), status); if (U_SUCCESS(status)) { - setCharacters(fKhmerWordSet); + setCharacters(khmerWordSet); } - fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status); + fMarkSet.applyPattern(UnicodeString(u"[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status); fMarkSet.add(0x0020); - fEndWordSet = fKhmerWordSet; + fEndWordSet = khmerWordSet; fBeginWordSet.add(0x1780, 0x17B3); //fBeginWordSet.add(0x17A3, 0x17A4); // deprecated vowels //fEndWordSet.remove(0x17A5, 0x17A9); // Khmer independent vowels that can't end a word @@ -867,6 +873,7 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text, int32_t rangeStart, int32_t rangeEnd, UVector32 &foundBreaks, + UBool /* isPhraseBreaking */, UErrorCode& status ) const { if (U_FAILURE(status)) return 0; if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) { @@ -1050,25 +1057,27 @@ CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType : DictionaryBreakEngine(), fDictionary(adoptDictionary) { UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE); UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Hani"); - // Korean dictionary only includes Hangul syllables - fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status); - fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status); - 
fKatakanaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Katakana:]\\uff9e\\uff9f]"), status); - fHiraganaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Hiragana:]"), status); nfkcNorm2 = Normalizer2::getNFKCInstance(status); - - if (U_SUCCESS(status)) { - // handle Korean and Japanese/Chinese using different dictionaries - if (type == kKorean) { + // Korean dictionary only includes Hangul syllables + fHangulWordSet.applyPattern(UnicodeString(u"[\\uac00-\\ud7a3]"), status); + fHangulWordSet.compact(); + // Digits, open punctuation and Alphabetic characters. + fDigitOrOpenPunctuationOrAlphabetSet.applyPattern( + UnicodeString(u"[[:Nd:][:Pi:][:Ps:][:Alphabetic:]]"), status); + fDigitOrOpenPunctuationOrAlphabetSet.compact(); + fClosePunctuationSet.applyPattern(UnicodeString(u"[[:Pc:][:Pd:][:Pe:][:Pf:][:Po:]]"), status); + fClosePunctuationSet.compact(); + + // handle Korean and Japanese/Chinese using different dictionaries + if (type == kKorean) { + if (U_SUCCESS(status)) { setCharacters(fHangulWordSet); - } else { //Chinese and Japanese - UnicodeSet cjSet; - cjSet.addAll(fHanWordSet); - cjSet.addAll(fKatakanaWordSet); - cjSet.addAll(fHiraganaWordSet); - cjSet.add(0xFF70); // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK - cjSet.add(0x30FC); // KATAKANA-HIRAGANA PROLONGED SOUND MARK + } + } else { //Chinese and Japanese + UnicodeSet cjSet(UnicodeString(u"[[:Han:][:Hiragana:][:Katakana:]\\u30fc\\uff70\\uff9e\\uff9f]"), status); + if (U_SUCCESS(status)) { setCharacters(cjSet); + initJapanesePhraseParameter(status); } } UTRACE_EXIT_STATUS(status); @@ -1096,14 +1105,12 @@ static inline bool isKatakana(UChar32 value) { (value >= 0xFF66 && value <= 0xFF9f); } - // Function for accessing internal utext flags. // Replicates an internal UText function.
static inline int32_t utext_i32_flag(int32_t bitIndex) { return (int32_t)1 << bitIndex; } - /* * @param text A UText representing the text @@ -1117,6 +1124,7 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText, int32_t rangeStart, int32_t rangeEnd, UVector32 &foundBreaks, + UBool isPhraseBreaking, UErrorCode& status) const { if (U_FAILURE(status)) return 0; if (rangeStart >= rangeEnd) { @@ -1347,6 +1355,31 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText, if ((uint32_t)bestSnlp.elementAti(numCodePts) == kuint32max) { t_boundary.addElement(numCodePts, status); numBreaks++; + } else if (isPhraseBreaking) { + t_boundary.addElement(numCodePts, status); + if(U_SUCCESS(status)) { + numBreaks++; + int32_t prevIdx = numCodePts; + + int32_t codeUnitIdx = -1; + int32_t prevCodeUnitIdx = -1; + int32_t length = -1; + for (int32_t i = prev.elementAti(numCodePts); i > 0; i = prev.elementAti(i)) { + codeUnitIdx = inString.moveIndex32(0, i); + prevCodeUnitIdx = inString.moveIndex32(0, prevIdx); + // Calculate the length by using the code unit. + length = prevCodeUnitIdx - codeUnitIdx; + prevIdx = i; + // Keep the breakpoint if the pattern is not in the fSkipSet and continuous Katakana + // characters don't occur. + if (!fSkipSet.containsKey(inString.tempSubString(codeUnitIdx, length)) + && (!isKatakana(inString.char32At(inString.moveIndex32(codeUnitIdx, -1))) + || !isKatakana(inString.char32At(codeUnitIdx)))) { + t_boundary.addElement(i, status); + numBreaks++; + } + } + } } else { for (int32_t i = numCodePts; i > 0; i = prev.elementAti(i)) { t_boundary.addElement(i, status); @@ -1367,7 +1400,8 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText, // while reversing t_boundary and pushing values to foundBreaks. 
int32_t prevCPPos = -1; int32_t prevUTextPos = -1; - for (int32_t i = numBreaks-1; i >= 0; i--) { + int32_t correctedNumBreaks = 0; + for (int32_t i = numBreaks - 1; i >= 0; i--) { int32_t cpPos = t_boundary.elementAti(i); U_ASSERT(cpPos > prevCPPos); int32_t utextPos = inputMap.isValid() ? inputMap->elementAti(cpPos) : cpPos + rangeStart; @@ -1375,7 +1409,15 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText, if (utextPos > prevUTextPos) { // Boundaries are added to foundBreaks output in ascending order. U_ASSERT(foundBreaks.size() == 0 || foundBreaks.peeki() < utextPos); - foundBreaks.push(utextPos, status); + // In phrase breaking, there has to be a breakpoint between Cj character and close + // punctuation. + // E.g.[携帯電話]正しい選択 -> [携帯▁電話]▁正しい▁選択 -> breakpoint between ] and 正 + if (utextPos != rangeStart + || (isPhraseBreaking && utextPos > 0 + && fClosePunctuationSet.contains(utext_char32At(inText, utextPos - 1)))) { + foundBreaks.push(utextPos, status); + correctedNumBreaks++; + } } else { // Normalization expanded the input text, the dictionary found a boundary // within the expansion, giving two boundaries with the same index in the @@ -1387,9 +1429,52 @@ CjkBreakEngine::divideUpDictionaryRange( UText *inText, } (void)prevCPPos; // suppress compiler warnings about unused variable + UChar32 nextChar = utext_char32At(inText, rangeEnd); + if (!foundBreaks.isEmpty() && foundBreaks.peeki() == rangeEnd) { + // In phrase breaking, there has to be a breakpoint between Cj character and + // the number/open punctuation. + // E.g. る文字「そうだ、京都」->る▁文字▁「そうだ、▁京都」-> breakpoint between 字 and「 + // E.g. 乗車率90%程度だろうか -> 乗車▁率▁90%▁程度だろうか -> breakpoint between 率 and 9 + // E.g. しかもロゴがUnicode! 
-> しかも▁ロゴが▁Unicode!-> breakpoint between が and U + if (isPhraseBreaking) { + if (!fDigitOrOpenPunctuationOrAlphabetSet.contains(nextChar)) { + foundBreaks.popi(); + correctedNumBreaks--; + } + } else { + foundBreaks.popi(); + correctedNumBreaks--; + } + } + // inString goes out of scope // inputMap goes out of scope - return numBreaks; + return correctedNumBreaks; +} + +void CjkBreakEngine::initJapanesePhraseParameter(UErrorCode& error) { + loadJapaneseExtensions(error); + loadHiragana(error); +} + +void CjkBreakEngine::loadJapaneseExtensions(UErrorCode& error) { + const char* tag = "extensions"; + ResourceBundle ja(U_ICUDATA_BRKITR, "ja", error); + if (U_SUCCESS(error)) { + ResourceBundle bundle = ja.get(tag, error); + while (U_SUCCESS(error) && bundle.hasNext()) { + fSkipSet.puti(bundle.getNextString(error), 1, error); + } + } +} + +void CjkBreakEngine::loadHiragana(UErrorCode& error) { + UnicodeSet hiraganaWordSet(UnicodeString(u"[:Hiragana:]"), error); + hiraganaWordSet.compact(); + UnicodeSetIterator iterator(hiraganaWordSet); + while (iterator.next()) { + fSkipSet.puti(UnicodeString(iterator.getCodepoint()), 1, error); + } } #endif diff --git a/deps/icu-small/source/common/dictbe.h b/deps/icu-small/source/common/dictbe.h index 4e70ed38171e44..ca1a3c28b7be80 100644 --- a/deps/icu-small/source/common/dictbe.h +++ b/deps/icu-small/source/common/dictbe.h @@ -15,6 +15,7 @@ #include "unicode/utext.h" #include "brkeng.h" +#include "hash.h" #include "uvectr32.h" U_NAMESPACE_BEGIN @@ -80,6 +81,7 @@ class DictionaryBreakEngine : public LanguageBreakEngine { int32_t startPos, int32_t endPos, UVector32 &foundBreaks, + UBool isPhraseBreaking, UErrorCode& status ) const override; protected: @@ -105,6 +107,7 @@ class DictionaryBreakEngine : public LanguageBreakEngine { int32_t rangeStart, int32_t rangeEnd, UVector32 &foundBreaks, + UBool isPhraseBreaking, UErrorCode& status) const = 0; }; @@ -127,7 +130,6 @@ class ThaiBreakEngine : public DictionaryBreakEngine { * @internal 
*/ - UnicodeSet fThaiWordSet; UnicodeSet fEndWordSet; UnicodeSet fBeginWordSet; UnicodeSet fSuffixSet; @@ -164,6 +166,7 @@ class ThaiBreakEngine : public DictionaryBreakEngine { int32_t rangeStart, int32_t rangeEnd, UVector32 &foundBreaks, + UBool isPhraseBreaking, UErrorCode& status) const override; }; @@ -186,7 +189,6 @@ class LaoBreakEngine : public DictionaryBreakEngine { * @internal */ - UnicodeSet fLaoWordSet; UnicodeSet fEndWordSet; UnicodeSet fBeginWordSet; UnicodeSet fMarkSet; @@ -222,6 +224,7 @@ class LaoBreakEngine : public DictionaryBreakEngine { int32_t rangeStart, int32_t rangeEnd, UVector32 &foundBreaks, + UBool isPhraseBreaking, UErrorCode& status) const override; }; @@ -244,7 +247,6 @@ class BurmeseBreakEngine : public DictionaryBreakEngine { * @internal */ - UnicodeSet fBurmeseWordSet; UnicodeSet fEndWordSet; UnicodeSet fBeginWordSet; UnicodeSet fMarkSet; @@ -280,6 +282,7 @@ class BurmeseBreakEngine : public DictionaryBreakEngine { int32_t rangeStart, int32_t rangeEnd, UVector32 &foundBreaks, + UBool isPhraseBreaking, UErrorCode& status) const override; }; @@ -302,7 +305,6 @@ class KhmerBreakEngine : public DictionaryBreakEngine { * @internal */ - UnicodeSet fKhmerWordSet; UnicodeSet fEndWordSet; UnicodeSet fBeginWordSet; UnicodeSet fMarkSet; @@ -338,6 +340,7 @@ class KhmerBreakEngine : public DictionaryBreakEngine { int32_t rangeStart, int32_t rangeEnd, UVector32 &foundBreaks, + UBool isPhraseBreaking, UErrorCode& status) const override; }; @@ -366,13 +369,22 @@ class CjkBreakEngine : public DictionaryBreakEngine { * @internal */ UnicodeSet fHangulWordSet; - UnicodeSet fHanWordSet; - UnicodeSet fKatakanaWordSet; - UnicodeSet fHiraganaWordSet; + UnicodeSet fDigitOrOpenPunctuationOrAlphabetSet; + UnicodeSet fClosePunctuationSet; DictionaryMatcher *fDictionary; const Normalizer2 *nfkcNorm2; + private: + // Load Japanese extensions. + void loadJapaneseExtensions(UErrorCode& error); + // Load Japanese Hiragana. 
+ void loadHiragana(UErrorCode& error); + // Initialize fSkipSet by loading Japanese Hiragana and extensions. + void initJapanesePhraseParameter(UErrorCode& error); + + Hashtable fSkipSet; + public: /** @@ -404,6 +416,7 @@ class CjkBreakEngine : public DictionaryBreakEngine { int32_t rangeStart, int32_t rangeEnd, UVector32 &foundBreaks, + UBool isPhraseBreaking, UErrorCode& status) const override; }; diff --git a/deps/icu-small/source/common/localematcher.cpp b/deps/icu-small/source/common/localematcher.cpp index 3d178dfbaf1732..2cad708d99f0d2 100644 --- a/deps/icu-small/source/common/localematcher.cpp +++ b/deps/icu-small/source/common/localematcher.cpp @@ -168,12 +168,9 @@ void LocaleMatcher::Builder::clearSupportedLocales() { bool LocaleMatcher::Builder::ensureSupportedLocaleVector() { if (U_FAILURE(errorCode_)) { return false; } if (supportedLocales_ != nullptr) { return true; } - supportedLocales_ = new UVector(uprv_deleteUObject, nullptr, errorCode_); + LocalPointer lpSupportedLocales(new UVector(uprv_deleteUObject, nullptr, errorCode_), errorCode_); if (U_FAILURE(errorCode_)) { return false; } - if (supportedLocales_ == nullptr) { - errorCode_ = U_MEMORY_ALLOCATION_ERROR; - return false; - } + supportedLocales_ = lpSupportedLocales.orphan(); return true; } @@ -187,9 +184,8 @@ LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListStrin for (int32_t i = 0; i < length; ++i) { Locale *locale = list.orphanLocaleAt(i); if (locale == nullptr) { continue; } - supportedLocales_->addElementX(locale, errorCode_); + supportedLocales_->adoptElement(locale, errorCode_); if (U_FAILURE(errorCode_)) { - delete locale; break; } } @@ -197,35 +193,21 @@ LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocalesFromListStrin } LocaleMatcher::Builder &LocaleMatcher::Builder::setSupportedLocales(Locale::Iterator &locales) { - if (U_FAILURE(errorCode_)) { return *this; } - clearSupportedLocales(); - if (!ensureSupportedLocaleVector()) { return *this; } 
- while (locales.hasNext()) { - const Locale &locale = locales.next(); - Locale *clone = locale.clone(); - if (clone == nullptr) { - errorCode_ = U_MEMORY_ALLOCATION_ERROR; - break; - } - supportedLocales_->addElementX(clone, errorCode_); - if (U_FAILURE(errorCode_)) { - delete clone; - break; + if (ensureSupportedLocaleVector()) { + clearSupportedLocales(); + while (locales.hasNext() && U_SUCCESS(errorCode_)) { + const Locale &locale = locales.next(); + LocalPointer clone (locale.clone(), errorCode_); + supportedLocales_->adoptElement(clone.orphan(), errorCode_); } } return *this; } LocaleMatcher::Builder &LocaleMatcher::Builder::addSupportedLocale(const Locale &locale) { - if (!ensureSupportedLocaleVector()) { return *this; } - Locale *clone = locale.clone(); - if (clone == nullptr) { - errorCode_ = U_MEMORY_ALLOCATION_ERROR; - return *this; - } - supportedLocales_->addElementX(clone, errorCode_); - if (U_FAILURE(errorCode_)) { - delete clone; + if (ensureSupportedLocaleVector()) { + LocalPointer clone(locale.clone(), errorCode_); + supportedLocales_->adoptElement(clone.orphan(), errorCode_); } return *this; } diff --git a/deps/icu-small/source/common/locid.cpp b/deps/icu-small/source/common/locid.cpp index e8859c7048b110..73bb8d8aec1c70 100644 --- a/deps/icu-small/source/common/locid.cpp +++ b/deps/icu-small/source/common/locid.cpp @@ -1204,14 +1204,11 @@ AliasReplacer::parseLanguageReplacement( // We have multiple field so we have to allocate and parse CharString* str = new CharString( replacement, (int32_t)uprv_strlen(replacement), status); + LocalPointer lpStr(str, status); + toBeFreed.adoptElement(lpStr.orphan(), status); if (U_FAILURE(status)) { return; } - if (str == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - toBeFreed.addElementX(str, status); char* data = str->data(); replacedLanguage = (const char*) data; char* endOfField = uprv_strchr(data, '_'); @@ -1420,12 +1417,9 @@ AliasReplacer::replaceTerritory(UVector& toBeFreed, 
UErrorCode& status) (int32_t)(firstSpace - replacement), status), status); } if (U_FAILURE(status)) { return false; } - if (item.isNull()) { - status = U_MEMORY_ALLOCATION_ERROR; - return false; - } replacedRegion = item->data(); - toBeFreed.addElementX(item.orphan(), status); + toBeFreed.adoptElement(item.orphan(), status); + if (U_FAILURE(status)) { return false; } } U_ASSERT(!same(region, replacedRegion)); region = replacedRegion; @@ -1659,10 +1653,10 @@ AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status while ((end = uprv_strchr(start, SEP_CHAR)) != nullptr && U_SUCCESS(status)) { *end = NULL_CHAR; // null terminate inside variantsBuff - variants.addElementX(start, status); + variants.addElement(start, status); start = end + 1; } - variants.addElementX(start, status); + variants.addElement(start, status); } if (U_FAILURE(status)) { return false; } diff --git a/deps/icu-small/source/common/lstmbe.cpp b/deps/icu-small/source/common/lstmbe.cpp index 3793abceb3fb1c..f6114cdfe25e19 100644 --- a/deps/icu-small/source/common/lstmbe.cpp +++ b/deps/icu-small/source/common/lstmbe.cpp @@ -1,8 +1,8 @@ // © 2021 and later: Unicode, Inc. and others. 
// License & terms of use: http://www.unicode.org/copyright.html +#include #include -#include #include "unicode/utypes.h" @@ -639,6 +639,7 @@ LSTMBreakEngine::divideUpDictionaryRange( UText *text, int32_t startPos, int32_t endPos, UVector32 &foundBreaks, + UBool /* isPhraseBreaking */, UErrorCode& status) const { if (U_FAILURE(status)) return 0; int32_t beginFoundBreakSize = foundBreaks.size(); diff --git a/deps/icu-small/source/common/lstmbe.h b/deps/icu-small/source/common/lstmbe.h index c3f7ecf81540dd..ffdf805eca265d 100644 --- a/deps/icu-small/source/common/lstmbe.h +++ b/deps/icu-small/source/common/lstmbe.h @@ -62,6 +62,7 @@ class LSTMBreakEngine : public DictionaryBreakEngine { int32_t rangeStart, int32_t rangeEnd, UVector32 &foundBreaks, + UBool isPhraseBreaking, UErrorCode& status) const override; private: const LSTMData* fData; diff --git a/deps/icu-small/source/common/normalizer2impl.cpp b/deps/icu-small/source/common/normalizer2impl.cpp index 5bfd49e8cb9e2a..e6bd75e7173889 100644 --- a/deps/icu-small/source/common/normalizer2impl.cpp +++ b/deps/icu-small/source/common/normalizer2impl.cpp @@ -2496,15 +2496,18 @@ void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode // origin is not the first character, or it is U+0000. 
UnicodeSet *set; if((canonValue&CANON_HAS_SET)==0) { - set=new UnicodeSet; - if(set==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; + LocalPointer lpSet(new UnicodeSet, errorCode); + set=lpSet.getAlias(); + if(U_FAILURE(errorCode)) { return; } UChar32 firstOrigin=(UChar32)(canonValue&CANON_VALUE_MASK); canonValue=(canonValue&~CANON_VALUE_MASK)|CANON_HAS_SET|(uint32_t)canonStartSets.size(); umutablecptrie_set(mutableTrie, decompLead, canonValue, &errorCode); - canonStartSets.addElementX(set, errorCode); + canonStartSets.adoptElement(lpSet.orphan(), errorCode); + if (U_FAILURE(errorCode)) { + return; + } if(firstOrigin!=0) { set->add(firstOrigin); } diff --git a/deps/icu-small/source/common/rbbi.cpp b/deps/icu-small/source/common/rbbi.cpp index f65177f232334d..cae8d154b30802 100644 --- a/deps/icu-small/source/common/rbbi.cpp +++ b/deps/icu-small/source/common/rbbi.cpp @@ -82,6 +82,19 @@ RuleBasedBreakIterator::RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode } } +//------------------------------------------------------------------------------- +// +// Constructor from a UDataMemory handle to precompiled break rules +// stored in an ICU data file. This construcotr is private API, +// only for internal use. +// +//------------------------------------------------------------------------------- +RuleBasedBreakIterator::RuleBasedBreakIterator(UDataMemory* udm, UBool isPhraseBreaking, + UErrorCode &status) : RuleBasedBreakIterator(udm, status) +{ + fIsPhraseBreaking = isPhraseBreaking; +} + // // Construct from precompiled binary rules (tables). This constructor is public API, // taking the rules as a (const uint8_t *) to match the type produced by getBinaryRules(). @@ -322,6 +335,7 @@ void RuleBasedBreakIterator::init(UErrorCode &status) { fBreakCache = nullptr; fDictionaryCache = nullptr; fLookAheadMatches = nullptr; + fIsPhraseBreaking = false; // Note: IBM xlC is unable to assign or initialize member fText from UTEXT_INITIALIZER. 
// fText = UTEXT_INITIALIZER; diff --git a/deps/icu-small/source/common/rbbi_cache.cpp b/deps/icu-small/source/common/rbbi_cache.cpp index 6bfe3feca495f6..26d82df7811838 100644 --- a/deps/icu-small/source/common/rbbi_cache.cpp +++ b/deps/icu-small/source/common/rbbi_cache.cpp @@ -163,7 +163,7 @@ void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPo // Ask the language object if there are any breaks. It will add them to the cache and // leave the text pointer on the other side of its range, ready to search for the next one. if (lbe != NULL) { - foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks, status); + foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks, fBI->fIsPhraseBreaking, status); } // Reload the loop variables for the next go-round diff --git a/deps/icu-small/source/common/serv.cpp b/deps/icu-small/source/common/serv.cpp index 0c54a4dce99225..c26dbca1a9c244 100644 --- a/deps/icu-small/source/common/serv.cpp +++ b/deps/icu-small/source/common/serv.cpp @@ -625,10 +625,7 @@ ICUService::getVisibleIDs(UVector& result, const UnicodeString* matchID, UErrorC } } - LocalPointer idClone(new UnicodeString(*id), status); - if (U_SUCCESS(status) && idClone->isBogus()) { - status = U_MEMORY_ALLOCATION_ERROR; - } + LocalPointer idClone(id->clone(), status); result.adoptElement(idClone.orphan(), status); } delete fallbackKey; diff --git a/deps/icu-small/source/common/servls.cpp b/deps/icu-small/source/common/servls.cpp index 7108afd4a5282b..98f0a8a12b0006 100644 --- a/deps/icu-small/source/common/servls.cpp +++ b/deps/icu-small/source/common/servls.cpp @@ -179,7 +179,8 @@ class ServiceEnumeration : public StringEnumeration { length = other._ids.size(); for(i = 0; i < length; ++i) { - _ids.addElementX(((UnicodeString *)other._ids.elementAt(i))->clone(), status); + LocalPointer clonedId(((UnicodeString *)other._ids.elementAt(i))->clone(), status); + _ids.adoptElement(clonedId.orphan(), status); } 
if(U_SUCCESS(status)) { diff --git a/deps/icu-small/source/common/servnotf.cpp b/deps/icu-small/source/common/servnotf.cpp index 342e0d9f24d2a7..d9fb38875202df 100644 --- a/deps/icu-small/source/common/servnotf.cpp +++ b/deps/icu-small/source/common/servnotf.cpp @@ -49,7 +49,11 @@ ICUNotifier::addListener(const EventListener* l, UErrorCode& status) if (acceptsListener(*l)) { Mutex lmx(¬ifyLock); if (listeners == NULL) { - listeners = new UVector(5, status); + LocalPointer lpListeners(new UVector(5, status), status); + if (U_FAILURE(status)) { + return; + } + listeners = lpListeners.orphan(); } else { for (int i = 0, e = listeners->size(); i < e; ++i) { const EventListener* el = (const EventListener*)(listeners->elementAt(i)); @@ -59,7 +63,7 @@ ICUNotifier::addListener(const EventListener* l, UErrorCode& status) } } - listeners->addElementX((void*)l, status); // cast away const + listeners->addElement((void*)l, status); // cast away const } #ifdef NOTIFIER_DEBUG else { @@ -102,13 +106,11 @@ ICUNotifier::removeListener(const EventListener *l, UErrorCode& status) void ICUNotifier::notifyChanged(void) { + Mutex lmx(¬ifyLock); if (listeners != NULL) { - Mutex lmx(¬ifyLock); - if (listeners != NULL) { - for (int i = 0, e = listeners->size(); i < e; ++i) { - EventListener* el = (EventListener*)listeners->elementAt(i); - notifyListener(*el); - } + for (int i = 0, e = listeners->size(); i < e; ++i) { + EventListener* el = (EventListener*)listeners->elementAt(i); + notifyListener(*el); } } } diff --git a/deps/icu-small/source/common/ubrk.cpp b/deps/icu-small/source/common/ubrk.cpp index bb5bdd1b5012fb..f4e064961f3968 100644 --- a/deps/icu-small/source/common/ubrk.cpp +++ b/deps/icu-small/source/common/ubrk.cpp @@ -168,7 +168,7 @@ ubrk_safeClone( BreakIterator *newBI = ((BreakIterator *)bi)->clone(); if (newBI == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; - } else { + } else if (pBufferSize != NULL) { *status = U_SAFECLONE_ALLOCATED_WARNING; } return (UBreakIterator 
*)newBI; @@ -176,15 +176,7 @@ ubrk_safeClone( U_CAPI UBreakIterator * U_EXPORT2 ubrk_clone(const UBreakIterator *bi, UErrorCode *status) { - if (U_FAILURE(*status)) { - return nullptr; - } - BreakIterator *newBI = ((BreakIterator *)bi)->clone(); - if (newBI == nullptr) { - *status = U_MEMORY_ALLOCATION_ERROR; - return nullptr; - } - return (UBreakIterator *)newBI; + return ubrk_safeClone(bi, nullptr, nullptr, status); } diff --git a/deps/icu-small/source/common/ucase.cpp b/deps/icu-small/source/common/ucase.cpp index 4aa856507aafb1..388c86b1bba791 100644 --- a/deps/icu-small/source/common/ucase.cpp +++ b/deps/icu-small/source/common/ucase.cpp @@ -22,27 +22,14 @@ #include "unicode/utypes.h" #include "unicode/unistr.h" #include "unicode/uset.h" -#include "unicode/udata.h" /* UDataInfo */ #include "unicode/utf16.h" -#include "ucmndata.h" /* DataHeader */ -#include "udatamem.h" -#include "umutex.h" -#include "uassert.h" #include "cmemory.h" -#include "utrie2.h" +#include "uassert.h" #include "ucase.h" +#include "umutex.h" +#include "utrie2.h" -struct UCaseProps { - UDataMemory *mem; - const int32_t *indexes; - const uint16_t *exceptions; - const uint16_t *unfold; - - UTrie2 trie; - uint8_t formatVersion[4]; -}; - -/* ucase_props_data.h is machine-generated by gencase --csource */ +/* ucase_props_data.h is machine-generated by genprops/casepropsbuilder.cpp */ #define INCLUDED_FROM_UCASE_CPP #include "ucase_props_data.h" @@ -77,6 +64,13 @@ ucase_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode) { /* data access primitives --------------------------------------------------- */ +U_CAPI const struct UCaseProps * U_EXPORT2 +ucase_getSingleton(int32_t *pExceptionsLength, int32_t *pUnfoldLength) { + *pExceptionsLength = UPRV_LENGTHOF(ucase_props_exceptions); + *pUnfoldLength = UPRV_LENGTHOF(ucase_props_unfold); + return &ucase_props_singleton; +} + U_CFUNC const UTrie2 * U_EXPORT2 ucase_getTrie() { return &ucase_props_singleton.trie; @@ -690,7 +684,7 @@ 
ucase_isCaseSensitive(UChar32 c) { * - The general category of C is * Nonspacing Mark (Mn), or Enclosing Mark (Me), or Format Control (Cf), or * Letter Modifier (Lm), or Symbol Modifier (Sk) - * - C is one of the following characters + * - C is one of the following characters * U+0027 APOSTROPHE * U+00AD SOFT HYPHEN (SHY) * U+2019 RIGHT SINGLE QUOTATION MARK @@ -1064,6 +1058,8 @@ ucase_toFullLower(UChar32 c, // The sign of the result has meaning, input must be non-negative so that it can be returned as is. U_ASSERT(c >= 0); UChar32 result=c; + // Reset the output pointer in case it was uninitialized. + *pString=nullptr; uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!UCASE_HAS_EXCEPTION(props)) { if(UCASE_IS_UPPER_OR_TITLE(props)) { @@ -1148,7 +1144,6 @@ ucase_toFullLower(UChar32 c, 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE */ - *pString=nullptr; return 0; /* remove the dot (continue without output) */ } else if(loc==UCASE_LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter, context)) { /* @@ -1215,6 +1210,8 @@ toUpperOrTitle(UChar32 c, // The sign of the result has meaning, input must be non-negative so that it can be returned as is. U_ASSERT(c >= 0); UChar32 result=c; + // Reset the output pointer in case it was uninitialized. + *pString=nullptr; uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!UCASE_HAS_EXCEPTION(props)) { if(UCASE_GET_TYPE(props)==UCASE_LOWER) { @@ -1252,7 +1249,6 @@ toUpperOrTitle(UChar32 c, 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE */ - *pString=nullptr; return 0; /* remove the dot (continue without output) */ } else if(c==0x0587) { // See ICU-13416: @@ -1449,6 +1445,8 @@ ucase_toFullFolding(UChar32 c, // The sign of the result has meaning, input must be non-negative so that it can be returned as is. U_ASSERT(c >= 0); UChar32 result=c; + // Reset the output pointer in case it was uninitialized. 
+ *pString=nullptr; uint16_t props=UTRIE2_GET16(&ucase_props_singleton.trie, c); if(!UCASE_HAS_EXCEPTION(props)) { if(UCASE_IS_UPPER_OR_TITLE(props)) { @@ -1542,7 +1540,7 @@ U_CAPI UChar32 U_EXPORT2 u_tolower(UChar32 c) { return ucase_tolower(c); } - + /* Transforms the Unicode character to its upper case equivalent.*/ U_CAPI UChar32 U_EXPORT2 u_toupper(UChar32 c) { diff --git a/deps/icu-small/source/common/ucase.h b/deps/icu-small/source/common/ucase.h index a018f82b81b229..7bf57fd370631b 100644 --- a/deps/icu-small/source/common/ucase.h +++ b/deps/icu-small/source/common/ucase.h @@ -312,6 +312,21 @@ UCaseMapFull(UChar32 c, U_CDECL_END +/* for icuexportdata -------------------------------------------------------- */ + +struct UCaseProps { + void *mem; // TODO: was unused, and type UDataMemory -- remove + const int32_t *indexes; + const uint16_t *exceptions; + const uint16_t *unfold; + + UTrie2 trie; + uint8_t formatVersion[4]; +}; + +U_CAPI const struct UCaseProps * U_EXPORT2 +ucase_getSingleton(int32_t *pExceptionsLength, int32_t *pUnfoldLength); + /* file definitions --------------------------------------------------------- */ #define UCASE_DATA_NAME "ucase" diff --git a/deps/icu-small/source/common/ucasemap.cpp b/deps/icu-small/source/common/ucasemap.cpp index ed72bda828fc1c..95b55d56a02c47 100644 --- a/deps/icu-small/source/common/ucasemap.cpp +++ b/deps/icu-small/source/common/ucasemap.cpp @@ -112,8 +112,7 @@ ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) { if(length==sizeof(csm->locale)) { *pErrorCode=U_BUFFER_OVERFLOW_ERROR; } - if(U_SUCCESS(*pErrorCode)) { - csm->caseLocale=UCASE_LOC_UNKNOWN; + if(U_SUCCESS(*pErrorCode)) { csm->caseLocale = ucase_getCaseLocale(csm->locale); } else { csm->locale[0]=0; @@ -420,6 +419,97 @@ void toUpper(int32_t caseLocale, uint32_t options, #if !UCONFIG_NO_BREAK_ITERATION +namespace { + +constexpr uint8_t ACUTE_BYTE0 = u8"\u0301"[0]; + +constexpr uint8_t ACUTE_BYTE1 = u8"\u0301"[1]; + +/** + * 
Input: c is a letter I with or without acute accent. + * start is the index in src after c, and is less than segmentLimit. + * If a plain i/I is followed by a plain j/J, + * or an i/I with acute (precomposed or decomposed) is followed by a j/J with acute, + * then we output accordingly. + * + * @return the src index after the titlecased sequence, or the start index if no Dutch IJ + */ +int32_t maybeTitleDutchIJ(const uint8_t *src, UChar32 c, int32_t start, int32_t segmentLimit, + ByteSink &sink, uint32_t options, icu::Edits *edits, UErrorCode &errorCode) { + U_ASSERT(start < segmentLimit); + + int32_t index = start; + bool withAcute = false; + + // If the conditions are met, then the following variables tell us what to output. + int32_t unchanged1 = 0; // code units before the j, or the whole sequence (0..3) + bool doTitleJ = false; // true if the j needs to be titlecased + int32_t unchanged2 = 0; // after the j (0 or 1) + + // next character after the first letter + UChar32 c2; + c2 = src[index++]; + + // Is the first letter an i/I with accent? + if (c == u'I') { + if (c2 == ACUTE_BYTE0 && index < segmentLimit && src[index++] == ACUTE_BYTE1) { + withAcute = true; + unchanged1 = 2; // ACUTE is 2 code units in UTF-8 + if (index == segmentLimit) { return start; } + c2 = src[index++]; + } + } else { // Í + withAcute = true; + } + + // Is the next character a j/J? + if (c2 == u'j') { + doTitleJ = true; + } else if (c2 == u'J') { + ++unchanged1; + } else { + return start; + } + + // A plain i/I must be followed by a plain j/J. + // An i/I with acute must be followed by a j/J with acute. + if (withAcute) { + if ((index + 1) >= segmentLimit || src[index++] != ACUTE_BYTE0 || src[index++] != ACUTE_BYTE1) { + return start; + } + if (doTitleJ) { + unchanged2 = 2; // ACUTE is 2 code units in UTF-8 + } else { + unchanged1 = unchanged1 + 2; // ACUTE is 2 code units in UTF-8 + } + } + + // There must not be another combining mark. 
+ if (index < segmentLimit) { + int32_t cp; + int32_t i = index; + U8_NEXT(src, i, segmentLimit, cp); + uint32_t typeMask = U_GET_GC_MASK(cp); + if ((typeMask & U_GC_M_MASK) != 0) { + return start; + } + } + + // Output the rest of the Dutch IJ. + ByteSinkUtil::appendUnchanged(src + start, unchanged1, sink, options, edits, errorCode); + start += unchanged1; + if (doTitleJ) { + ByteSinkUtil::appendCodePoint(1, u'J', sink, edits); + ++start; + } + ByteSinkUtil::appendUnchanged(src + start, unchanged2, sink, options, edits, errorCode); + + U_ASSERT(start + unchanged2 == index); + return index; +} + +} // namespace + U_CFUNC void U_CALLCONV ucasemap_internalUTF8ToTitle( int32_t caseLocale, uint32_t options, BreakIterator *iter, @@ -504,19 +594,14 @@ ucasemap_internalUTF8ToTitle( } /* Special case Dutch IJ titlecasing */ - if (titleStart+1 < index && - caseLocale == UCASE_LOC_DUTCH && - (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) { - if (src[titleStart+1] == 0x006A) { - ByteSinkUtil::appendCodePoint(1, 0x004A, sink, edits); - titleLimit++; - } else if (src[titleStart+1] == 0x004A) { - // Keep the capital J from getting lowercased. 
- if (!ByteSinkUtil::appendUnchanged(src+titleStart+1, 1, - sink, options, edits, errorCode)) { - return; - } - titleLimit++; + if (titleLimit < index && + caseLocale == UCASE_LOC_DUTCH) { + if (c < 0) { + c = ~c; + } + + if (c == u'I' || c == u'Í') { + titleLimit = maybeTitleDutchIJ(src, c, titleLimit, index, sink, options, edits, errorCode); } } diff --git a/deps/icu-small/source/common/ucnv.cpp b/deps/icu-small/source/common/ucnv.cpp index 5dcf35e043850e..019bcb6a79cd90 100644 --- a/deps/icu-small/source/common/ucnv.cpp +++ b/deps/icu-small/source/common/ucnv.cpp @@ -252,7 +252,10 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U UTRACE_EXIT_STATUS(*status); return NULL; } - *status = U_SAFECLONE_ALLOCATED_WARNING; + // If pBufferSize was NULL as the input, pBufferSize is set to &stackBufferSize in this function. + if (pBufferSize != &stackBufferSize) { + *status = U_SAFECLONE_ALLOCATED_WARNING; + } /* record the fact that memory was allocated */ *pBufferSize = bufferSizeNeeded; @@ -317,7 +320,11 @@ ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, U return localConverter; } - +U_CAPI UConverter* U_EXPORT2 +ucnv_clone(const UConverter* cnv, UErrorCode *status) +{ + return ucnv_safeClone(cnv, nullptr, nullptr, status); +} /*Decreases the reference counter in the shared immutable section of the object *and frees the mutable part*/ diff --git a/deps/icu-small/source/common/ucurr.cpp b/deps/icu-small/source/common/ucurr.cpp index 67aab4e8ffec2f..6e489e0563d416 100644 --- a/deps/icu-small/source/common/ucurr.cpp +++ b/deps/icu-small/source/common/ucurr.cpp @@ -254,7 +254,7 @@ currSymbolsEquiv_cleanup(void) } /** - * Deleter for OlsonToMetaMappingEntry + * Deleter for IsoCodeEntry */ static void U_CALLCONV deleteIsoCodeEntry(void *obj) { diff --git a/deps/icu-small/source/common/uloc.cpp b/deps/icu-small/source/common/uloc.cpp index c8a3f1ff731340..99c6a0af39dbae 100644 --- 
a/deps/icu-small/source/common/uloc.cpp +++ b/deps/icu-small/source/common/uloc.cpp @@ -186,10 +186,10 @@ NULL }; static const char* const DEPRECATED_LANGUAGES[]={ - "in", "iw", "ji", "jw", NULL, NULL + "in", "iw", "ji", "jw", "mo", NULL, NULL }; static const char* const REPLACEMENT_LANGUAGES[]={ - "id", "he", "yi", "jv", NULL, NULL + "id", "he", "yi", "jv", "ro", NULL, NULL }; /** @@ -444,7 +444,7 @@ static const char * const COUNTRIES_3[] = { /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */ "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF", /* "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW", */ - "WSM", "XXK", "YEM", "MYT", "ZAF", "ZMB", "ZWE", + "WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE", NULL, /* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */ "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR", diff --git a/deps/icu-small/source/common/unicode/localematcher.h b/deps/icu-small/source/common/unicode/localematcher.h index 252bb7fdc20753..0f7e04a3afdcf2 100644 --- a/deps/icu-small/source/common/unicode/localematcher.h +++ b/deps/icu-small/source/common/unicode/localematcher.h @@ -461,13 +461,13 @@ class U_COMMON_API LocaleMatcher : public UMemory { * Option for whether to include or ignore one-way (fallback) match data. * By default, they are included. * - * @param direction the match direction to set. + * @param matchDirection the match direction to set. 
* @return this Builder object * @stable ICU 67 */ - Builder &setDirection(ULocMatchDirection direction) { + Builder &setDirection(ULocMatchDirection matchDirection) { if (U_SUCCESS(errorCode_)) { - direction_ = direction; + direction_ = matchDirection; } return *this; } diff --git a/deps/icu-small/source/common/unicode/rbbi.h b/deps/icu-small/source/common/unicode/rbbi.h index 0ce93819f54cbf..0bad0d3897cc48 100644 --- a/deps/icu-small/source/common/unicode/rbbi.h +++ b/deps/icu-small/source/common/unicode/rbbi.h @@ -147,6 +147,11 @@ class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { */ int32_t *fLookAheadMatches; + /** + * A flag to indicate if phrase based breaking is enabled. + */ + UBool fIsPhraseBreaking; + //======================================================================= // constructors //======================================================================= @@ -163,6 +168,21 @@ class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { */ RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status); + /** + * This constructor uses the udata interface to create a BreakIterator + * whose internal tables live in a memory-mapped file. "image" is an + * ICU UDataMemory handle for the pre-compiled break iterator tables. + * @param image handle to the memory image for the break iterator data. + * Ownership of the UDataMemory handle passes to the Break Iterator, + * which will be responsible for closing it when it is no longer needed. + * @param status Information on any errors encountered. + * @param isPhraseBreaking true if phrase based breaking is required, otherwise false. 
+ * @see udata_open + * @see #getBinaryRules + * @internal (private) + */ + RuleBasedBreakIterator(UDataMemory* image, UBool isPhraseBreaking, UErrorCode &status); + /** @internal */ friend class RBBIRuleBuilder; /** @internal */ diff --git a/deps/icu-small/source/common/unicode/ubrk.h b/deps/icu-small/source/common/unicode/ubrk.h index c603f7c13f359a..2b3dc7aa576803 100644 --- a/deps/icu-small/source/common/unicode/ubrk.h +++ b/deps/icu-small/source/common/unicode/ubrk.h @@ -312,11 +312,12 @@ ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength, * If *pBufferSize is not enough for a stack-based safe clone, * new memory will be allocated. * @param status to indicate whether the operation went on smoothly or there were errors - * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary. + * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used + * if pBufferSize != NULL and any allocations were necessary * @return pointer to the new clone * @deprecated ICU 69 Use ubrk_clone() instead. */ -U_CAPI UBreakIterator * U_EXPORT2 +U_DEPRECATED UBreakIterator * U_EXPORT2 ubrk_safeClone( const UBreakIterator *bi, void *stackBuffer, @@ -325,21 +326,17 @@ ubrk_safeClone( #endif /* U_HIDE_DEPRECATED_API */ -#ifndef U_HIDE_DRAFT_API - /** * Thread safe cloning operation. 
* @param bi iterator to be cloned * @param status to indicate whether the operation went on smoothly or there were errors * @return pointer to the new clone - * @draft ICU 69 + * @stable ICU 69 */ U_CAPI UBreakIterator * U_EXPORT2 ubrk_clone(const UBreakIterator *bi, UErrorCode *status); -#endif // U_HIDE_DRAFT_API - #ifndef U_HIDE_DEPRECATED_API /** diff --git a/deps/icu-small/source/common/unicode/ucnv.h b/deps/icu-small/source/common/unicode/ucnv.h index 2687c984d43b1d..20c173b662832d 100644 --- a/deps/icu-small/source/common/unicode/ucnv.h +++ b/deps/icu-small/source/common/unicode/ucnv.h @@ -477,7 +477,7 @@ ucnv_openCCSID(int32_t codepage, * *

The name will NOT be looked up in the alias mechanism, nor will the converter be * stored in the converter cache or the alias table. The only way to open further converters - * is call this function multiple times, or use the ucnv_safeClone() function to clone a + * is call this function multiple times, or use the ucnv_clone() function to clone a * 'primary' converter.

* *

A future version of ICU may add alias table lookups and/or caching @@ -493,13 +493,27 @@ ucnv_openCCSID(int32_t codepage, * @return the created Unicode converter object, or NULL if an error occurred * @see udata_open * @see ucnv_open - * @see ucnv_safeClone + * @see ucnv_clone * @see ucnv_close * @stable ICU 2.2 */ U_CAPI UConverter* U_EXPORT2 ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err); +/** + * Thread safe converter cloning operation. + * + * You must ucnv_close() the clone. + * + * @param cnv converter to be cloned + * @param status to indicate whether the operation went on smoothly or there were errors + * @return pointer to the new clone + * @stable ICU 71 + */ +U_CAPI UConverter* U_EXPORT2 ucnv_clone(const UConverter *cnv, UErrorCode *status); + +#ifndef U_HIDE_DEPRECATED_API + /** * Thread safe converter cloning operation. * For most efficient operation, pass in a stackBuffer (and a *pBufferSize) @@ -532,21 +546,19 @@ ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode * pointer to size of allocated space. * @param status to indicate whether the operation went on smoothly or there were errors * An informational status value, U_SAFECLONE_ALLOCATED_WARNING, - * is used if any allocations were necessary. + * is used if pBufferSize != NULL and any allocations were necessary * However, it is better to check if *pBufferSize grew for checking for * allocations because warning codes can be overridden by subsequent * function calls. * @return pointer to the new clone - * @stable ICU 2.0 + * @deprecated ICU 71 Use ucnv_clone() instead. 
*/ -U_CAPI UConverter * U_EXPORT2 +U_DEPRECATED UConverter * U_EXPORT2 ucnv_safeClone(const UConverter *cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status); -#ifndef U_HIDE_DEPRECATED_API - /** * \def U_CNV_SAFECLONE_BUFFERSIZE * Definition of a buffer size that is designed to be large enough for diff --git a/deps/icu-small/source/common/unicode/uniset.h b/deps/icu-small/source/common/unicode/uniset.h index 730337a3535ea8..310c7c8d2011cd 100644 --- a/deps/icu-small/source/common/unicode/uniset.h +++ b/deps/icu-small/source/common/unicode/uniset.h @@ -1229,7 +1229,6 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { */ UnicodeSet& retain(UChar32 c); -#ifndef U_HIDE_DRAFT_API /** * Retains only the specified string from this set if it is present. * Upon return this set will be empty if it did not contain s, or @@ -1238,10 +1237,9 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { * * @param s the source string * @return this object, for chaining - * @draft ICU 69 + * @stable ICU 69 */ UnicodeSet& retain(const UnicodeString &s); -#endif // U_HIDE_DRAFT_API /** * Removes the specified range from this set if it is present. 
diff --git a/deps/icu-small/source/common/unicode/urename.h b/deps/icu-small/source/common/unicode/urename.h index 4605f632ea8c91..d9f9b8f336c4cf 100644 --- a/deps/icu-small/source/common/unicode/urename.h +++ b/deps/icu-small/source/common/unicode/urename.h @@ -567,6 +567,7 @@ #define ucase_addStringCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addStringCaseClosure) #define ucase_fold U_ICU_ENTRY_POINT_RENAME(ucase_fold) #define ucase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ucase_getCaseLocale) +#define ucase_getSingleton U_ICU_ENTRY_POINT_RENAME(ucase_getSingleton) #define ucase_getTrie U_ICU_ENTRY_POINT_RENAME(ucase_getTrie) #define ucase_getType U_ICU_ENTRY_POINT_RENAME(ucase_getType) #define ucase_getTypeOrIgnorable U_ICU_ENTRY_POINT_RENAME(ucase_getTypeOrIgnorable) @@ -630,6 +631,7 @@ #define ucnv_cbFromUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteUChars) #define ucnv_cbToUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteSub) #define ucnv_cbToUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteUChars) +#define ucnv_clone U_ICU_ENTRY_POINT_RENAME(ucnv_clone) #define ucnv_close U_ICU_ENTRY_POINT_RENAME(ucnv_close) #define ucnv_compareNames U_ICU_ENTRY_POINT_RENAME(ucnv_compareNames) #define ucnv_convert U_ICU_ENTRY_POINT_RENAME(ucnv_convert) @@ -725,6 +727,7 @@ #define ucnvsel_selectForString U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForString) #define ucnvsel_selectForUTF8 U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForUTF8) #define ucnvsel_serialize U_ICU_ENTRY_POINT_RENAME(ucnvsel_serialize) +#define ucol_clone U_ICU_ENTRY_POINT_RENAME(ucol_clone) #define ucol_cloneBinary U_ICU_ENTRY_POINT_RENAME(ucol_cloneBinary) #define ucol_close U_ICU_ENTRY_POINT_RENAME(ucol_close) #define ucol_closeElements U_ICU_ENTRY_POINT_RENAME(ucol_closeElements) @@ -904,6 +907,7 @@ #define udatpg_getBestPattern U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPattern) #define udatpg_getBestPatternWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPatternWithOptions) #define 
udatpg_getDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormat) +#define udatpg_getDateTimeFormatForStyle U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormatForStyle) #define udatpg_getDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_getDecimal) #define udatpg_getDefaultHourCycle U_ICU_ENTRY_POINT_RENAME(udatpg_getDefaultHourCycle) #define udatpg_getFieldDisplayName U_ICU_ENTRY_POINT_RENAME(udatpg_getFieldDisplayName) @@ -918,6 +922,7 @@ #define udatpg_setAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemFormat) #define udatpg_setAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemName) #define udatpg_setDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setDateTimeFormat) +#define udatpg_setDateTimeFormatForStyle U_ICU_ENTRY_POINT_RENAME(udatpg_setDateTimeFormatForStyle) #define udatpg_setDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_setDecimal) #define udict_swap U_ICU_ENTRY_POINT_RENAME(udict_swap) #define udtitvfmt_close U_ICU_ENTRY_POINT_RENAME(udtitvfmt_close) diff --git a/deps/icu-small/source/common/unicode/uset.h b/deps/icu-small/source/common/unicode/uset.h index 2ef352ef563b02..33332f2d362441 100644 --- a/deps/icu-small/source/common/unicode/uset.h +++ b/deps/icu-small/source/common/unicode/uset.h @@ -628,7 +628,6 @@ uset_removeRange(USet* set, UChar32 start, UChar32 end); U_CAPI void U_EXPORT2 uset_removeString(USet* set, const UChar* str, int32_t strLen); -#ifndef U_HIDE_DRAFT_API /** * Removes EACH of the characters in this string. Note: "ch" == {"c", "h"} * A frozen set will not be modified. 
@@ -636,11 +635,10 @@ uset_removeString(USet* set, const UChar* str, int32_t strLen); * @param set the object to be modified * @param str the string * @param length the length of the string, or -1 if NUL-terminated - * @draft ICU 69 + * @stable ICU 69 */ U_CAPI void U_EXPORT2 uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length); -#endif // U_HIDE_DRAFT_API /** * Removes from this set all of its elements that are contained in the @@ -671,7 +669,6 @@ uset_removeAll(USet* set, const USet* removeSet); U_CAPI void U_EXPORT2 uset_retain(USet* set, UChar32 start, UChar32 end); -#ifndef U_HIDE_DRAFT_API /** * Retains only the specified string from this set if it is present. * Upon return this set will be empty if it did not contain s, or @@ -681,7 +678,7 @@ uset_retain(USet* set, UChar32 start, UChar32 end); * @param set the object to be modified * @param str the string * @param length the length of the string, or -1 if NUL-terminated - * @draft ICU 69 + * @stable ICU 69 */ U_CAPI void U_EXPORT2 uset_retainString(USet *set, const UChar *str, int32_t length); @@ -693,11 +690,10 @@ uset_retainString(USet *set, const UChar *str, int32_t length); * @param set the object to be modified * @param str the string * @param length the length of the string, or -1 if NUL-terminated - * @draft ICU 69 + * @stable ICU 69 */ U_CAPI void U_EXPORT2 uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length); -#endif // U_HIDE_DRAFT_API /** * Retains only the elements in this set that are contained in the @@ -741,7 +737,6 @@ uset_compact(USet* set); U_CAPI void U_EXPORT2 uset_complement(USet* set); -#ifndef U_HIDE_DRAFT_API /** * Complements the specified range in this set. 
Any character in * the range will be removed if it is in this set, or will be @@ -753,7 +748,7 @@ uset_complement(USet* set); * @param set the object to be modified * @param start first character, inclusive, of range * @param end last character, inclusive, of range - * @draft ICU 69 + * @stable ICU 69 */ U_CAPI void U_EXPORT2 uset_complementRange(USet *set, UChar32 start, UChar32 end); @@ -766,7 +761,7 @@ uset_complementRange(USet *set, UChar32 start, UChar32 end); * @param set the object to be modified * @param str the string * @param length the length of the string, or -1 if NUL-terminated - * @draft ICU 69 + * @stable ICU 69 */ U_CAPI void U_EXPORT2 uset_complementString(USet *set, const UChar *str, int32_t length); @@ -778,11 +773,10 @@ uset_complementString(USet *set, const UChar *str, int32_t length); * @param set the object to be modified * @param str the string * @param length the length of the string, or -1 if NUL-terminated - * @draft ICU 69 + * @stable ICU 69 */ U_CAPI void U_EXPORT2 uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length); -#endif // U_HIDE_DRAFT_API /** * Complements in this set all elements contained in the specified diff --git a/deps/icu-small/source/common/unicode/uvernum.h b/deps/icu-small/source/common/unicode/uvernum.h index 42e8865d7e330b..2706e0b0606429 100644 --- a/deps/icu-small/source/common/unicode/uvernum.h +++ b/deps/icu-small/source/common/unicode/uvernum.h @@ -60,7 +60,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION_MAJOR_NUM 70 +#define U_ICU_VERSION_MAJOR_NUM 71 /** The current ICU minor version as an integer. 
* This value will change in the subsequent releases of ICU @@ -86,7 +86,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#define U_ICU_VERSION_SUFFIX _70 +#define U_ICU_VERSION_SUFFIX _71 /** * \def U_DEF2_ICU_ENTRY_POINT_RENAME @@ -139,7 +139,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION "70.1" +#define U_ICU_VERSION "71.1" /** * The current ICU library major version number as a string, for library name suffixes. @@ -152,13 +152,13 @@ * * @stable ICU 2.6 */ -#define U_ICU_VERSION_SHORT "70" +#define U_ICU_VERSION_SHORT "71" #ifndef U_HIDE_INTERNAL_API /** Data version in ICU4C. * @internal ICU 4.4 Internal Use Only **/ -#define U_ICU_DATA_VERSION "70.1" +#define U_ICU_DATA_VERSION "71.1" #endif /* U_HIDE_INTERNAL_API */ /*=========================================================================== diff --git a/deps/icu-small/source/common/unistr.cpp b/deps/icu-small/source/common/unistr.cpp index 077b4d6ef20811..c18665928d851a 100644 --- a/deps/icu-small/source/common/unistr.cpp +++ b/deps/icu-small/source/common/unistr.cpp @@ -334,7 +334,8 @@ Replaceable::clone() const { // UnicodeString overrides clone() with a real implementation UnicodeString * UnicodeString::clone() const { - return new UnicodeString(*this); + LocalPointer clonedString(new UnicodeString(*this)); + return clonedString.isValid() && !clonedString->isBogus() ? clonedString.orphan() : nullptr; } //======================================== @@ -1976,7 +1977,12 @@ The vector deleting destructor is already a part of UObject, but defining it here makes sure that it is included with this object file. This makes sure that static library dependencies are kept to a minimum. 
*/ +#if defined(__clang__) || U_GCC_MAJOR_MINOR >= 1100 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" static void uprv_UnicodeStringDummy(void) { delete [] (new UnicodeString[2]); } +#pragma GCC diagnostic pop +#endif #endif diff --git a/deps/icu-small/source/common/ustrcase.cpp b/deps/icu-small/source/common/ustrcase.cpp index 36b19e75f2d7ae..43910ea520984e 100644 --- a/deps/icu-small/source/common/ustrcase.cpp +++ b/deps/icu-small/source/common/ustrcase.cpp @@ -36,6 +36,12 @@ #include "ustr_imp.h" #include "uassert.h" +/** + * Code point for COMBINING ACUTE ACCENT + * @internal + */ +#define ACUTE u'\u0301' + U_NAMESPACE_BEGIN namespace { @@ -396,6 +402,94 @@ U_NAMESPACE_USE #if !UCONFIG_NO_BREAK_ITERATION +namespace { + +/** + * Input: c is a letter I with or without acute accent. + * start is the index in src after c, and is less than segmentLimit. + * If a plain i/I is followed by a plain j/J, + * or an i/I with acute (precomposed or decomposed) is followed by a j/J with acute, + * then we output accordingly. + * + * @return the src index after the titlecased sequence, or the start index if no Dutch IJ + */ +int32_t maybeTitleDutchIJ(const UChar *src, UChar32 c, int32_t start, int32_t segmentLimit, + UChar *dest, int32_t &destIndex, int32_t destCapacity, uint32_t options, + icu::Edits *edits) { + U_ASSERT(start < segmentLimit); + + int32_t index = start; + bool withAcute = false; + + // If the conditions are met, then the following variables tell us what to output. + int32_t unchanged1 = 0; // code units before the j, or the whole sequence (0..3) + bool doTitleJ = false; // true if the j needs to be titlecased + int32_t unchanged2 = 0; // after the j (0 or 1) + + // next character after the first letter + UChar c2 = src[index++]; + + // Is the first letter an i/I with accent? 
+ if (c == u'I') { + if (c2 == ACUTE) { + withAcute = true; + unchanged1 = 1; + if (index == segmentLimit) { return start; } + c2 = src[index++]; + } + } else { // Í + withAcute = true; + } + + // Is the next character a j/J? + if (c2 == u'j') { + doTitleJ = true; + } else if (c2 == u'J') { + ++unchanged1; + } else { + return start; + } + + // A plain i/I must be followed by a plain j/J. + // An i/I with acute must be followed by a j/J with acute. + if (withAcute) { + if (index == segmentLimit || src[index++] != ACUTE) { return start; } + if (doTitleJ) { + unchanged2 = 1; + } else { + ++unchanged1; + } + } + + // There must not be another combining mark. + if (index < segmentLimit) { + int32_t cp; + int32_t i = index; + U16_NEXT(src, i, segmentLimit, cp); + uint32_t typeMask = U_GET_GC_MASK(cp); + if ((typeMask & U_GC_M_MASK) != 0) { + return start; + } + } + + // Output the rest of the Dutch IJ. + destIndex = appendUnchanged(dest, destIndex, destCapacity, src + start, unchanged1, options, edits); + start += unchanged1; + if (doTitleJ) { + destIndex = appendUChar(dest, destIndex, destCapacity, u'J'); + if (edits != nullptr) { + edits->addReplace(1, 1); + } + ++start; + } + destIndex = appendUnchanged(dest, destIndex, destCapacity, src + start, unchanged2, options, edits); + + U_ASSERT(start + unchanged2 == index); + return index; +} + +} // namespace + U_CFUNC int32_t U_CALLCONV ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *iter, UChar *dest, int32_t destCapacity, @@ -412,14 +506,14 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it csc.limit=srcLength; int32_t destIndex=0; int32_t prev=0; - UBool isFirstIndex=TRUE; + bool isFirstIndex=true; /* titlecasing loop */ while(prevfirst(); } else { index=iter->next(); @@ -446,7 +540,7 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it // Stop with titleStartaddReplace(1, 1); - } - titleLimit++; - } else if (src[titleStart+1] 
== 0x004A) { - // Keep the capital J from getting lowercased. - destIndex=appendUnchanged(dest, destIndex, destCapacity, - src+titleStart+1, 1, options, edits); - if(destIndex<0) { - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - titleLimit++; + caseLocale == UCASE_LOC_DUTCH) { + if (c < 0) { + c = ~c; + } + + if (c == u'I' || c == u'Í') { + titleLimit = maybeTitleDutchIJ(src, c, titleStart + 1, index, + dest, destIndex, destCapacity, options, + edits); } } diff --git a/deps/icu-small/source/common/uvector.cpp b/deps/icu-small/source/common/uvector.cpp index 4da8b864e1be34..844463921efd9a 100644 --- a/deps/icu-small/source/common/uvector.cpp +++ b/deps/icu-small/source/common/uvector.cpp @@ -99,14 +99,6 @@ bool UVector::operator==(const UVector& other) const { return true; } -// TODO: delete this function once all call sites have been migrated to the -// new addElement(). -void UVector::addElementX(void* obj, UErrorCode &status) { - if (ensureCapacityX(count + 1, status)) { - elements[count++].pointer = obj; - } -} - void UVector::addElement(void* obj, UErrorCode &status) { U_ASSERT(deleter == nullptr); if (ensureCapacity(count + 1, status)) { @@ -331,38 +323,6 @@ int32_t UVector::indexOf(UElement key, int32_t startIndex, int8_t hint) const { return -1; } -UBool UVector::ensureCapacityX(int32_t minimumCapacity, UErrorCode &status) { - if (minimumCapacity < 0) { - status = U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - if (capacity < minimumCapacity) { - if (capacity > (INT32_MAX - 1) / 2) { // integer overflow check - status = U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - int32_t newCap = capacity * 2; - if (newCap < minimumCapacity) { - newCap = minimumCapacity; - } - if (newCap > (int32_t)(INT32_MAX / sizeof(UElement))) { // integer overflow check - // We keep the original memory contents on bad minimumCapacity. 
- status = U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - UElement* newElems = (UElement *)uprv_realloc(elements, sizeof(UElement)*newCap); - if (newElems == nullptr) { - // We keep the original contents on the memory failure on realloc or bad minimumCapacity. - status = U_MEMORY_ALLOCATION_ERROR; - return FALSE; - } - elements = newElems; - capacity = newCap; - } - return TRUE; -} - - UBool UVector::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) { if (U_FAILURE(status)) { return false; @@ -370,7 +330,7 @@ UBool UVector::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) { if (minimumCapacity < 0) { status = U_ILLEGAL_ARGUMENT_ERROR; return false; - } + } if (capacity < minimumCapacity) { if (capacity > (INT32_MAX - 1) / 2) { // integer overflow check status = U_ILLEGAL_ARGUMENT_ERROR; @@ -396,6 +356,7 @@ UBool UVector::ensureCapacity(int32_t minimumCapacity, UErrorCode &status) { } return true; } + /** * Change the size of this vector as follows: If newSize is smaller, * then truncate the array, possibly deleting held elements for i >= diff --git a/deps/icu-small/source/common/uvector.h b/deps/icu-small/source/common/uvector.h index f61fcc2be60fb1..1eb7d136e7ab82 100644 --- a/deps/icu-small/source/common/uvector.h +++ b/deps/icu-small/source/common/uvector.h @@ -123,12 +123,6 @@ class U_COMMON_API UVector : public UObject { // java.util.Vector API //------------------------------------------------------------ - /* - * Old version of addElement, with non-standard error handling. - * Will be removed once all uses have been switched to the new addElement(). - */ - void addElementX(void* obj, UErrorCode &status); - /** * Add an element at the end of the vector. * For use only with vectors that do not adopt their elements, which is to say, @@ -197,12 +191,6 @@ class U_COMMON_API UVector : public UObject { inline UBool isEmpty(void) const {return count == 0;} - /* - * Old version of ensureCapacity, with non-standard error handling. 
- * Will be removed once all uses have been switched to the new ensureCapacity(). - */ - UBool ensureCapacityX(int32_t minimumCapacity, UErrorCode &status); - UBool ensureCapacity(int32_t minimumCapacity, UErrorCode &status); /** diff --git a/deps/icu-small/source/common/uvectr32.cpp b/deps/icu-small/source/common/uvectr32.cpp index a77ecb689fdaad..2b4d0b8a75a365 100644 --- a/deps/icu-small/source/common/uvectr32.cpp +++ b/deps/icu-small/source/common/uvectr32.cpp @@ -83,7 +83,7 @@ void UVector32::assign(const UVector32& other, UErrorCode &ec) { } -bool UVector32::operator==(const UVector32& other) { +bool UVector32::operator==(const UVector32& other) const { int32_t i; if (count != other.count) return false; for (i=0; isetDeleter(uprv_deleteUObject); // underflow bucket - Bucket *bucket = new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW); - if (bucket == NULL) { - errorCode = U_MEMORY_ALLOCATION_ERROR; + LocalPointer bucket(new Bucket(getUnderflowLabel(), emptyString_, U_ALPHAINDEX_UNDERFLOW), errorCode); + if (U_FAILURE(errorCode)) { return NULL; } - bucketList->addElementX(bucket, errorCode); + bucketList->adoptElement(bucket.orphan(), errorCode); if (U_FAILURE(errorCode)) { return NULL; } UnicodeString temp; @@ -481,28 +480,24 @@ BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const { if (skippedScript && bucketList->size() > 1) { // We are skipping one or more scripts, // and we are not just getting out of the underflow label. - bucket = new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW); - if (bucket == NULL) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - bucketList->addElementX(bucket, errorCode); + bucket.adoptInsteadAndCheckErrorCode( + new Bucket(getInflowLabel(), inflowBoundary, U_ALPHAINDEX_INFLOW), errorCode); + bucketList->adoptElement(bucket.orphan(), errorCode); + if (U_FAILURE(errorCode)) { return nullptr; } } } // Add a bucket with the current label. 
- bucket = new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL); - if (bucket == NULL) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - bucketList->addElementX(bucket, errorCode); + bucket.adoptInsteadAndCheckErrorCode( + new Bucket(fixLabel(current, temp), current, U_ALPHAINDEX_NORMAL), errorCode); + bucketList->adoptElement(bucket.orphan(), errorCode); + if (U_FAILURE(errorCode)) { return nullptr; } // Remember ASCII and Pinyin buckets for Pinyin redirects. UChar c; if (current.length() == 1 && 0x41 <= (c = current.charAt(0)) && c <= 0x5A) { // A-Z - asciiBuckets[c - 0x41] = bucket; + asciiBuckets[c - 0x41] = (Bucket *)bucketList->lastElement(); } else if (current.length() == BASE_LENGTH + 1 && current.startsWith(BASE, BASE_LENGTH) && 0x41 <= (c = current.charAt(BASE_LENGTH)) && c <= 0x5A) { - pinyinBuckets[c - 0x41] = bucket; + pinyinBuckets[c - 0x41] = (Bucket *)bucketList->lastElement(); hasPinyin = TRUE; } // Check for multiple primary weights. @@ -526,15 +521,16 @@ BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const { // to the previous single-character bucket. // For example, after ... Q R S Sch we add Sch\uFFFF->S // and after ... Q R S Sch Sch\uFFFF St we add St\uFFFF->S. 
- bucket = new Bucket(emptyString_, + bucket.adoptInsteadAndCheckErrorCode(new Bucket(emptyString_, UnicodeString(current).append((UChar)0xFFFF), - U_ALPHAINDEX_NORMAL); - if (bucket == NULL) { - errorCode = U_MEMORY_ALLOCATION_ERROR; + U_ALPHAINDEX_NORMAL), + errorCode); + if (U_FAILURE(errorCode)) { return NULL; } bucket->displayBucket_ = singleBucket; - bucketList->addElementX(bucket, errorCode); + bucketList->adoptElement(bucket.orphan(), errorCode); + if (U_FAILURE(errorCode)) { return nullptr; } hasInvisibleBuckets = TRUE; break; } @@ -553,12 +549,10 @@ BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const { return bl; } // overflow bucket - bucket = new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW); - if (bucket == NULL) { - errorCode = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - bucketList->addElementX(bucket, errorCode); // final + bucket.adoptInsteadAndCheckErrorCode( + new Bucket(getOverflowLabel(), *scriptUpperBoundary, U_ALPHAINDEX_OVERFLOW), errorCode); + bucketList->adoptElement(bucket.orphan(), errorCode); // final + if (U_FAILURE(errorCode)) { return nullptr; } if (hasPinyin) { // Redirect Pinyin buckets. @@ -589,7 +583,7 @@ BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const { int32_t i = bucketList->size() - 1; Bucket *nextBucket = getBucket(*bucketList, i); while (--i > 0) { - bucket = getBucket(*bucketList, i); + Bucket *bucket = getBucket(*bucketList, i); if (bucket->displayBucket_ != NULL) { continue; // skip invisible buckets } @@ -609,9 +603,9 @@ BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const { // Do not call publicBucketList->setDeleter(): // This vector shares its objects with the bucketList. 
for (int32_t j = 0; j < bucketList->size(); ++j) { - bucket = getBucket(*bucketList, j); + Bucket *bucket = getBucket(*bucketList, j); if (bucket->displayBucket_ == NULL) { - publicBucketList->addElementX(bucket, errorCode); + publicBucketList->addElement(bucket, errorCode); } } if (U_FAILURE(errorCode)) { return NULL; } @@ -679,13 +673,13 @@ void AlphabeticIndex::initBuckets(UErrorCode &errorCode) { bucket = bucket->displayBucket_; } if (bucket->records_ == NULL) { - bucket->records_ = new UVector(errorCode); - if (bucket->records_ == NULL) { - errorCode = U_MEMORY_ALLOCATION_ERROR; + LocalPointer records(new UVector(errorCode), errorCode); + if (U_FAILURE(errorCode)) { return; } + bucket->records_ = records.orphan(); } - bucket->records_->addElementX(r, errorCode); + bucket->records_->addElement(r, errorCode); } } @@ -1011,12 +1005,11 @@ UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) { // and the one for unassigned implicit weights (Cn). continue; } - UnicodeString *s = new UnicodeString(boundary); - if (s == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return NULL; + LocalPointer s(new UnicodeString(boundary), status); + dest->adoptElement(s.orphan(), status); + if (U_FAILURE(status)) { + return nullptr; } - dest->addElementX(s, status); } return dest.orphan(); } @@ -1067,19 +1060,18 @@ AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const vo return *this; } if (inputList_ == NULL) { - inputList_ = new UVector(status); - if (inputList_ == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; + LocalPointer inputList(new UVector(status), status); + if (U_FAILURE(status)) { return *this; } + inputList_ = inputList.orphan(); inputList_->setDeleter(alphaIndex_deleteRecord); } - Record *r = new Record(name, data); - if (r == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; + LocalPointer r(new Record(name, data), status); + inputList_->adoptElement(r.orphan(), status); + if (U_FAILURE(status)) { return *this; } - 
inputList_->addElementX(r, status); clearBuckets(); //std::string ss; //std::string ss2; diff --git a/deps/icu-small/source/i18n/collationdatabuilder.cpp b/deps/icu-small/source/i18n/collationdatabuilder.cpp index 25050aa777e681..b10de993c279a3 100644 --- a/deps/icu-small/source/i18n/collationdatabuilder.cpp +++ b/deps/icu-small/source/i18n/collationdatabuilder.cpp @@ -522,12 +522,11 @@ CollationDataBuilder::addConditionalCE32(const UnicodeString &context, uint32_t errorCode = U_BUFFER_OVERFLOW_ERROR; return -1; } - ConditionalCE32 *cond = new ConditionalCE32(context, ce32); - if(cond == NULL) { - errorCode = U_MEMORY_ALLOCATION_ERROR; + LocalPointer cond(new ConditionalCE32(context, ce32), errorCode); + conditionalCE32s.adoptElement(cond.orphan(), errorCode); + if(U_FAILURE(errorCode)) { return -1; } - conditionalCE32s.addElementX(cond, errorCode); return index; } diff --git a/deps/icu-small/source/i18n/csdetect.cpp b/deps/icu-small/source/i18n/csdetect.cpp index 84f0776542d896..d866eb66286811 100644 --- a/deps/icu-small/source/i18n/csdetect.cpp +++ b/deps/icu-small/source/i18n/csdetect.cpp @@ -270,6 +270,11 @@ const CharsetMatch * const *CharsetDetector::detectAll(int32_t &maxMatchesFound, maxMatchesFound = resultCount; + if (maxMatchesFound == 0) { + status = U_INVALID_CHAR_FOUND; + return NULL; + } + return resultArray; } diff --git a/deps/icu-small/source/i18n/double-conversion-ieee.h b/deps/icu-small/source/i18n/double-conversion-ieee.h index 31c35867de5610..2940acb16981e8 100644 --- a/deps/icu-small/source/i18n/double-conversion-ieee.h +++ b/deps/icu-small/source/i18n/double-conversion-ieee.h @@ -164,11 +164,19 @@ class Double { } bool IsQuietNan() const { +#if (defined(__mips__) && !defined(__mips_nan2008)) || defined(__hppa__) + return IsNan() && ((AsUint64() & kQuietNanBit) == 0); +#else return IsNan() && ((AsUint64() & kQuietNanBit) != 0); +#endif } bool IsSignalingNan() const { +#if (defined(__mips__) && !defined(__mips_nan2008)) || defined(__hppa__) + 
return IsNan() && ((AsUint64() & kQuietNanBit) != 0); +#else return IsNan() && ((AsUint64() & kQuietNanBit) == 0); +#endif } @@ -250,7 +258,12 @@ class Double { private: static const int kDenormalExponent = -kExponentBias + 1; static const uint64_t kInfinity = DOUBLE_CONVERSION_UINT64_2PART_C(0x7FF00000, 00000000); +#if (defined(__mips__) && !defined(__mips_nan2008)) || defined(__hppa__) + static const uint64_t kNaN = DOUBLE_CONVERSION_UINT64_2PART_C(0x7FF7FFFF, FFFFFFFF); +#else static const uint64_t kNaN = DOUBLE_CONVERSION_UINT64_2PART_C(0x7FF80000, 00000000); +#endif + const uint64_t d64_; @@ -350,11 +363,19 @@ class Single { } bool IsQuietNan() const { +#if (defined(__mips__) && !defined(__mips_nan2008)) || defined(__hppa__) + return IsNan() && ((AsUint32() & kQuietNanBit) == 0); +#else return IsNan() && ((AsUint32() & kQuietNanBit) != 0); +#endif } bool IsSignalingNan() const { +#if (defined(__mips__) && !defined(__mips_nan2008)) || defined(__hppa__) + return IsNan() && ((AsUint32() & kQuietNanBit) != 0); +#else return IsNan() && ((AsUint32() & kQuietNanBit) == 0); +#endif } @@ -424,7 +445,11 @@ class Single { static const int kDenormalExponent = -kExponentBias + 1; static const int kMaxExponent = 0xFF - kExponentBias; static const uint32_t kInfinity = 0x7F800000; +#if (defined(__mips__) && !defined(__mips_nan2008)) || defined(__hppa__) + static const uint32_t kNaN = 0x7FBFFFFF; +#else static const uint32_t kNaN = 0x7FC00000; +#endif const uint32_t d32_; diff --git a/deps/icu-small/source/i18n/double-conversion-utils.h b/deps/icu-small/source/i18n/double-conversion-utils.h index 7f23e0a8250d2b..11c92717c10d72 100644 --- a/deps/icu-small/source/i18n/double-conversion-utils.h +++ b/deps/icu-small/source/i18n/double-conversion-utils.h @@ -37,6 +37,9 @@ #ifndef DOUBLE_CONVERSION_UTILS_H_ #define DOUBLE_CONVERSION_UTILS_H_ +// Use DOUBLE_CONVERSION_NON_PREFIXED_MACROS to get unprefixed macros as was +// the case in double-conversion releases prior to 3.1.6 + 
#include #include @@ -46,9 +49,17 @@ #define DOUBLE_CONVERSION_ASSERT(condition) \ U_ASSERT(condition) #endif +#if defined(DOUBLE_CONVERSION_NON_PREFIXED_MACROS) && !defined(ASSERT) +#define ASSERT DOUBLE_CONVERSION_ASSERT +#endif + #ifndef DOUBLE_CONVERSION_UNIMPLEMENTED #define DOUBLE_CONVERSION_UNIMPLEMENTED() (abort()) #endif +#if defined(DOUBLE_CONVERSION_NON_PREFIXED_MACROS) && !defined(UNIMPLEMENTED) +#define UNIMPLEMENTED DOUBLE_CONVERSION_UNIMPLEMENTED +#endif + #ifndef DOUBLE_CONVERSION_NO_RETURN #ifdef _MSC_VER #define DOUBLE_CONVERSION_NO_RETURN __declspec(noreturn) @@ -56,6 +67,10 @@ #define DOUBLE_CONVERSION_NO_RETURN __attribute__((noreturn)) #endif #endif +#if defined(DOUBLE_CONVERSION_NON_PREFIXED_MACROS) && !defined(NO_RETURN) +#define NO_RETURN DOUBLE_CONVERSION_NO_RETURN +#endif + #ifndef DOUBLE_CONVERSION_UNREACHABLE #ifdef _MSC_VER void DOUBLE_CONVERSION_NO_RETURN abort_noreturn(); @@ -65,6 +80,9 @@ inline void abort_noreturn() { abort(); } #define DOUBLE_CONVERSION_UNREACHABLE() (abort()) #endif #endif +#if defined(DOUBLE_CONVERSION_NON_PREFIXED_MACROS) && !defined(UNREACHABLE) +#define UNREACHABLE DOUBLE_CONVERSION_UNREACHABLE +#endif // Not all compilers support __has_attribute and combining a check for both // ifdef and __has_attribute on the same preprocessor line isn't portable. @@ -81,12 +99,18 @@ inline void abort_noreturn() { abort(); } #define DOUBLE_CONVERSION_UNUSED #endif #endif +#if defined(DOUBLE_CONVERSION_NON_PREFIXED_MACROS) && !defined(UNUSED) +#define UNUSED DOUBLE_CONVERSION_UNUSED +#endif #if DOUBLE_CONVERSION_HAS_ATTRIBUTE(uninitialized) #define DOUBLE_CONVERSION_STACK_UNINITIALIZED __attribute__((uninitialized)) #else #define DOUBLE_CONVERSION_STACK_UNINITIALIZED #endif +#if defined(DOUBLE_CONVERSION_NON_PREFIXED_MACROS) && !defined(STACK_UNINITIALIZED) +#define STACK_UNINITIALIZED DOUBLE_CONVERSION_STACK_UNINITIALIZED +#endif // Double operations detection based on target architecture. 
// Linux uses a 80bit wide floating point stack on x86. This induces double @@ -127,7 +151,7 @@ int main(int argc, char** argv) { defined(_MIPS_ARCH_MIPS32R2) || defined(__ARMEB__) ||\ defined(__AARCH64EL__) || defined(__aarch64__) || defined(__AARCH64EB__) || \ defined(__riscv) || defined(__e2k__) || \ - defined(__or1k__) || defined(__arc__) || \ + defined(__or1k__) || defined(__arc__) || defined(__ARC64__) || \ defined(__microblaze__) || defined(__XTENSA__) || \ defined(__EMSCRIPTEN__) || defined(__wasm32__) #define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1 @@ -144,6 +168,9 @@ int main(int argc, char** argv) { #else #error Target architecture was not detected as supported by Double-Conversion. #endif +#if defined(DOUBLE_CONVERSION_NON_PREFIXED_MACROS) && !defined(CORRECT_DOUBLE_OPERATIONS) +#define CORRECT_DOUBLE_OPERATIONS DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS +#endif #if defined(_WIN32) && !defined(__MINGW32__) @@ -169,7 +196,9 @@ typedef uint16_t uc16; // Usage: instead of writing 0x1234567890123456 // write DOUBLE_CONVERSION_UINT64_2PART_C(0x12345678,90123456); #define DOUBLE_CONVERSION_UINT64_2PART_C(a, b) (((static_cast(a) << 32) + 0x##b##u)) - +#if defined(DOUBLE_CONVERSION_NON_PREFIXED_MACROS) && !defined(UINT64_2PART_C) +#define UINT64_2PART_C DOUBLE_CONVERSION_UINT64_2PART_C +#endif // The expression DOUBLE_CONVERSION_ARRAY_SIZE(a) is a compile-time constant of type // size_t which represents the number of elements of the given @@ -180,6 +209,9 @@ typedef uint16_t uc16; ((sizeof(a) / sizeof(*(a))) / \ static_cast(!(sizeof(a) % sizeof(*(a))))) #endif +#if defined(DOUBLE_CONVERSION_NON_PREFIXED_MACROS) && !defined(ARRAY_SIZE) +#define ARRAY_SIZE DOUBLE_CONVERSION_ARRAY_SIZE +#endif // A macro to disallow the evil copy constructor and operator= functions // This should be used in the private: declarations for a class @@ -188,6 +220,9 @@ typedef uint16_t uc16; TypeName(const TypeName&); \ void operator=(const TypeName&) #endif +#if 
defined(DOUBLE_CONVERSION_NON_PREFIXED_MACROS) && !defined(DC_DISALLOW_COPY_AND_ASSIGN) +#define DC_DISALLOW_COPY_AND_ASSIGN DOUBLE_CONVERSION_DISALLOW_COPY_AND_ASSIGN +#endif // A macro to disallow all the implicit constructors, namely the // default constructor, copy constructor and operator= functions. @@ -200,6 +235,9 @@ typedef uint16_t uc16; TypeName(); \ DOUBLE_CONVERSION_DISALLOW_COPY_AND_ASSIGN(TypeName) #endif +#if defined(DOUBLE_CONVERSION_NON_PREFIXED_MACROS) && !defined(DC_DISALLOW_IMPLICIT_CONSTRUCTORS) +#define DC_DISALLOW_IMPLICIT_CONSTRUCTORS DOUBLE_CONVERSION_DISALLOW_IMPLICIT_CONSTRUCTORS +#endif // ICU PATCH: Wrap in ICU namespace U_NAMESPACE_BEGIN diff --git a/deps/icu-small/source/i18n/dtfmtsym.cpp b/deps/icu-small/source/i18n/dtfmtsym.cpp index ab5f2b612c1f0e..134b919f06ea59 100644 --- a/deps/icu-small/source/i18n/dtfmtsym.cpp +++ b/deps/icu-small/source/i18n/dtfmtsym.cpp @@ -1574,26 +1574,20 @@ struct CalendarDataSink : public ResourceSink { errorCode); if (U_FAILURE(errorCode)) { return; } } - LocalPointer aliasRelativePathCopy(new UnicodeString(aliasRelativePath), errorCode); - resourcesToVisitNext->addElementX(aliasRelativePathCopy.getAlias(), errorCode); + LocalPointer aliasRelativePathCopy(aliasRelativePath.clone(), errorCode); + resourcesToVisitNext->adoptElement(aliasRelativePathCopy.orphan(), errorCode); if (U_FAILURE(errorCode)) { return; } - // Only release ownership after resourcesToVisitNext takes it (no error happened): - aliasRelativePathCopy.orphan(); continue; } else if (aliasType == SAME_CALENDAR) { // Register same-calendar alias if (arrays.get(aliasRelativePath) == NULL && maps.get(aliasRelativePath) == NULL) { - LocalPointer aliasRelativePathCopy(new UnicodeString(aliasRelativePath), errorCode); - aliasPathPairs.addElementX(aliasRelativePathCopy.getAlias(), errorCode); + LocalPointer aliasRelativePathCopy(aliasRelativePath.clone(), errorCode); + aliasPathPairs.adoptElement(aliasRelativePathCopy.orphan(), errorCode); if 
(U_FAILURE(errorCode)) { return; } - // Only release ownership after aliasPathPairs takes it (no error happened): - aliasRelativePathCopy.orphan(); - LocalPointer keyUStringCopy(new UnicodeString(keyUString), errorCode); - aliasPathPairs.addElementX(keyUStringCopy.getAlias(), errorCode); + LocalPointer keyUStringCopy(keyUString.clone(), errorCode); + aliasPathPairs.adoptElement(keyUStringCopy.orphan(), errorCode); if (U_FAILURE(errorCode)) { return; } - // Only release ownership after aliasPathPairs takes it (no error happened): - keyUStringCopy.orphan(); } continue; } @@ -1760,16 +1754,12 @@ struct CalendarDataSink : public ResourceSink { if (U_FAILURE(errorCode)) { return; } if (aliasType == SAME_CALENDAR) { // Store the alias path and the current path on aliasPathPairs - LocalPointer aliasRelativePathCopy(new UnicodeString(aliasRelativePath), errorCode); - aliasPathPairs.addElementX(aliasRelativePathCopy.getAlias(), errorCode); + LocalPointer aliasRelativePathCopy(aliasRelativePath.clone(), errorCode); + aliasPathPairs.adoptElement(aliasRelativePathCopy.orphan(), errorCode); if (U_FAILURE(errorCode)) { return; } - // Only release ownership after aliasPathPairs takes it (no error happened): - aliasRelativePathCopy.orphan(); - LocalPointer pathCopy(new UnicodeString(path), errorCode); - aliasPathPairs.addElementX(pathCopy.getAlias(), errorCode); + LocalPointer pathCopy(path.clone(), errorCode); + aliasPathPairs.adoptElement(pathCopy.orphan(), errorCode); if (U_FAILURE(errorCode)) { return; } - // Only release ownership after aliasPathPairs takes it (no error happened): - pathCopy.orphan(); // Drop the latest key on the path and continue path.retainBetween(0, pathLength); diff --git a/deps/icu-small/source/i18n/dtitvfmt.cpp b/deps/icu-small/source/i18n/dtitvfmt.cpp index 298fb62be0cc22..d51ddcd5c70356 100644 --- a/deps/icu-small/source/i18n/dtitvfmt.cpp +++ b/deps/icu-small/source/i18n/dtitvfmt.cpp @@ -965,16 +965,22 @@ 
DateIntervalFormat::normalizeHourMetacharacters(const UnicodeString& skeleton) c UnicodeString result = skeleton; UChar hourMetachar = u'\0'; + UChar dayPeriodChar = u'\0'; int32_t metacharStart = 0; int32_t metacharCount = 0; for (int32_t i = 0; i < result.length(); i++) { UChar c = result[i]; - if (c == LOW_J || c == CAP_J || c == CAP_C) { + if (c == LOW_J || c == CAP_J || c == CAP_C || c == LOW_H || c == CAP_H || c == LOW_K || c == CAP_K) { if (hourMetachar == u'\0') { hourMetachar = c; metacharStart = i; } ++metacharCount; + } else if (c == LOW_A || c == LOW_B || c == CAP_B) { + if (dayPeriodChar == u'\0') { + dayPeriodChar = c; + } + ++metacharCount; } else { if (hourMetachar != u'\0') { break; @@ -985,7 +991,6 @@ DateIntervalFormat::normalizeHourMetacharacters(const UnicodeString& skeleton) c if (hourMetachar != u'\0') { UErrorCode err = U_ZERO_ERROR; UChar hourChar = CAP_H; - UChar dayPeriodChar = LOW_A; UnicodeString convertedPattern = DateFormat::getBestPattern(fLocale, UnicodeString(hourMetachar), err); if (U_SUCCESS(err)) { @@ -1012,6 +1017,8 @@ DateIntervalFormat::normalizeHourMetacharacters(const UnicodeString& skeleton) c dayPeriodChar = LOW_B; } else if (convertedPattern.indexOf(CAP_B) != -1) { dayPeriodChar = CAP_B; + } else if (dayPeriodChar == u'\0') { + dayPeriodChar = LOW_A; } } diff --git a/deps/icu-small/source/i18n/dtptngen.cpp b/deps/icu-small/source/i18n/dtptngen.cpp index 6aee1750f90617..e781c6e26f5fcb 100644 --- a/deps/icu-small/source/i18n/dtptngen.cpp +++ b/deps/icu-small/source/i18n/dtptngen.cpp @@ -273,8 +273,6 @@ static const char* const CLDR_FIELD_WIDTH[] = { // [UDATPG_WIDTH_COUNT] "", "-short", "-narrow" }; -// TODO(ticket:13619): remove when definition uncommented in dtptngen.h. 
-static const int32_t UDATPG_WIDTH_COUNT = UDATPG_NARROW + 1; static constexpr UDateTimePGDisplayWidth UDATPG_WIDTH_APPENDITEM = UDATPG_WIDE; static constexpr int32_t UDATPG_FIELD_KEY_MAX = 24; // max length of CLDR field tag (type + width) @@ -393,10 +391,13 @@ DateTimePatternGenerator::operator=(const DateTimePatternGenerator& other) { *fp = *(other.fp); dtMatcher->copyFrom(other.dtMatcher->skeleton); *distanceInfo = *(other.distanceInfo); - dateTimeFormat = other.dateTimeFormat; + for (int32_t style = UDAT_FULL; style <= UDAT_SHORT; style++) { + dateTimeFormat[style] = other.dateTimeFormat[style]; + } decimal = other.decimal; - // NUL-terminate for the C API. - dateTimeFormat.getTerminatedBuffer(); + for (int32_t style = UDAT_FULL; style <= UDAT_SHORT; style++) { + dateTimeFormat[style].getTerminatedBuffer(); // NUL-terminate for the C API. + } decimal.getTerminatedBuffer(); delete skipMatcher; if ( other.skipMatcher == nullptr ) { @@ -430,7 +431,12 @@ DateTimePatternGenerator::operator==(const DateTimePatternGenerator& other) cons return true; } if ((pLocale==other.pLocale) && (patternMap->equals(*other.patternMap)) && - (dateTimeFormat==other.dateTimeFormat) && (decimal==other.decimal)) { + (decimal==other.decimal)) { + for (int32_t style = UDAT_FULL; style <= UDAT_SHORT; style++) { + if (dateTimeFormat[style] != other.dateTimeFormat[style]) { + return false; + } + } for ( int32_t i=0 ; igetSkeletonPtr(); + UDateFormatStyle style = UDAT_SHORT; + int32_t monthFieldLen = reqSkeleton->baseOriginal.getFieldLength(UDATPG_MONTH_FIELD); + if (monthFieldLen == 4) { + if (reqSkeleton->baseOriginal.getFieldLength(UDATPG_WEEKDAY_FIELD) > 0) { + style = UDAT_FULL; + } else { + style = UDAT_LONG; + } + } else if (monthFieldLen == 3) { + style = UDAT_MEDIUM; + } + // and now use it to compose date and time + dtFormat=getDateTimeFormat(style, status); SimpleFormatter(dtFormat, 2, 2, status).format(timePattern, datePattern, resultPattern, status); return resultPattern; } @@ 
-1335,14 +1355,45 @@ DateTimePatternGenerator::addCanonicalItems(UErrorCode& status) { void DateTimePatternGenerator::setDateTimeFormat(const UnicodeString& dtFormat) { - dateTimeFormat = dtFormat; - // NUL-terminate for the C API. - dateTimeFormat.getTerminatedBuffer(); + UErrorCode status = U_ZERO_ERROR; + for (int32_t style = UDAT_FULL; style <= UDAT_SHORT; style++) { + setDateTimeFormat((UDateFormatStyle)style, dtFormat, status); + } } const UnicodeString& DateTimePatternGenerator::getDateTimeFormat() const { - return dateTimeFormat; + UErrorCode status = U_ZERO_ERROR; + return getDateTimeFormat(UDAT_MEDIUM, status); +} + +void +DateTimePatternGenerator::setDateTimeFormat(UDateFormatStyle style, const UnicodeString& dtFormat, UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + if (style < UDAT_FULL || style > UDAT_SHORT) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + dateTimeFormat[style] = dtFormat; + // Note for the following: getTerminatedBuffer() can re-allocate the UnicodeString + // buffer so we do this here before clients request a const ref to the UnicodeString + // or its buffer. + dateTimeFormat[style].getTerminatedBuffer(); // NUL-terminate for the C API. 
+} + +const UnicodeString& +DateTimePatternGenerator::getDateTimeFormat(UDateFormatStyle style, UErrorCode& status) const { + static const UnicodeString emptyString = UNICODE_STRING_SIMPLE(""); + if (U_FAILURE(status)) { + return emptyString; + } + if (style < UDAT_FULL || style > UDAT_SHORT) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return emptyString; + } + return dateTimeFormat[style]; } void @@ -1378,13 +1429,15 @@ DateTimePatternGenerator::setDateTimeFromCalendar(const Locale& locale, UErrorCo } if (U_FAILURE(status)) { return; } - if (ures_getSize(dateTimePatterns.getAlias()) <= DateFormat::kDateTime) + if (ures_getSize(dateTimePatterns.getAlias()) <= DateFormat::kDateTimeOffset + DateFormat::kShort) { status = U_INVALID_FORMAT_ERROR; return; } - resStr = ures_getStringByIndex(dateTimePatterns.getAlias(), (int32_t)DateFormat::kDateTime, &resStrLen, &status); - setDateTimeFormat(UnicodeString(TRUE, resStr, resStrLen)); + for (int32_t style = UDAT_FULL; style <= UDAT_SHORT; style++) { + resStr = ures_getStringByIndex(dateTimePatterns.getAlias(), (int32_t)DateFormat::kDateTimeOffset + style, &resStrLen, &status); + setDateTimeFormat((UDateFormatStyle)style, UnicodeString(TRUE, resStr, resStrLen), status); + } } void @@ -2788,16 +2841,17 @@ DTSkeletonEnumeration::DTSkeletonEnumeration(PatternMap& patternMap, dtStrEnum t break; } if ( !isCanonicalItem(s) ) { - LocalPointer newElem(new UnicodeString(s), status); + LocalPointer newElem(s.clone(), status); if (U_FAILURE(status)) { return; } - fSkeletons->addElementX(newElem.getAlias(), status); + fSkeletons->addElement(newElem.getAlias(), status); if (U_FAILURE(status)) { fSkeletons.adoptInstead(nullptr); return; } - newElem.orphan(); // fSkeletons vector now owns the UnicodeString. + newElem.orphan(); // fSkeletons vector now owns the UnicodeString (although it + // does not use a deleter function to manage the ownership). 
} curElem = curElem->next.getAlias(); } @@ -2865,12 +2919,13 @@ DTRedundantEnumeration::add(const UnicodeString& pattern, UErrorCode& status) { if (U_FAILURE(status)) { return; } - fPatterns->addElementX(newElem.getAlias(), status); + fPatterns->addElement(newElem.getAlias(), status); if (U_FAILURE(status)) { fPatterns.adoptInstead(nullptr); return; } - newElem.orphan(); // fPatterns now owns the string. + newElem.orphan(); // fPatterns now owns the string, although a UVector + // deleter function is not used to manage that ownership. } const UnicodeString* diff --git a/deps/icu-small/source/i18n/formattedval_sbimpl.cpp b/deps/icu-small/source/i18n/formattedval_sbimpl.cpp index 70ffacac4b7416..72197cdd8c7abf 100644 --- a/deps/icu-small/source/i18n/formattedval_sbimpl.cpp +++ b/deps/icu-small/source/i18n/formattedval_sbimpl.cpp @@ -230,6 +230,11 @@ bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& if (si + 1 < spanIndicesCount) { nextSpanStart = spanIndices[si + 1].start; } + if (length == 0) { + // ICU-21871: Don't return fields on empty spans + i--; + continue; + } if (cfpos.matchesField(spanCategory, spanValue)) { fieldStart = i - fString.fZero; int32_t end = fieldStart + length; diff --git a/deps/icu-small/source/i18n/measunit_extra.cpp b/deps/icu-small/source/i18n/measunit_extra.cpp index 8281119007949a..2df9edee96a8a6 100644 --- a/deps/icu-small/source/i18n/measunit_extra.cpp +++ b/deps/icu-small/source/i18n/measunit_extra.cpp @@ -615,7 +615,7 @@ class Parser { // Set to true when we've seen a "-per-" or a "per-", after which all units // are in the denominator. Until we find an "-and-", at which point the - // identifier is invalid pending TODO(CLDR-13700). + // identifier is invalid pending TODO(CLDR-13701). bool fAfterPer = false; Parser() : fSource(""), fTrie(u"") {} @@ -669,7 +669,7 @@ class Parser { * dimensionality. 
* * Returns an error if we parse both compound units and "-and-", since mixed - * compound units are not yet supported - TODO(CLDR-13700). + * compound units are not yet supported - TODO(CLDR-13701). * * @param result Will be overwritten by the result, if status shows success. * @param sawAnd If an "-and-" was parsed prior to finding the "single diff --git a/deps/icu-small/source/i18n/msgfmt.cpp b/deps/icu-small/source/i18n/msgfmt.cpp index b8cb2e2ca560fe..13a5a0895160fb 100644 --- a/deps/icu-small/source/i18n/msgfmt.cpp +++ b/deps/icu-small/source/i18n/msgfmt.cpp @@ -854,19 +854,21 @@ StringEnumeration* MessageFormat::getFormatNames(UErrorCode& status) { if (U_FAILURE(status)) return NULL; - UVector *fFormatNames = new UVector(status); + LocalPointer formatNames(new UVector(status), status); if (U_FAILURE(status)) { - status = U_MEMORY_ALLOCATION_ERROR; - return NULL; + return nullptr; } - fFormatNames->setDeleter(uprv_deleteUObject); + formatNames->setDeleter(uprv_deleteUObject); for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { - fFormatNames->addElementX(new UnicodeString(getArgName(partIndex + 1)), status); + LocalPointer name(getArgName(partIndex + 1).clone(), status); + formatNames->adoptElement(name.orphan(), status); + if (U_FAILURE(status)) return nullptr; } - StringEnumeration* nameEnumerator = new FormatNameEnumeration(fFormatNames, status); - return nameEnumerator; + LocalPointer nameEnumerator( + new FormatNameEnumeration(std::move(formatNames), status), status); + return U_SUCCESS(status) ? 
nameEnumerator.orphan() : nullptr; } // ------------------------------------- @@ -1912,9 +1914,9 @@ void MessageFormat::DummyFormat::parseObject(const UnicodeString&, } -FormatNameEnumeration::FormatNameEnumeration(UVector *fNameList, UErrorCode& /*status*/) { +FormatNameEnumeration::FormatNameEnumeration(LocalPointer nameList, UErrorCode& /*status*/) { pos=0; - fFormatNames = fNameList; + fFormatNames = std::move(nameList); } const UnicodeString* @@ -1936,7 +1938,6 @@ FormatNameEnumeration::count(UErrorCode& /*status*/) const { } FormatNameEnumeration::~FormatNameEnumeration() { - delete fFormatNames; } MessageFormat::PluralSelectorProvider::PluralSelectorProvider(const MessageFormat &mf, UPluralType t) diff --git a/deps/icu-small/source/i18n/msgfmt_impl.h b/deps/icu-small/source/i18n/msgfmt_impl.h index 57988389132a67..80cb8a691eb1dd 100644 --- a/deps/icu-small/source/i18n/msgfmt_impl.h +++ b/deps/icu-small/source/i18n/msgfmt_impl.h @@ -26,7 +26,7 @@ U_NAMESPACE_BEGIN class FormatNameEnumeration : public StringEnumeration { public: - FormatNameEnumeration(UVector *fFormatNames, UErrorCode& status); + FormatNameEnumeration(LocalPointer fFormatNames, UErrorCode& status); virtual ~FormatNameEnumeration(); static UClassID U_EXPORT2 getStaticClassID(void); virtual UClassID getDynamicClassID(void) const override; @@ -35,7 +35,7 @@ class FormatNameEnumeration : public StringEnumeration { virtual int32_t count(UErrorCode& status) const override; private: int32_t pos; - UVector *fFormatNames; + LocalPointer fFormatNames; }; U_NAMESPACE_END diff --git a/deps/icu-small/source/i18n/number_affixutils.cpp b/deps/icu-small/source/i18n/number_affixutils.cpp index f9c154c885cd8a..5f5ff4c3a63422 100644 --- a/deps/icu-small/source/i18n/number_affixutils.cpp +++ b/deps/icu-small/source/i18n/number_affixutils.cpp @@ -135,8 +135,7 @@ Field AffixUtils::getFieldForType(AffixPatternType type) { case TYPE_PLUS_SIGN: return {UFIELD_CATEGORY_NUMBER, UNUM_SIGN_FIELD}; case 
TYPE_APPROXIMATELY_SIGN: - // TODO: Introduce a new field for the approximately sign? - return {UFIELD_CATEGORY_NUMBER, UNUM_SIGN_FIELD}; + return {UFIELD_CATEGORY_NUMBER, UNUM_APPROXIMATELY_SIGN_FIELD}; case TYPE_PERCENT: return {UFIELD_CATEGORY_NUMBER, UNUM_PERCENT_FIELD}; case TYPE_PERMILLE: diff --git a/deps/icu-small/source/i18n/number_compact.cpp b/deps/icu-small/source/i18n/number_compact.cpp index 62692f444dff07..60cd7bedf667b0 100644 --- a/deps/icu-small/source/i18n/number_compact.cpp +++ b/deps/icu-small/source/i18n/number_compact.cpp @@ -157,8 +157,8 @@ void CompactData::getUniquePatterns(UVector &output, UErrorCode &status) const { } // The string was not found; add it to the UVector. - // ANDY: This requires a const_cast. Why? - output.addElementX(const_cast(pattern), status); + // Note: must cast off const from pattern to store it in a UVector, which expects (void *) + output.addElement(const_cast(pattern), status); continue_outer: continue; diff --git a/deps/icu-small/source/i18n/number_decimalquantity.cpp b/deps/icu-small/source/i18n/number_decimalquantity.cpp index 6a2847b1c18f19..b40e1276c350fe 100644 --- a/deps/icu-small/source/i18n/number_decimalquantity.cpp +++ b/deps/icu-small/source/i18n/number_decimalquantity.cpp @@ -181,20 +181,22 @@ uint64_t DecimalQuantity::getPositionFingerprint() const { return fingerprint; } -void DecimalQuantity::roundToIncrement(double roundingIncrement, RoundingMode roundingMode, - UErrorCode& status) { +void DecimalQuantity::roundToIncrement( + uint64_t increment, + digits_t magnitude, + RoundingMode roundingMode, + UErrorCode& status) { // Do not call this method with an increment having only a 1 or a 5 digit! // Use a more efficient call to either roundToMagnitude() or roundToNickel(). // Check a few popular rounding increments; a more thorough check is in Java. 
- U_ASSERT(roundingIncrement != 0.01); - U_ASSERT(roundingIncrement != 0.05); - U_ASSERT(roundingIncrement != 0.1); - U_ASSERT(roundingIncrement != 0.5); - U_ASSERT(roundingIncrement != 1); - U_ASSERT(roundingIncrement != 5); + U_ASSERT(increment != 1); + U_ASSERT(increment != 5); + DecimalQuantity incrementDQ; + incrementDQ.setToLong(increment); + incrementDQ.adjustMagnitude(magnitude); DecNum incrementDN; - incrementDN.setTo(roundingIncrement, status); + incrementDQ.toDecNum(incrementDN, status); if (U_FAILURE(status)) { return; } // Divide this DecimalQuantity by the increment, round, then multiply back. @@ -254,6 +256,12 @@ bool DecimalQuantity::adjustMagnitude(int32_t delta) { return false; } +int32_t DecimalQuantity::adjustToZeroScale() { + int32_t retval = scale; + scale = 0; + return retval; +} + double DecimalQuantity::getPluralOperand(PluralOperand operand) const { // If this assertion fails, you need to call roundToInfinity() or some other rounding method. // See the comment at the top of this file explaining the "isApproximate" field. @@ -549,6 +557,65 @@ void DecimalQuantity::_setToDecNum(const DecNum& decnum, UErrorCode& status) { } } +DecimalQuantity DecimalQuantity::fromExponentString(UnicodeString num, UErrorCode& status) { + if (num.indexOf(u'e') >= 0 || num.indexOf(u'c') >= 0 + || num.indexOf(u'E') >= 0 || num.indexOf(u'C') >= 0) { + int32_t ePos = num.lastIndexOf('e'); + if (ePos < 0) { + ePos = num.lastIndexOf('c'); + } + if (ePos < 0) { + ePos = num.lastIndexOf('E'); + } + if (ePos < 0) { + ePos = num.lastIndexOf('C'); + } + int32_t expNumPos = ePos + 1; + UnicodeString exponentStr = num.tempSubString(expNumPos, num.length() - expNumPos); + + // parse exponentStr into exponent, but note that parseAsciiInteger doesn't handle the minus sign + bool isExpStrNeg = num[expNumPos] == u'-'; + int32_t exponentParsePos = isExpStrNeg ? 1 : 0; + int32_t exponent = ICU_Utility::parseAsciiInteger(exponentStr, exponentParsePos); + exponent = isExpStrNeg ? 
-exponent : exponent; + + // Compute the decNumber representation + UnicodeString fractionStr = num.tempSubString(0, ePos); + CharString fracCharStr = CharString(); + fracCharStr.appendInvariantChars(fractionStr, status); + DecNum decnum; + decnum.setTo(fracCharStr.toStringPiece(), status); + + // Clear and set this DecimalQuantity instance + DecimalQuantity dq; + dq.setToDecNum(decnum, status); + int32_t numFracDigit = getVisibleFractionCount(fractionStr); + dq.setMinFraction(numFracDigit); + dq.adjustExponent(exponent); + + return dq; + } else { + DecimalQuantity dq; + int numFracDigit = getVisibleFractionCount(num); + + CharString numCharStr = CharString(); + numCharStr.appendInvariantChars(num, status); + dq.setToDecNumber(numCharStr.toStringPiece(), status); + + dq.setMinFraction(numFracDigit); + return dq; + } +} + +int32_t DecimalQuantity::getVisibleFractionCount(UnicodeString value) { + int decimalPos = value.indexOf('.') + 1; + if (decimalPos == 0) { + return 0; + } else { + return value.length() - decimalPos; + } +} + int64_t DecimalQuantity::toLong(bool truncateIfOverflow) const { // NOTE: Call sites should be guarded by fitsInLong(), like this: // if (dq.fitsInLong()) { /* use dq.toLong() */ } else { /* use some fallback */ } @@ -948,6 +1015,44 @@ UnicodeString DecimalQuantity::toPlainString() const { return sb; } + +UnicodeString DecimalQuantity::toExponentString() const { + U_ASSERT(!isApproximate); + UnicodeString sb; + if (isNegative()) { + sb.append(u'-'); + } + + int32_t upper = scale + precision - 1; + int32_t lower = scale; + if (upper < lReqPos - 1) { + upper = lReqPos - 1; + } + if (lower > rReqPos) { + lower = rReqPos; + } + int32_t p = upper; + if (p < 0) { + sb.append(u'0'); + } + for (; p >= 0; p--) { + sb.append(u'0' + getDigitPos(p - scale)); + } + if (lower < 0) { + sb.append(u'.'); + } + for(; p >= lower; p--) { + sb.append(u'0' + getDigitPos(p - scale)); + } + + if (exponent != 0) { + sb.append(u'c'); + ICU_Utility::appendNumber(sb, 
exponent); + } + + return sb; +} + UnicodeString DecimalQuantity::toScientificString() const { U_ASSERT(!isApproximate); UnicodeString result; diff --git a/deps/icu-small/source/i18n/number_decimalquantity.h b/deps/icu-small/source/i18n/number_decimalquantity.h index 107c09a96a53d2..862addf5d6cd90 100644 --- a/deps/icu-small/source/i18n/number_decimalquantity.h +++ b/deps/icu-small/source/i18n/number_decimalquantity.h @@ -81,11 +81,15 @@ class U_I18N_API DecimalQuantity : public IFixedDecimal, public UMemory { * *

If rounding to a power of ten, use the more efficient {@link #roundToMagnitude} instead. * - * @param roundingIncrement The increment to which to round. + * @param increment The increment to which to round. + * @param magnitude The power of 10 to which to round. * @param roundingMode The {@link RoundingMode} to use if rounding is necessary. */ - void roundToIncrement(double roundingIncrement, RoundingMode roundingMode, - UErrorCode& status); + void roundToIncrement( + uint64_t increment, + digits_t magnitude, + RoundingMode roundingMode, + UErrorCode& status); /** Removes all fraction digits. */ void truncate(); @@ -140,6 +144,13 @@ class U_I18N_API DecimalQuantity : public IFixedDecimal, public UMemory { */ bool adjustMagnitude(int32_t delta); + /** + * Scales the number such that the least significant nonzero digit is at magnitude 0. + * + * @return The previous magnitude of the least significant digit. + */ + int32_t adjustToZeroScale(); + /** * @return The power of ten corresponding to the most significant nonzero digit. * The number must not be zero. @@ -234,6 +245,9 @@ class U_I18N_API DecimalQuantity : public IFixedDecimal, public UMemory { /** Internal method if the caller already has a DecNum. */ DecimalQuantity &setToDecNum(const DecNum& n, UErrorCode& status); + /** Returns a DecimalQuantity after parsing the input string. */ + static DecimalQuantity fromExponentString(UnicodeString n, UErrorCode& status); + /** * Appends a digit, optionally with one or more leading zeros, to the end of the value represented * by this DecimalQuantity. @@ -315,6 +329,10 @@ class U_I18N_API DecimalQuantity : public IFixedDecimal, public UMemory { /** Returns the string without exponential notation. Slightly slower than toScientificString(). */ UnicodeString toPlainString() const; + /** Returns the string using ASCII digits and using exponential notation for non-zero + exponents, following the UTS 35 specification for plural rule samples. 
*/ + UnicodeString toExponentString() const; + /** Visible for testing */ inline bool isUsingBytes() { return usingBytes; } @@ -518,6 +536,8 @@ class U_I18N_API DecimalQuantity : public IFixedDecimal, public UMemory { void _setToDecNum(const DecNum& dn, UErrorCode& status); + static int32_t getVisibleFractionCount(UnicodeString value); + void convertToAccurateDouble(); /** Ensure that a byte array of at least 40 digits is allocated. */ diff --git a/deps/icu-small/source/i18n/number_longnames.cpp b/deps/icu-small/source/i18n/number_longnames.cpp index 5a4cf6321c8a40..b4e96504dede98 100644 --- a/deps/icu-small/source/i18n/number_longnames.cpp +++ b/deps/icu-small/source/i18n/number_longnames.cpp @@ -431,13 +431,33 @@ void getMeasureData(const Locale &locale, subKey.append(unit.getType(), status); subKey.append("/", status); + // Check if unitSubType is an alias or not. + LocalUResourceBundlePointer aliasBundle(ures_open(U_ICUDATA_ALIAS, "metadata", &status)); + + UErrorCode aliasStatus = status; + StackUResourceBundle aliasFillIn; + CharString aliasKey; + aliasKey.append("alias/unit/", aliasStatus); + aliasKey.append(unit.getSubtype(), aliasStatus); + aliasKey.append("/replacement", aliasStatus); + ures_getByKeyWithFallback(aliasBundle.getAlias(), aliasKey.data(), aliasFillIn.getAlias(), + &aliasStatus); + CharString unitSubType; + if (!U_FAILURE(aliasStatus)) { + // This means the subType is an alias. Then, replace unitSubType with the replacement. + auto replacement = ures_getUnicodeString(aliasFillIn.getAlias(), &status); + unitSubType.appendInvariantChars(replacement, status); + } else { + unitSubType.append(unit.getSubtype(), status); + } + // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ... // TODO(ICU-20400): Get duration-*-person data properly with aliases. 
- int32_t subtypeLen = static_cast(uprv_strlen(unit.getSubtype())); - if (subtypeLen > 7 && uprv_strcmp(unit.getSubtype() + subtypeLen - 7, "-person") == 0) { - subKey.append({unit.getSubtype(), subtypeLen - 7}, status); + int32_t subtypeLen = static_cast(uprv_strlen(unitSubType.data())); + if (subtypeLen > 7 && uprv_strcmp(unitSubType.data() + subtypeLen - 7, "-person") == 0) { + subKey.append({unitSubType.data(), subtypeLen - 7}, status); } else { - subKey.append({unit.getSubtype(), subtypeLen}, status); + subKey.append({unitSubType.data(), subtypeLen}, status); } if (width != UNUM_UNIT_WIDTH_FULL_NAME) { diff --git a/deps/icu-small/source/i18n/number_mapper.cpp b/deps/icu-small/source/i18n/number_mapper.cpp index 2d4d47a094d999..350c431dfdd079 100644 --- a/deps/icu-small/source/i18n/number_mapper.cpp +++ b/deps/icu-small/source/i18n/number_mapper.cpp @@ -134,7 +134,8 @@ MacroProps NumberPropertyMapper::oldToNew(const DecimalFormatProperties& propert if (PatternStringUtils::ignoreRoundingIncrement(roundingIncrement, maxFrac)) { precision = Precision::constructFraction(minFrac, maxFrac); } else { - precision = Precision::constructIncrement(roundingIncrement, minFrac); + // Convert the double increment to an integer increment + precision = Precision::increment(roundingIncrement).withMinFraction(minFrac); } } else if (explicitMinMaxSig) { minSig = minSig < 1 ? 1 : minSig > kMaxIntFracSig ? 
kMaxIntFracSig : minSig; @@ -293,9 +294,14 @@ MacroProps NumberPropertyMapper::oldToNew(const DecimalFormatProperties& propert } else if (rounding_.fType == Precision::PrecisionType::RND_INCREMENT || rounding_.fType == Precision::PrecisionType::RND_INCREMENT_ONE || rounding_.fType == Precision::PrecisionType::RND_INCREMENT_FIVE) { - increment_ = rounding_.fUnion.increment.fIncrement; minFrac_ = rounding_.fUnion.increment.fMinFrac; + // If incrementRounding is used, maxFrac is set equal to minFrac maxFrac_ = rounding_.fUnion.increment.fMinFrac; + // Convert the integer increment to a double + DecimalQuantity dq; + dq.setToLong(rounding_.fUnion.increment.fIncrement); + dq.adjustMagnitude(rounding_.fUnion.increment.fIncrementMagnitude); + increment_ = dq.toDouble(); } else if (rounding_.fType == Precision::PrecisionType::RND_SIGNIFICANT) { minSig_ = rounding_.fUnion.fracSig.fMinSig; maxSig_ = rounding_.fUnion.fracSig.fMaxSig; diff --git a/deps/icu-small/source/i18n/number_output.cpp b/deps/icu-small/source/i18n/number_output.cpp index 2c2c25eaedb427..78006da8c42f0a 100644 --- a/deps/icu-small/source/i18n/number_output.cpp +++ b/deps/icu-small/source/i18n/number_output.cpp @@ -39,6 +39,49 @@ MeasureUnit FormattedNumber::getOutputUnit(UErrorCode& status) const { return fData->outputUnit; } +NounClass FormattedNumber::getNounClass(UErrorCode &status) const { + UPRV_FORMATTED_VALUE_METHOD_GUARD(NounClass::OTHER); + const char *nounClass = fData->gender; + + // if it is not exist, return `OTHER` + if (uprv_strcmp(nounClass, "") == 0) { + return NounClass::OTHER; + } + + if (uprv_strcmp(nounClass, "neuter") == 0) { + return NounClass::NEUTER; + } + + if (uprv_strcmp(nounClass, "feminine") == 0) { + return NounClass::FEMININE; + } + + if (uprv_strcmp(nounClass, "masculine") == 0) { + return NounClass::MASCULINE; + } + + if (uprv_strcmp(nounClass, "animate") == 0) { + return NounClass::ANIMATE; + } + + if (uprv_strcmp(nounClass, "inanimate") == 0) { + return 
NounClass::INANIMATE; + } + + if (uprv_strcmp(nounClass, "personal") == 0) { + return NounClass::PERSONAL; + } + + if (uprv_strcmp(nounClass, "common") == 0) { + return NounClass::COMMON; + } + + // In case there is no matching, this means there are noun classes + // that are not supported yet. + status = U_INTERNAL_PROGRAM_ERROR; + return NounClass::OTHER; +} + const char *FormattedNumber::getGender(UErrorCode &status) const { UPRV_FORMATTED_VALUE_METHOD_GUARD("") return fData->gender; diff --git a/deps/icu-small/source/i18n/number_patternstring.cpp b/deps/icu-small/source/i18n/number_patternstring.cpp index e819d39e96769d..2738895d8ad03f 100644 --- a/deps/icu-small/source/i18n/number_patternstring.cpp +++ b/deps/icu-small/source/i18n/number_patternstring.cpp @@ -750,7 +750,7 @@ UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatP int32_t groupingLength = grouping1 + grouping2 + 1; // Figure out the digits we need to put in the pattern. - double roundingInterval = properties.roundingIncrement; + double increment = properties.roundingIncrement; UnicodeString digitsString; int32_t digitsStringScale = 0; if (maxSig != uprv_min(dosMax, -1)) { @@ -761,14 +761,14 @@ UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatP while (digitsString.length() < maxSig) { digitsString.append(u'#'); } - } else if (roundingInterval != 0.0 && !ignoreRoundingIncrement(roundingInterval,maxFrac)) { - // Rounding Interval. - digitsStringScale = -roundingutils::doubleFractionLength(roundingInterval, nullptr); - // TODO: Check for DoS here? + } else if (increment != 0.0 && !ignoreRoundingIncrement(increment,maxFrac)) { + // Rounding Increment. 
DecimalQuantity incrementQuantity; - incrementQuantity.setToDouble(roundingInterval); + incrementQuantity.setToDouble(increment); + incrementQuantity.roundToInfinity(); + digitsStringScale = incrementQuantity.getLowerDisplayMagnitude(); incrementQuantity.adjustMagnitude(-digitsStringScale); - incrementQuantity.roundToMagnitude(0, kDefaultMode, status); + incrementQuantity.setMinInteger(minInt - digitsStringScale); UnicodeString str = incrementQuantity.toPlainString(); if (str.charAt(0) == u'-') { // TODO: Unsupported operation exception or fail silently? diff --git a/deps/icu-small/source/i18n/number_rounding.cpp b/deps/icu-small/source/i18n/number_rounding.cpp index 877df63c8f68ef..a9b3f16c050d94 100644 --- a/deps/icu-small/source/i18n/number_rounding.cpp +++ b/deps/icu-small/source/i18n/number_rounding.cpp @@ -36,27 +36,24 @@ void number::impl::parseIncrementOption(const StringSegment &segment, // Utilize DecimalQuantity/decNumber to parse this for us. DecimalQuantity dq; UErrorCode localStatus = U_ZERO_ERROR; - DecNum decnum; - decnum.setTo({buffer.data(), buffer.length()}, localStatus); - dq.setToDecNum(decnum, localStatus); - if (U_FAILURE(localStatus) || decnum.isSpecial()) { + dq.setToDecNumber({buffer.data(), buffer.length()}, localStatus); + if (U_FAILURE(localStatus) || dq.isNaN() || dq.isInfinite()) { // throw new SkeletonSyntaxException("Invalid rounding increment", segment, e); status = U_NUMBER_SKELETON_SYNTAX_ERROR; return; } - double increment = dq.toDouble(); - - // We also need to figure out how many digits. Do a brute force string operation. 
- int decimalOffset = 0; - while (decimalOffset < segment.length() && segment.charAt(decimalOffset) != '.') { - decimalOffset++; - } - if (decimalOffset == segment.length()) { - outPrecision = Precision::increment(increment); - } else { - int32_t fractionLength = segment.length() - decimalOffset - 1; - outPrecision = Precision::increment(increment).withMinFraction(fractionLength); + // Now we break apart the number into a mantissa and exponent (magnitude). + int32_t magnitude = dq.adjustToZeroScale(); + // setToDecNumber drops trailing zeros, so we search for the '.' manually. + for (int32_t i=0; i(length - point); -} - - Precision Precision::unlimited() { return Precision(RND_NONE, {}); } @@ -204,7 +173,19 @@ Precision Precision::trailingZeroDisplay(UNumberTrailingZeroDisplay trailingZero IncrementPrecision Precision::increment(double roundingIncrement) { if (roundingIncrement > 0.0) { - return constructIncrement(roundingIncrement, 0); + DecimalQuantity dq; + dq.setToDouble(roundingIncrement); + dq.roundToInfinity(); + int32_t magnitude = dq.adjustToZeroScale(); + return constructIncrement(dq.toLong(), magnitude); + } else { + return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; + } +} + +IncrementPrecision Precision::incrementExact(uint64_t mantissa, int16_t magnitude) { + if (mantissa > 0.0) { + return constructIncrement(mantissa, magnitude); } else { return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } @@ -226,7 +207,8 @@ Precision FractionPrecision::withSignificantDigits( *this, minSignificantDigits, maxSignificantDigits, - priority); + priority, + false); } else { return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } @@ -239,7 +221,8 @@ Precision FractionPrecision::withMinDigits(int32_t minSignificantDigits) const { *this, 1, minSignificantDigits, - UNUM_ROUNDING_PRIORITY_RELAXED); + UNUM_ROUNDING_PRIORITY_RELAXED, + true); } else { return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } @@ -251,7 +234,8 @@ Precision FractionPrecision::withMaxDigits(int32_t maxSignificantDigits) const { return 
constructFractionSignificant(*this, 1, maxSignificantDigits, - UNUM_ROUNDING_PRIORITY_STRICT); + UNUM_ROUNDING_PRIORITY_STRICT, + true); } else { return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } @@ -266,8 +250,8 @@ Precision Precision::withCurrency(const CurrencyUnit ¤cy, UErrorCode &stat int32_t minMaxFrac = ucurr_getDefaultFractionDigitsForUsage( isoCode, fUnion.currencyUsage, &status); Precision retval = (increment != 0.0) - ? static_cast(constructIncrement(increment, minMaxFrac)) - : static_cast(constructFraction(minMaxFrac, minMaxFrac)); + ? Precision::increment(increment) + : static_cast(Precision::fixedFraction(minMaxFrac)); retval.fTrailingZeroDisplay = fTrailingZeroDisplay; return retval; } @@ -285,7 +269,9 @@ Precision CurrencyPrecision::withCurrency(const CurrencyUnit ¤cy) const { Precision IncrementPrecision::withMinFraction(int32_t minFrac) const { if (fType == RND_ERROR) { return *this; } // no-op in error state if (minFrac >= 0 && minFrac <= kMaxIntFracSig) { - return constructIncrement(fUnion.increment.fIncrement, minFrac); + IncrementPrecision copy = *this; + copy.fUnion.increment.fMinFrac = minFrac; + return copy; } else { return {U_NUMBER_ARG_OUTOFBOUNDS_ERROR}; } @@ -318,35 +304,34 @@ Precision::constructFractionSignificant( const FractionPrecision &base, int32_t minSig, int32_t maxSig, - UNumberRoundingPriority priority) { + UNumberRoundingPriority priority, + bool retain) { FractionSignificantSettings settings = base.fUnion.fracSig; settings.fMinSig = static_cast(minSig); settings.fMaxSig = static_cast(maxSig); settings.fPriority = priority; + settings.fRetain = retain; PrecisionUnion union_; union_.fracSig = settings; return {RND_FRACTION_SIGNIFICANT, union_}; } -IncrementPrecision Precision::constructIncrement(double increment, int32_t minFrac) { +IncrementPrecision Precision::constructIncrement(uint64_t increment, digits_t magnitude) { IncrementSettings settings; // Note: For number formatting, fIncrement is used for RND_INCREMENT but not // 
RND_INCREMENT_ONE or RND_INCREMENT_FIVE. However, fIncrement is used in all // three when constructing a skeleton. settings.fIncrement = increment; - settings.fMinFrac = static_cast(minFrac); - // One of the few pre-computed quantities: - // Note: it is possible for minFrac to be more than maxFrac... (misleading) - int8_t singleDigit; - settings.fMaxFrac = roundingutils::doubleFractionLength(increment, &singleDigit); + settings.fIncrementMagnitude = magnitude; + settings.fMinFrac = magnitude > 0 ? 0 : -magnitude; PrecisionUnion union_; union_.increment = settings; - if (singleDigit == 1) { + if (increment == 1) { // NOTE: In C++, we must return the correct value type with the correct union. // It would be invalid to return a RND_FRACTION here because the methods on the // IncrementPrecision type assume that the union is backed by increment data. return {RND_INCREMENT_ONE, union_}; - } else if (singleDigit == 5) { + } else if (increment == 5) { return {RND_INCREMENT_FIVE, union_}; } else { return {RND_INCREMENT, union_}; @@ -457,6 +442,23 @@ void RoundingImpl::apply(impl::DecimalQuantity &value, UErrorCode& status) const break; case Precision::RND_FRACTION_SIGNIFICANT: { + // From ECMA-402: + /* + Let sResult be ToRawPrecision(...). + Let fResult be ToRawFixed(...). + If intlObj.[[RoundingType]] is morePrecision, then + If sResult.[[RoundingMagnitude]] ≤ fResult.[[RoundingMagnitude]], then + Let result be sResult. + Else, + Let result be fResult. + Else, + Assert: intlObj.[[RoundingType]] is lessPrecision. + If sResult.[[RoundingMagnitude]] ≤ fResult.[[RoundingMagnitude]], then + Let result be fResult. + Else, + Let result be sResult. 
+ */ + int32_t roundingMag1 = getRoundingMagnitudeFraction(fPrecision.fUnion.fracSig.fMaxFrac); int32_t roundingMag2 = getRoundingMagnitudeSignificant(value, fPrecision.fUnion.fracSig.fMaxSig); int32_t roundingMag; @@ -465,11 +467,35 @@ void RoundingImpl::apply(impl::DecimalQuantity &value, UErrorCode& status) const } else { roundingMag = uprv_max(roundingMag1, roundingMag2); } - value.roundToMagnitude(roundingMag, fRoundingMode, status); + if (!value.isZeroish()) { + int32_t upperMag = value.getMagnitude(); + value.roundToMagnitude(roundingMag, fRoundingMode, status); + if (!value.isZeroish() && value.getMagnitude() != upperMag && roundingMag1 == roundingMag2) { + // roundingMag2 needs to be the magnitude after rounding + roundingMag2 += 1; + } + } int32_t displayMag1 = getDisplayMagnitudeFraction(fPrecision.fUnion.fracSig.fMinFrac); int32_t displayMag2 = getDisplayMagnitudeSignificant(value, fPrecision.fUnion.fracSig.fMinSig); - int32_t displayMag = uprv_min(displayMag1, displayMag2); + int32_t displayMag; + if (fPrecision.fUnion.fracSig.fRetain) { + // withMinDigits + withMaxDigits + displayMag = uprv_min(displayMag1, displayMag2); + } else if (fPrecision.fUnion.fracSig.fPriority == UNUM_ROUNDING_PRIORITY_RELAXED) { + if (roundingMag2 <= roundingMag1) { + displayMag = displayMag2; + } else { + displayMag = displayMag1; + } + } else { + U_ASSERT(fPrecision.fUnion.fracSig.fPriority == UNUM_ROUNDING_PRIORITY_STRICT); + if (roundingMag2 <= roundingMag1) { + displayMag = displayMag1; + } else { + displayMag = displayMag2; + } + } resolvedMinFraction = uprv_max(0, -displayMag); break; @@ -478,6 +504,7 @@ void RoundingImpl::apply(impl::DecimalQuantity &value, UErrorCode& status) const case Precision::RND_INCREMENT: value.roundToIncrement( fPrecision.fUnion.increment.fIncrement, + fPrecision.fUnion.increment.fIncrementMagnitude, fRoundingMode, status); resolvedMinFraction = fPrecision.fUnion.increment.fMinFrac; @@ -485,7 +512,7 @@ void 
RoundingImpl::apply(impl::DecimalQuantity &value, UErrorCode& status) const case Precision::RND_INCREMENT_ONE: value.roundToMagnitude( - -fPrecision.fUnion.increment.fMaxFrac, + fPrecision.fUnion.increment.fIncrementMagnitude, fRoundingMode, status); resolvedMinFraction = fPrecision.fUnion.increment.fMinFrac; @@ -493,7 +520,7 @@ void RoundingImpl::apply(impl::DecimalQuantity &value, UErrorCode& status) const case Precision::RND_INCREMENT_FIVE: value.roundToNickel( - -fPrecision.fUnion.increment.fMaxFrac, + fPrecision.fUnion.increment.fIncrementMagnitude, fRoundingMode, status); resolvedMinFraction = fPrecision.fUnion.increment.fMinFrac; diff --git a/deps/icu-small/source/i18n/number_roundingutils.h b/deps/icu-small/source/i18n/number_roundingutils.h index 06fadd29fd544e..66571272545854 100644 --- a/deps/icu-small/source/i18n/number_roundingutils.h +++ b/deps/icu-small/source/i18n/number_roundingutils.h @@ -174,15 +174,6 @@ inline bool roundsAtMidpoint(int roundingMode) { } } -/** - * Computes the number of fraction digits in a double. Used for computing maxFrac for an increment. - * Calls into the DoubleToStringConverter library to do so. - * - * @param singleDigit An output parameter; set to a number if that is the - * only digit in the double, or -1 if there is more than one digit. 
- */ -digits_t doubleFractionLength(double input, int8_t* singleDigit); - } // namespace roundingutils diff --git a/deps/icu-small/source/i18n/number_skeletons.cpp b/deps/icu-small/source/i18n/number_skeletons.cpp index de70c5cedff3ca..c51831b6823809 100644 --- a/deps/icu-small/source/i18n/number_skeletons.cpp +++ b/deps/icu-small/source/i18n/number_skeletons.cpp @@ -1344,8 +1344,9 @@ bool blueprint_helpers::parseFracSigOption(const StringSegment& segment, MacroPr // @, @@, @@@ maxSig = minSig; } - UNumberRoundingPriority priority; + auto& oldPrecision = static_cast(macros.precision); if (offset < segment.length()) { + UNumberRoundingPriority priority; if (maxSig == -1) { // The wildcard character is not allowed with the priority annotation status = U_NUMBER_SKELETON_SYNTAX_ERROR; @@ -1367,22 +1368,19 @@ bool blueprint_helpers::parseFracSigOption(const StringSegment& segment, MacroPr status = U_NUMBER_SKELETON_SYNTAX_ERROR; return false; } + macros.precision = oldPrecision.withSignificantDigits(minSig, maxSig, priority); } else if (maxSig == -1) { // withMinDigits - maxSig = minSig; - minSig = 1; - priority = UNUM_ROUNDING_PRIORITY_RELAXED; + macros.precision = oldPrecision.withMinDigits(minSig); } else if (minSig == 1) { // withMaxDigits - priority = UNUM_ROUNDING_PRIORITY_STRICT; + macros.precision = oldPrecision.withMaxDigits(maxSig); } else { // Digits options with both min and max sig require the priority option status = U_NUMBER_SKELETON_SYNTAX_ERROR; return false; } - auto& oldPrecision = static_cast(macros.precision); - macros.precision = oldPrecision.withSignificantDigits(minSig, maxSig, priority); return true; } @@ -1399,12 +1397,16 @@ void blueprint_helpers::parseIncrementOption(const StringSegment &segment, Macro number::impl::parseIncrementOption(segment, macros.precision, status); } -void blueprint_helpers::generateIncrementOption(double increment, int32_t minFrac, UnicodeString& sb, - UErrorCode&) { +void blueprint_helpers::generateIncrementOption( + 
uint32_t increment, + digits_t incrementMagnitude, + int32_t minFrac, + UnicodeString& sb, + UErrorCode&) { // Utilize DecimalQuantity/double_conversion to format this for us. DecimalQuantity dq; - dq.setToDouble(increment); - dq.roundToInfinity(); + dq.setToLong(increment); + dq.adjustMagnitude(incrementMagnitude); dq.setMinFraction(minFrac); sb.append(dq.toPlainString()); } @@ -1617,11 +1619,21 @@ bool GeneratorHelpers::precision(const MacroProps& macros, UnicodeString& sb, UE const Precision::FractionSignificantSettings& impl = macros.precision.fUnion.fracSig; blueprint_helpers::generateFractionStem(impl.fMinFrac, impl.fMaxFrac, sb, status); sb.append(u'/'); - blueprint_helpers::generateDigitsStem(impl.fMinSig, impl.fMaxSig, sb, status); - if (impl.fPriority == UNUM_ROUNDING_PRIORITY_RELAXED) { - sb.append(u'r'); + if (impl.fRetain) { + if (impl.fPriority == UNUM_ROUNDING_PRIORITY_RELAXED) { + // withMinDigits + blueprint_helpers::generateDigitsStem(impl.fMaxSig, -1, sb, status); + } else { + // withMaxDigits + blueprint_helpers::generateDigitsStem(1, impl.fMaxSig, sb, status); + } } else { - sb.append(u's'); + blueprint_helpers::generateDigitsStem(impl.fMinSig, impl.fMaxSig, sb, status); + if (impl.fPriority == UNUM_ROUNDING_PRIORITY_RELAXED) { + sb.append(u'r'); + } else { + sb.append(u's'); + } } } else if (macros.precision.fType == Precision::RND_INCREMENT || macros.precision.fType == Precision::RND_INCREMENT_ONE @@ -1630,6 +1642,7 @@ bool GeneratorHelpers::precision(const MacroProps& macros, UnicodeString& sb, UE sb.append(u"precision-increment/", -1); blueprint_helpers::generateIncrementOption( impl.fIncrement, + impl.fIncrementMagnitude, impl.fMinFrac, sb, status); diff --git a/deps/icu-small/source/i18n/number_skeletons.h b/deps/icu-small/source/i18n/number_skeletons.h index be41f1b3237a94..27f69cd48c39e9 100644 --- a/deps/icu-small/source/i18n/number_skeletons.h +++ b/deps/icu-small/source/i18n/number_skeletons.h @@ -286,7 +286,7 @@ bool 
parseTrailingZeroOption(const StringSegment& segment, MacroProps& macros, U void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); void -generateIncrementOption(double increment, int32_t minFrac, UnicodeString& sb, UErrorCode& status); +generateIncrementOption(uint32_t increment, digits_t incrementMagnitude, int32_t minFrac, UnicodeString& sb, UErrorCode& status); void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); diff --git a/deps/icu-small/source/i18n/numsys.cpp b/deps/icu-small/source/i18n/numsys.cpp index 44aaf8e2a5f987..934149039c52d7 100644 --- a/deps/icu-small/source/i18n/numsys.cpp +++ b/deps/icu-small/source/i18n/numsys.cpp @@ -313,12 +313,7 @@ U_CFUNC void initNumsysNames(UErrorCode &status) { } const char *nsName = ures_getKey(nsCurrent.getAlias()); LocalPointer newElem(new UnicodeString(nsName, -1, US_INV), status); - if (U_SUCCESS(status)) { - numsysNames->addElementX(newElem.getAlias(), status); - if (U_SUCCESS(status)) { - newElem.orphan(); // on success, the numsysNames vector owns newElem. 
- } - } + numsysNames->adoptElement(newElem.orphan(), status); } ures_close(numberingSystemsInfo); diff --git a/deps/icu-small/source/i18n/plurrule.cpp b/deps/icu-small/source/i18n/plurrule.cpp index d1918c4698138b..7d1037f8bdd9e0 100644 --- a/deps/icu-small/source/i18n/plurrule.cpp +++ b/deps/icu-small/source/i18n/plurrule.cpp @@ -1548,14 +1548,9 @@ PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode UBool addKeywordOther = TRUE; RuleChain *node = header; while (node != nullptr) { - auto newElem = new UnicodeString(node->fKeyword); - if (newElem == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - fKeywordNames.addElementX(newElem, status); + LocalPointer newElem(node->fKeyword.clone(), status); + fKeywordNames.adoptElement(newElem.orphan(), status); if (U_FAILURE(status)) { - delete newElem; return; } if (0 == node->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5)) { @@ -1565,14 +1560,9 @@ PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode } if (addKeywordOther) { - auto newElem = new UnicodeString(PLURAL_KEYWORD_OTHER); - if (newElem == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - fKeywordNames.addElementX(newElem, status); + LocalPointer newElem(new UnicodeString(PLURAL_KEYWORD_OTHER), status); + fKeywordNames.adoptElement(newElem.orphan(), status); if (U_FAILURE(status)) { - delete newElem; return; } } @@ -1628,7 +1618,7 @@ FixedDecimal::FixedDecimal(double n, int32_t v, int64_t f, int32_t e) { init(n, v, f, e); // check values. TODO make into unit test. // - // long visiblePower = (int) Math.pow(10, v); + // long visiblePower = (int) Math.pow(10.0, v); // if (decimalDigits > visiblePower) { // throw new IllegalArgumentException(); // } @@ -1881,7 +1871,7 @@ void FixedDecimal::adjustForMinFractionDigits(int32_t minFractionDigits) { double FixedDecimal::getPluralOperand(PluralOperand operand) const { switch(operand) { - case PLURAL_OPERAND_N: return (exponent == 0 ? 
source : source * pow(10, exponent)); + case PLURAL_OPERAND_N: return (exponent == 0 ? source : source * pow(10.0, exponent)); case PLURAL_OPERAND_I: return (double) longValue(); case PLURAL_OPERAND_F: return static_cast(decimalDigits); case PLURAL_OPERAND_T: return static_cast(decimalDigitsWithoutTrailingZeros); @@ -1932,14 +1922,14 @@ UnicodeString FixedDecimal::toString() const { } double FixedDecimal::doubleValue() const { - return (isNegative ? -source : source) * pow(10, exponent); + return (isNegative ? -source : source) * pow(10.0, exponent); } int64_t FixedDecimal::longValue() const { if (exponent == 0) { return intValue; } else { - return (long) (pow(10, exponent) * intValue); + return (long) (pow(10.0, exponent) * intValue); } } diff --git a/deps/icu-small/source/i18n/rbt_set.cpp b/deps/icu-small/source/i18n/rbt_set.cpp index abc4413c2c6f61..6835c03a698b96 100644 --- a/deps/icu-small/source/i18n/rbt_set.cpp +++ b/deps/icu-small/source/i18n/rbt_set.cpp @@ -163,16 +163,13 @@ U_NAMESPACE_BEGIN /** * Construct a new empty rule set. 
*/ -TransliterationRuleSet::TransliterationRuleSet(UErrorCode& status) : UMemory() { - ruleVector = new UVector(&_deleteRule, NULL, status); +TransliterationRuleSet::TransliterationRuleSet(UErrorCode& status) : + UMemory(), ruleVector(nullptr), rules(nullptr), index {}, maxContextLength(0) { + LocalPointer lpRuleVector(new UVector(_deleteRule, nullptr, status), status); if (U_FAILURE(status)) { return; } - if (ruleVector == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - } - rules = NULL; - maxContextLength = 0; + ruleVector = lpRuleVector.orphan(); } /** @@ -180,27 +177,24 @@ TransliterationRuleSet::TransliterationRuleSet(UErrorCode& status) : UMemory() { */ TransliterationRuleSet::TransliterationRuleSet(const TransliterationRuleSet& other) : UMemory(other), - ruleVector(0), - rules(0), + ruleVector(nullptr), + rules(nullptr), maxContextLength(other.maxContextLength) { int32_t i, len; uprv_memcpy(index, other.index, sizeof(index)); UErrorCode status = U_ZERO_ERROR; - ruleVector = new UVector(&_deleteRule, NULL, status); - if (other.ruleVector != 0 && ruleVector != 0 && U_SUCCESS(status)) { + LocalPointer lpRuleVector(new UVector(_deleteRule, nullptr, status), status); + if (U_FAILURE(status)) { + return; + } + ruleVector = lpRuleVector.orphan(); + if (other.ruleVector != nullptr && U_SUCCESS(status)) { len = other.ruleVector->size(); for (i=0; ielementAt(i)); - // Null pointer test - if (tempTranslitRule == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - break; - } - ruleVector->addElementX(tempTranslitRule, status); - if (U_FAILURE(status)) { - break; - } + LocalPointer tempTranslitRule( + new TransliterationRule(*(TransliterationRule*)other.ruleVector->elementAt(i)), status); + ruleVector->adoptElement(tempTranslitRule.orphan(), status); } } if (other.rules != 0 && U_SUCCESS(status)) { @@ -247,11 +241,11 @@ int32_t TransliterationRuleSet::getMaximumContextLength(void) const { */ void TransliterationRuleSet::addRule(TransliterationRule* adoptedRule, UErrorCode& 
status) { + LocalPointer lpAdoptedRule(adoptedRule); + ruleVector->adoptElement(lpAdoptedRule.orphan(), status); if (U_FAILURE(status)) { - delete adoptedRule; return; } - ruleVector->addElementX(adoptedRule, status); int32_t len; if ((len = adoptedRule->getContextLength()) > maxContextLength) { @@ -316,7 +310,7 @@ void TransliterationRuleSet::freeze(UParseError& parseError,UErrorCode& status) for (j=0; j= 0) { if (indexValue[j] == x) { - v.addElementX(ruleVector->elementAt(j), status); + v.addElement(ruleVector->elementAt(j), status); } } else { // If the indexValue is < 0, then the first key character is @@ -325,13 +319,16 @@ void TransliterationRuleSet::freeze(UParseError& parseError,UErrorCode& status) // rarely, so we seldom treat this code path. TransliterationRule* r = (TransliterationRule*) ruleVector->elementAt(j); if (r->matchesIndexValue((uint8_t)x)) { - v.addElementX(r, status); + v.addElement(r, status); } } } } uprv_free(indexValue); index[256] = v.size(); + if (U_FAILURE(status)) { + return; + } /* Freeze things into an array. 
*/ diff --git a/deps/icu-small/source/i18n/region.cpp b/deps/icu-small/source/i18n/region.cpp index 2e013708bb88e3..277a22fd091cfb 100644 --- a/deps/icu-small/source/i18n/region.cpp +++ b/deps/icu-small/source/i18n/region.cpp @@ -39,11 +39,6 @@ U_CDECL_BEGIN -static void U_CALLCONV -deleteRegion(void *obj) { - delete (icu::Region *)obj; -} - /** * Cleanup callback func */ @@ -90,7 +85,8 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { LocalPointer continents(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status); LocalPointer groupings(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status); - allRegions = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); + LocalPointer lpAllRegions(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status); + allRegions = lpAllRegions.orphan(); LocalUResourceBundlePointer metadata(ures_openDirect(NULL,"metadata",&status)); LocalUResourceBundlePointer metadataAlias(ures_getByKey(metadata.getAlias(),"alias",NULL,&status)); @@ -109,16 +105,17 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { LocalUResourceBundlePointer worldContainment(ures_getByKey(territoryContainment.getAlias(),"001",NULL,&status)); LocalUResourceBundlePointer groupingContainment(ures_getByKey(territoryContainment.getAlias(),"grouping",NULL,&status)); + ucln_i18n_registerCleanup(UCLN_I18N_REGION, region_cleanup); if (U_FAILURE(status)) { return; } // now, initialize - uhash_setValueDeleter(newRegionIDMap.getAlias(), deleteRegion); // regionIDMap owns objs - uhash_setKeyDeleter(newRegionAliases.getAlias(), uprv_deleteUObject); // regionAliases owns the string keys + uhash_setValueDeleter(newRegionIDMap.getAlias(), uprv_deleteUObject); // regionIDMap owns objs + uhash_setKeyDeleter(newRegionAliases.getAlias(), uprv_deleteUObject); // regionAliases owns the string keys - while ( ures_hasNext(regionRegular.getAlias()) ) { + while (U_SUCCESS(status) && 
ures_hasNext(regionRegular.getAlias())) { UnicodeString regionName = ures_getNextUnicodeString(regionRegular.getAlias(),NULL,&status); int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER); UChar buf[6]; @@ -126,18 +123,18 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { if ( rangeMarkerLocation > 0 ) { UChar endRange = regionName.charAt(rangeMarkerLocation+1); buf[rangeMarkerLocation] = 0; - while ( buf[rangeMarkerLocation-1] <= endRange ) { + while (U_SUCCESS(status) && buf[rangeMarkerLocation-1] <= endRange) { LocalPointer newRegion(new UnicodeString(buf), status); - allRegions->addElementX(newRegion.orphan(),status); + allRegions->adoptElement(newRegion.orphan(), status); buf[rangeMarkerLocation-1]++; } } else { LocalPointer newRegion(new UnicodeString(regionName), status); - allRegions->addElementX(newRegion.orphan(),status); + allRegions->adoptElement(newRegion.orphan(), status); } } - while ( ures_hasNext(regionMacro.getAlias()) ) { + while (U_SUCCESS(status) && ures_hasNext(regionMacro.getAlias())) { UnicodeString regionName = ures_getNextUnicodeString(regionMacro.getAlias(),NULL,&status); int32_t rangeMarkerLocation = regionName.indexOf(RANGE_MARKER); UChar buf[6]; @@ -145,25 +142,29 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { if ( rangeMarkerLocation > 0 ) { UChar endRange = regionName.charAt(rangeMarkerLocation+1); buf[rangeMarkerLocation] = 0; - while ( buf[rangeMarkerLocation-1] <= endRange ) { + while ( buf[rangeMarkerLocation-1] <= endRange && U_SUCCESS(status)) { LocalPointer newRegion(new UnicodeString(buf), status); - allRegions->addElementX(newRegion.orphan(),status); + allRegions->adoptElement(newRegion.orphan(),status); buf[rangeMarkerLocation-1]++; } } else { LocalPointer newRegion(new UnicodeString(regionName), status); - allRegions->addElementX(newRegion.orphan(),status); + allRegions->adoptElement(newRegion.orphan(),status); } } - while ( ures_hasNext(regionUnknown.getAlias()) ) { - LocalPointer 
regionName (new UnicodeString(ures_getNextUnicodeString(regionUnknown.getAlias(),NULL,&status),status)); - allRegions->addElementX(regionName.orphan(),status); + while (U_SUCCESS(status) && ures_hasNext(regionUnknown.getAlias())) { + LocalPointer regionName ( + new UnicodeString(ures_getNextUnicodeString(regionUnknown.getAlias(), nullptr, &status), status)); + allRegions->adoptElement(regionName.orphan(),status); } - while ( ures_hasNext(worldContainment.getAlias()) ) { + while (U_SUCCESS(status) && ures_hasNext(worldContainment.getAlias())) { UnicodeString *continentName = new UnicodeString(ures_getNextUnicodeString(worldContainment.getAlias(),NULL,&status)); - continents->addElementX(continentName,status); + continents->adoptElement(continentName,status); + } + if (U_FAILURE(status)) { + return; } for ( int32_t i = 0 ; i < allRegions->size() ; i++ ) { @@ -191,22 +192,32 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { } UResourceBundle *groupingBundle = nullptr; - while ( ures_hasNext(groupingContainment.getAlias()) ) { + while (U_SUCCESS(status) && ures_hasNext(groupingContainment.getAlias())) { groupingBundle = ures_getNextResource(groupingContainment.getAlias(), groupingBundle, &status); if (U_FAILURE(status)) { break; } UnicodeString *groupingName = new UnicodeString(ures_getKey(groupingBundle), -1, US_INV); - groupings->addElementX(groupingName,status); - Region *grouping = (Region *) uhash_get(newRegionIDMap.getAlias(),groupingName); + LocalPointer lpGroupingName(groupingName, status); + groupings->adoptElement(lpGroupingName.orphan(), status); + if (U_FAILURE(status)) { + break; + } + Region *grouping = (Region *) uhash_get(newRegionIDMap.getAlias(), groupingName); if (grouping != NULL) { - for (int32_t i = 0; i < ures_getSize(groupingBundle); i++) { + for (int32_t i = 0; i < ures_getSize(groupingBundle) && U_SUCCESS(status); i++) { UnicodeString child = ures_getUnicodeStringByIndex(groupingBundle, i, &status); if (U_SUCCESS(status)) { if 
(grouping->containedRegions == NULL) { - grouping->containedRegions = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); + LocalPointer lpContainedRegions( + new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status); + grouping->containedRegions = lpContainedRegions.orphan(); + if (U_FAILURE(status)) { + break; + } } - grouping->containedRegions->addElementX(new UnicodeString(child), status); + LocalPointer lpChildCopy(new UnicodeString(child), status); + grouping->containedRegions->adoptElement(lpChildCopy.orphan(), status); } } } @@ -214,7 +225,7 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { ures_close(groupingBundle); // Process the territory aliases - while ( ures_hasNext(territoryAlias.getAlias()) ) { + while (U_SUCCESS(status) && ures_hasNext(territoryAlias.getAlias())) { LocalUResourceBundlePointer res(ures_getNextResource(territoryAlias.getAlias(),NULL,&status)); const char *aliasFrom = ures_getKey(res.getAlias()); LocalPointer aliasFromStr(new UnicodeString(aliasFrom, -1, US_INV), status); @@ -259,7 +270,7 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { } UnicodeString currentRegion; //currentRegion.remove(); TODO: was already 0 length? 
- for (int32_t i = 0 ; i < aliasTo.length() ; i++ ) { + for (int32_t i = 0 ; i < aliasTo.length() && U_SUCCESS(status); i++ ) { if ( aliasTo.charAt(i) != 0x0020 ) { currentRegion.append(aliasTo.charAt(i)); } @@ -267,7 +278,7 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { Region *target = (Region *)uhash_get(newRegionIDMap.getAlias(),(void *)¤tRegion); if (target) { LocalPointer preferredValue(new UnicodeString(target->idStr), status); - aliasFromRegion->preferredValues->addElementX((void *)preferredValue.orphan(),status); // may add null if err + aliasFromRegion->preferredValues->adoptElement(preferredValue.orphan(),status); // may add null if err } currentRegion.remove(); } @@ -276,9 +287,9 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { } // Process the code mappings - This will allow us to assign numeric codes to most of the territories. - while ( ures_hasNext(codeMappings.getAlias()) ) { + while (U_SUCCESS(status) && ures_hasNext(codeMappings.getAlias())) { UResourceBundle *mapping = ures_getNextResource(codeMappings.getAlias(),NULL,&status); - if ( ures_getType(mapping) == URES_ARRAY && ures_getSize(mapping) == 3) { + if (U_SUCCESS(status) && ures_getType(mapping) == URES_ARRAY && ures_getSize(mapping) == 3) { UnicodeString codeMappingID = ures_getUnicodeStringByIndex(mapping,0,&status); UnicodeString codeMappingNumber = ures_getUnicodeStringByIndex(mapping,1,&status); UnicodeString codeMapping3Letter = ures_getUnicodeStringByIndex(mapping,2,&status); @@ -356,15 +367,23 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { // Add the child region to the set of regions contained by the parent if (parentRegion->containedRegions == NULL) { - parentRegion->containedRegions = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); + LocalPointer lpContainedRegions( + new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status); + parentRegion->containedRegions = lpContainedRegions.orphan(); + 
if (U_FAILURE(status)) { + return; + } } LocalPointer childStr(new UnicodeString(), status); - if( U_FAILURE(status) ) { + if (U_FAILURE(status)) { return; // error out } childStr->fastCopyFrom(childRegion->idStr); - parentRegion->containedRegions->addElementX((void *)childStr.orphan(),status); + parentRegion->containedRegions->adoptElement(childStr.orphan(),status); + if (U_FAILURE(status)) { + return; + } // Set the parent region to be the containing region of the child. // Regions of type GROUPING can't be set as the parent, since another region @@ -388,10 +407,9 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { if( U_FAILURE(status) ) { return; // error out } - availableRegions[ar->fType]->addElementX((void *)arString.orphan(),status); + availableRegions[ar->fType]->adoptElement(arString.orphan(), status); } - ucln_i18n_registerCleanup(UCLN_I18N_REGION, region_cleanup); // copy hashtables numericCodeMap = newNumericCodeMap.orphan(); regionIDMap = newRegionIDMap.orphan(); @@ -402,6 +420,7 @@ void Region::cleanupRegionData() { for (int32_t i = 0 ; i < URGN_LIMIT ; i++ ) { if ( availableRegions[i] ) { delete availableRegions[i]; + availableRegions[i] = nullptr; } } @@ -417,7 +436,6 @@ void Region::cleanupRegionData() { uhash_close(regionIDMap); } if (allRegions) { - allRegions->removeAllElements(); // Don't need the temporary list anymore. 
delete allRegions; allRegions = NULL; } @@ -615,33 +633,30 @@ Region::getContainedRegions(UErrorCode &status) const { StringEnumeration* Region::getContainedRegions( URegionType type, UErrorCode &status ) const { umtx_initOnce(gRegionDataInitOnce, &loadRegionData, status); // returns immediately if U_FAILURE(status) + + UVector result(nullptr, uhash_compareChars, status); + LocalPointer cr(getContainedRegions(status), status); if (U_FAILURE(status)) { - return NULL; + return nullptr; } - UVector *result = new UVector(NULL, uhash_compareChars, status); - - StringEnumeration *cr = getContainedRegions(status); - - for ( int32_t i = 0 ; i < cr->count(status) ; i++ ) { - const char *regionId = cr->next(NULL,status); - const Region *r = Region::getInstance(regionId,status); + const char *regionId; + while((regionId = cr->next(nullptr, status)) != nullptr && U_SUCCESS(status)) { + const Region *r = Region::getInstance(regionId, status); if ( r->getType() == type) { - result->addElementX((void *)&r->idStr,status); + result.addElement(const_cast(&r->idStr), status); } else { - StringEnumeration *children = r->getContainedRegions(type, status); - for ( int32_t j = 0 ; j < children->count(status) ; j++ ) { - const char *id2 = children->next(NULL,status); + LocalPointer children(r->getContainedRegions(type, status)); + const char *id2; + while(U_SUCCESS(status) && ((id2 = children->next(nullptr, status)) != nullptr)) { const Region *r2 = Region::getInstance(id2,status); - result->addElementX((void *)&r2->idStr,status); + result.addElement(const_cast(&r2->idStr), status); } - delete children; } } - delete cr; - StringEnumeration* resultEnumeration = new RegionNameEnumeration(result,status); - delete result; - return resultEnumeration; + LocalPointer resultEnumeration( + new RegionNameEnumeration(&result, status), status); + return U_SUCCESS(status) ? 
resultEnumeration.orphan() : nullptr; } /** @@ -706,18 +721,21 @@ Region::getType() const { return fType; } -RegionNameEnumeration::RegionNameEnumeration(UVector *fNameList, UErrorCode& status) { - pos=0; - if (fNameList && U_SUCCESS(status)) { - fRegionNames = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, fNameList->size(),status); - for ( int32_t i = 0 ; i < fNameList->size() ; i++ ) { - UnicodeString* this_region_name = (UnicodeString *)fNameList->elementAt(i); - UnicodeString* new_region_name = new UnicodeString(*this_region_name); - fRegionNames->addElementX((void *)new_region_name,status); +RegionNameEnumeration::RegionNameEnumeration(UVector *nameList, UErrorCode& status) : + pos(0), fRegionNames(nullptr) { + // TODO: https://unicode-org.atlassian.net/browse/ICU-21829 + // Is all of the copying going on here really necessary? + if (nameList && U_SUCCESS(status)) { + LocalPointer regionNames( + new UVector(uprv_deleteUObject, uhash_compareUnicodeString, nameList->size(), status), status); + for ( int32_t i = 0 ; U_SUCCESS(status) && i < nameList->size() ; i++ ) { + UnicodeString* this_region_name = (UnicodeString *)nameList->elementAt(i); + LocalPointer new_region_name(new UnicodeString(*this_region_name), status); + regionNames->adoptElement(new_region_name.orphan(), status); + } + if (U_SUCCESS(status)) { + fRegionNames = regionNames.orphan(); } - } - else { - fRegionNames = NULL; } } diff --git a/deps/icu-small/source/i18n/region_impl.h b/deps/icu-small/source/i18n/region_impl.h index 62acaa4511b49f..b6a281393f8911 100644 --- a/deps/icu-small/source/i18n/region_impl.h +++ b/deps/icu-small/source/i18n/region_impl.h @@ -26,7 +26,11 @@ U_NAMESPACE_BEGIN class RegionNameEnumeration : public StringEnumeration { public: - RegionNameEnumeration(UVector *fNameList, UErrorCode& status); + /** + * Construct an string enumeration over the supplied name list. + * Makes a copy of the supplied input name list; does not retain a reference to the original. 
+ */ + RegionNameEnumeration(UVector *nameList, UErrorCode& status); virtual ~RegionNameEnumeration(); static UClassID U_EXPORT2 getStaticClassID(void); virtual UClassID getDynamicClassID(void) const override; diff --git a/deps/icu-small/source/i18n/smpdtfmt.cpp b/deps/icu-small/source/i18n/smpdtfmt.cpp index 91748d82f9fd64..c1e943a0949da2 100644 --- a/deps/icu-small/source/i18n/smpdtfmt.cpp +++ b/deps/icu-small/source/i18n/smpdtfmt.cpp @@ -3792,6 +3792,9 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC src = &text; } parseInt(*src, number, pos, allowNegative,currentNumberFormat); + if (!isLenient() && pos.getIndex() < start + count) { + return -start; + } if (pos.getIndex() != parseStart) { int32_t val = number.getLong(); diff --git a/deps/icu-small/source/i18n/tmutfmt.cpp b/deps/icu-small/source/i18n/tmutfmt.cpp index 057bb634ebbb24..f0335a81f50fa4 100644 --- a/deps/icu-small/source/i18n/tmutfmt.cpp +++ b/deps/icu-small/source/i18n/tmutfmt.cpp @@ -320,14 +320,14 @@ void TimeUnitFormat::setup(UErrorCode& err) { initDataMembers(err); - UVector pluralCounts(0, uhash_compareUnicodeString, 6, err); + UVector pluralCounts(nullptr, uhash_compareUnicodeString, 6, err); LocalPointer keywords(getPluralRules().getKeywords(err), err); if (U_FAILURE(err)) { return; } UnicodeString* pluralCount; while ((pluralCount = const_cast(keywords->snext(err))) != NULL) { - pluralCounts.addElementX(pluralCount, err); + pluralCounts.addElement(pluralCount, err); } readFromCurrentLocale(UTMUTFMT_FULL_STYLE, gUnitsTag, pluralCounts, err); checkConsistency(UTMUTFMT_FULL_STYLE, gUnitsTag, err); diff --git a/deps/icu-small/source/i18n/tzfmt.cpp b/deps/icu-small/source/i18n/tzfmt.cpp index ef3cfad80ce1d6..9d046c30c8f07b 100644 --- a/deps/icu-small/source/i18n/tzfmt.cpp +++ b/deps/icu-small/source/i18n/tzfmt.cpp @@ -2459,7 +2459,7 @@ TimeZoneFormat::parseOffsetPattern(const UnicodeString& pattern, OffsetFields re if (itemType != GMTOffsetField::TEXT) { if 
(GMTOffsetField::isValid(itemType, itemLength)) { GMTOffsetField* fld = GMTOffsetField::createTimeField(itemType, static_cast(itemLength), status); - result->addElementX(fld, status); + result->adoptElement(fld, status); if (U_FAILURE(status)) { break; } @@ -2485,7 +2485,7 @@ TimeZoneFormat::parseOffsetPattern(const UnicodeString& pattern, OffsetFields re if (itemType == GMTOffsetField::TEXT) { if (text.length() > 0) { GMTOffsetField* textfld = GMTOffsetField::createText(text, status); - result->addElementX(textfld, status); + result->adoptElement(textfld, status); if (U_FAILURE(status)) { break; } @@ -2494,7 +2494,7 @@ TimeZoneFormat::parseOffsetPattern(const UnicodeString& pattern, OffsetFields re } else { if (GMTOffsetField::isValid(itemType, itemLength)) { GMTOffsetField* fld = GMTOffsetField::createTimeField(itemType, static_cast(itemLength), status); - result->addElementX(fld, status); + result->adoptElement(fld, status); if (U_FAILURE(status)) { break; } @@ -2512,7 +2512,7 @@ TimeZoneFormat::parseOffsetPattern(const UnicodeString& pattern, OffsetFields re if (itemType != GMTOffsetField::TEXT) { if (GMTOffsetField::isValid(itemType, itemLength)) { GMTOffsetField* fld = GMTOffsetField::createTimeField(itemType, static_cast(itemLength), status); - result->addElementX(fld, status); + result->adoptElement(fld, status); if (U_FAILURE(status)) { break; } @@ -2532,12 +2532,12 @@ TimeZoneFormat::parseOffsetPattern(const UnicodeString& pattern, OffsetFields re if (itemType == GMTOffsetField::TEXT) { if (text.length() > 0) { GMTOffsetField* tfld = GMTOffsetField::createText(text, status); - result->addElementX(tfld, status); + result->adoptElement(tfld, status); } } else { if (GMTOffsetField::isValid(itemType, itemLength)) { GMTOffsetField* fld = GMTOffsetField::createTimeField(itemType, static_cast(itemLength), status); - result->addElementX(fld, status); + result->adoptElement(fld, status); } else { status = U_ILLEGAL_ARGUMENT_ERROR; } diff --git 
a/deps/icu-small/source/i18n/tzgnames.cpp b/deps/icu-small/source/i18n/tzgnames.cpp index ed5f42d7bc1d6d..d5ee45ced78db4 100644 --- a/deps/icu-small/source/i18n/tzgnames.cpp +++ b/deps/icu-small/source/i18n/tzgnames.cpp @@ -229,30 +229,27 @@ GNameSearchHandler::handleMatch(int32_t matchLength, const CharacterNode *node, if ((nameinfo->type & fTypes) != 0) { // matches a requested type if (fResults == NULL) { - fResults = new UVector(uprv_free, NULL, status); - if (fResults == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; + LocalPointer lpResults(new UVector(uprv_free, NULL, status), status); + if (U_FAILURE(status)) { + return false; } + fResults = lpResults.orphan(); } - if (U_SUCCESS(status)) { - U_ASSERT(fResults != NULL); - GMatchInfo *gmatch = (GMatchInfo *)uprv_malloc(sizeof(GMatchInfo)); - if (gmatch == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - } else { - // add the match to the vector - gmatch->gnameInfo = nameinfo; - gmatch->matchLength = matchLength; - gmatch->timeType = UTZFMT_TIME_TYPE_UNKNOWN; - fResults->addElementX(gmatch, status); - if (U_FAILURE(status)) { - uprv_free(gmatch); - } else { - if (matchLength > fMaxMatchLen) { - fMaxMatchLen = matchLength; - } - } - } + GMatchInfo *gmatch = (GMatchInfo *)uprv_malloc(sizeof(GMatchInfo)); + if (gmatch == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return false; + } + // add the match to the vector + gmatch->gnameInfo = nameinfo; + gmatch->matchLength = matchLength; + gmatch->timeType = UTZFMT_TIME_TYPE_UNKNOWN; + fResults->adoptElement(gmatch, status); + if (U_FAILURE(status)) { + return false; + } + if (matchLength > fMaxMatchLen) { + fMaxMatchLen = matchLength; } } } diff --git a/deps/icu-small/source/i18n/tznames.cpp b/deps/icu-small/source/i18n/tznames.cpp index 5c504d01cb6342..781f1cc161f9c5 100644 --- a/deps/icu-small/source/i18n/tznames.cpp +++ b/deps/icu-small/source/i18n/tznames.cpp @@ -414,15 +414,12 @@ TimeZoneNames::MatchInfoCollection::addZone(UTimeZoneNameType nameType, int32_t if 
(U_FAILURE(status)) { return; } - MatchInfo* matchInfo = new MatchInfo(nameType, matchLength, &tzID, NULL); - if (matchInfo == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - matches(status)->addElementX(matchInfo, status); + LocalPointer matchInfo(new MatchInfo(nameType, matchLength, &tzID, NULL), status); + UVector *matchesVec = matches(status); if (U_FAILURE(status)) { - delete matchInfo; + return; } + matchesVec->adoptElement(matchInfo.orphan(), status); } void @@ -431,15 +428,12 @@ TimeZoneNames::MatchInfoCollection::addMetaZone(UTimeZoneNameType nameType, int3 if (U_FAILURE(status)) { return; } - MatchInfo* matchInfo = new MatchInfo(nameType, matchLength, NULL, &mzID); - if (matchInfo == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - matches(status)->addElementX(matchInfo, status); + LocalPointer matchInfo(new MatchInfo(nameType, matchLength, NULL, &mzID), status); + UVector *matchesVec = matches(status); if (U_FAILURE(status)) { - delete matchInfo; + return; } + matchesVec->adoptElement(matchInfo.orphan(), status); } int32_t diff --git a/deps/icu-small/source/i18n/tznames_impl.cpp b/deps/icu-small/source/i18n/tznames_impl.cpp index d450b7456489bf..69991dfef4b5c0 100644 --- a/deps/icu-small/source/i18n/tznames_impl.cpp +++ b/deps/icu-small/source/i18n/tznames_impl.cpp @@ -148,19 +148,29 @@ CharacterNode::addValue(void *value, UObjectDeleter *valueDeleter, UErrorCode &s if (!fHasValuesVector) { // There is only one value so far, and not in a vector yet. // Create a vector and add the old value. 
- UVector *values = new UVector(valueDeleter, NULL, DEFAULT_CHARACTERNODE_CAPACITY, status); + LocalPointer values( + new UVector(valueDeleter, NULL, DEFAULT_CHARACTERNODE_CAPACITY, status), status); if (U_FAILURE(status)) { if (valueDeleter) { valueDeleter(value); } return; } - values->addElementX(fValues, status); - fValues = values; + if (values->hasDeleter()) { + values->adoptElement(fValues, status); + } else { + values->addElement(fValues, status); + } + fValues = values.orphan(); fHasValuesVector = TRUE; } // Add the new value. - ((UVector *)fValues)->addElementX(value, status); + UVector *values = (UVector *)fValues; + if (values->hasDeleter()) { + values->adoptElement(value, status); + } else { + values->addElement(value, status); + } } } @@ -219,10 +229,8 @@ void TextTrieMap::put(const UChar *key, void *value, UErrorCode &status) { fIsEmpty = FALSE; if (fLazyContents == NULL) { - fLazyContents = new UVector(status); - if (fLazyContents == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - } + LocalPointer lpLazyContents(new UVector(status), status); + fLazyContents = lpLazyContents.orphan(); } if (U_FAILURE(status)) { if (fValueDeleter) { @@ -233,7 +241,7 @@ TextTrieMap::put(const UChar *key, void *value, UErrorCode &status) { U_ASSERT(fLazyContents != NULL); UChar *s = const_cast(key); - fLazyContents->addElementX(s, status); + fLazyContents->addElement(s, status); if (U_FAILURE(status)) { if (fValueDeleter) { fValueDeleter((void*) key); @@ -241,7 +249,7 @@ TextTrieMap::put(const UChar *key, void *value, UErrorCode &status) { return; } - fLazyContents->addElementX(value, status); + fLazyContents->addElement(value, status); } void @@ -854,7 +862,7 @@ class MetaZoneIDsEnumeration : public StringEnumeration { public: MetaZoneIDsEnumeration(); MetaZoneIDsEnumeration(const UVector& mzIDs); - MetaZoneIDsEnumeration(UVector* mzIDs); + MetaZoneIDsEnumeration(LocalPointer mzIDs); virtual ~MetaZoneIDsEnumeration(); static UClassID U_EXPORT2 getStaticClassID(void); 
virtual UClassID getDynamicClassID(void) const override; @@ -865,7 +873,7 @@ class MetaZoneIDsEnumeration : public StringEnumeration { int32_t fLen; int32_t fPos; const UVector* fMetaZoneIDs; - UVector *fLocalVector; + LocalPointer fLocalVector; }; UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MetaZoneIDsEnumeration) @@ -879,8 +887,9 @@ MetaZoneIDsEnumeration::MetaZoneIDsEnumeration(const UVector& mzIDs) fLen = fMetaZoneIDs->size(); } -MetaZoneIDsEnumeration::MetaZoneIDsEnumeration(UVector *mzIDs) -: fLen(0), fPos(0), fMetaZoneIDs(mzIDs), fLocalVector(mzIDs) { +MetaZoneIDsEnumeration::MetaZoneIDsEnumeration(LocalPointer mzIDs) +: fLen(0), fPos(0), fMetaZoneIDs(nullptr), fLocalVector(std::move(mzIDs)) { + fMetaZoneIDs = fLocalVector.getAlias(); if (fMetaZoneIDs) { fLen = fMetaZoneIDs->size(); } @@ -906,9 +915,6 @@ MetaZoneIDsEnumeration::count(UErrorCode& /*status*/) const { } MetaZoneIDsEnumeration::~MetaZoneIDsEnumeration() { - if (fLocalVector) { - delete fLocalVector; - } } @@ -1153,28 +1159,23 @@ TimeZoneNamesImpl::_getAvailableMetaZoneIDs(const UnicodeString& tzID, UErrorCod return new MetaZoneIDsEnumeration(); } - MetaZoneIDsEnumeration *senum = NULL; - UVector* mzIDs = new UVector(NULL, uhash_compareUChars, status); - if (mzIDs == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - } + LocalPointer senum; + LocalPointer mzIDs(new UVector(NULL, uhash_compareUChars, status), status); if (U_SUCCESS(status)) { - U_ASSERT(mzIDs != NULL); + U_ASSERT(mzIDs.isValid()); for (int32_t i = 0; U_SUCCESS(status) && i < mappings->size(); i++) { OlsonToMetaMappingEntry *map = (OlsonToMetaMappingEntry *)mappings->elementAt(i); const UChar *mzID = map->mzid; if (!mzIDs->contains((void *)mzID)) { - mzIDs->addElementX((void *)mzID, status); + mzIDs->addElement((void *)mzID, status); } } if (U_SUCCESS(status)) { - senum = new MetaZoneIDsEnumeration(mzIDs); - } else { - delete mzIDs; + senum.adoptInsteadAndCheckErrorCode(new MetaZoneIDsEnumeration(std::move(mzIDs)), status); } } - return senum; 
+ return U_SUCCESS(status) ? senum.orphan() : nullptr; } UnicodeString& diff --git a/deps/icu-small/source/i18n/ucol.cpp b/deps/icu-small/source/i18n/ucol.cpp index f59333ede3c890..8e1df8d5577beb 100644 --- a/deps/icu-small/source/i18n/ucol.cpp +++ b/deps/icu-small/source/i18n/ucol.cpp @@ -96,12 +96,18 @@ ucol_safeClone(const UCollator *coll, void * /*stackBuffer*/, int32_t * pBufferS if (newColl == NULL) { *status = U_MEMORY_ALLOCATION_ERROR; return nullptr; - } else { + } else if (pBufferSize != NULL) { *status = U_SAFECLONE_ALLOCATED_WARNING; } return newColl->toUCollator(); } +U_CAPI UCollator* U_EXPORT2 +ucol_clone(const UCollator *coll, UErrorCode *status) +{ + return ucol_safeClone(coll, nullptr, nullptr, status); +} + U_CAPI void U_EXPORT2 ucol_close(UCollator *coll) { diff --git a/deps/icu-small/source/i18n/udatpg.cpp b/deps/icu-small/source/i18n/udatpg.cpp index 332636a93889f1..9e61a12076803a 100644 --- a/deps/icu-small/source/i18n/udatpg.cpp +++ b/deps/icu-small/source/i18n/udatpg.cpp @@ -210,12 +210,47 @@ udatpg_setDateTimeFormat(const UDateTimePatternGenerator *dtpg, U_CAPI const UChar * U_EXPORT2 udatpg_getDateTimeFormat(const UDateTimePatternGenerator *dtpg, int32_t *pLength) { - const UnicodeString &result=((const DateTimePatternGenerator *)dtpg)->getDateTimeFormat(); - if(pLength!=NULL) { + UErrorCode status = U_ZERO_ERROR; + return udatpg_getDateTimeFormatForStyle(dtpg, UDAT_MEDIUM, pLength, &status); +} + +U_CAPI void U_EXPORT2 +udatpg_setDateTimeFormatForStyle(UDateTimePatternGenerator *udtpg, + UDateFormatStyle style, + const UChar *dateTimeFormat, int32_t length, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return; + } else if (dateTimeFormat==nullptr) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + DateTimePatternGenerator *dtpg = reinterpret_cast(udtpg); + UnicodeString dtFormatString((UBool)(length<0), dateTimeFormat, length); + dtpg->setDateTimeFormat(style, dtFormatString, *pErrorCode); +} + +U_CAPI const 
UChar* U_EXPORT2 +udatpg_getDateTimeFormatForStyle(const UDateTimePatternGenerator *udtpg, + UDateFormatStyle style, int32_t *pLength, + UErrorCode *pErrorCode) { + static const UChar emptyString[] = { (UChar)0 }; + if (U_FAILURE(*pErrorCode)) { + if (pLength !=nullptr) { + *pLength = 0; + } + return emptyString; + } + const DateTimePatternGenerator *dtpg = reinterpret_cast(udtpg); + const UnicodeString &result = dtpg->getDateTimeFormat(style, *pErrorCode); + if (pLength != nullptr) { *pLength=result.length(); } + // Note: The UnicodeString for the dateTimeFormat string in the DateTimePatternGenerator + // was NUL-terminated what it was set, to avoid doing it here which could re-allocate + // the buffe and affect and cont references to the string or its buffer. return result.getBuffer(); -} + } U_CAPI void U_EXPORT2 udatpg_setDecimal(UDateTimePatternGenerator *dtpg, diff --git a/deps/icu-small/source/i18n/unicode/basictz.h b/deps/icu-small/source/i18n/unicode/basictz.h index 250ea309279aa7..d9f85e45eeff26 100644 --- a/deps/icu-small/source/i18n/unicode/basictz.h +++ b/deps/icu-small/source/i18n/unicode/basictz.h @@ -152,17 +152,15 @@ class U_I18N_API BasicTimeZone: public TimeZone { virtual void getSimpleRulesNear(UDate date, InitialTimeZoneRule*& initial, AnnualTimeZoneRule*& std, AnnualTimeZoneRule*& dst, UErrorCode& status) const; -#ifndef U_FORCE_HIDE_DRAFT_API /** * Get time zone offsets from local wall time. 
- * @draft ICU 69 + * @stable ICU 69 */ virtual void getOffsetFromLocal( UDate date, UTimeZoneLocalOption nonExistingTimeOpt, UTimeZoneLocalOption duplicatedTimeOpt, int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) const; -#endif /* U_FORCE_HIDE_DRAFT_API */ #ifndef U_HIDE_INTERNAL_API /** diff --git a/deps/icu-small/source/i18n/unicode/dtptngen.h b/deps/icu-small/source/i18n/unicode/dtptngen.h index 250a0e089fed6b..6be0e2a571c7bb 100644 --- a/deps/icu-small/source/i18n/unicode/dtptngen.h +++ b/deps/icu-small/source/i18n/unicode/dtptngen.h @@ -311,6 +311,11 @@ class U_I18N_API DateTimePatternGenerator : public UObject { * for those two skeletons, so the result is put together with this pattern, * resulting in "d-MMM h:mm". * + * There are four DateTimeFormats in a DateTimePatternGenerator object, + * corresponding to date styles UDAT_FULL..UDAT_SHORT. This method sets + * all of them to the specified pattern. To set them individually, see + * setDateTimeFormat(UDateFormatStyle style, ...). + * * @param dateTimeFormat * message format pattern, here {1} will be replaced by the date * pattern and {0} will be replaced by the time pattern. @@ -320,11 +325,66 @@ class U_I18N_API DateTimePatternGenerator : public UObject { /** * Getter corresponding to setDateTimeFormat. + * + * There are four DateTimeFormats in a DateTimePatternGenerator object, + * corresponding to date styles UDAT_FULL..UDAT_SHORT. This method gets + * the style for UDAT_MEDIUM (the default). To get them individually, see + * getDateTimeFormat(UDateFormatStyle style). + * * @return DateTimeFormat. * @stable ICU 3.8 */ const UnicodeString& getDateTimeFormat() const; +#if !UCONFIG_NO_FORMATTING +#ifndef U_HIDE_DRAFT_API + /** + * dateTimeFormats are message patterns used to compose combinations of date + * and time patterns. 
There are four length styles, corresponding to the + * inferred style of the date pattern; these are UDateFormatStyle values: + * - UDAT_FULL (for date pattern with weekday and long month), else + * - UDAT_LONG (for a date pattern with long month), else + * - UDAT_MEDIUM (for a date pattern with abbreviated month), else + * - UDAT_SHORT (for any other date pattern). + * For details on dateTimeFormats, see + * https://www.unicode.org/reports/tr35/tr35-dates.html#dateTimeFormats. + * The default pattern in the root locale for all styles is "{1} {0}". + * + * @param style + * one of DateFormat.FULL..DateFormat.SHORT. Error if out of range. + * @param dateTimeFormat + * the new dateTimeFormat to set for the the specified style + * @param status + * in/out parameter; if no failure status is already set, + * it will be set according to result of the function (e.g. + * U_ILLEGAL_ARGUMENT_ERROR for style out of range). + * @draft ICU 71 + */ + void setDateTimeFormat(UDateFormatStyle style, const UnicodeString& dateTimeFormat, + UErrorCode& status); + + /** + * Getter corresponding to setDateTimeFormat. + * + * @param style + * one of UDAT_FULL..UDAT_SHORT. Error if out of range. + * @param status + * in/out parameter; if no failure status is already set, + * it will be set according to result of the function (e.g. + * U_ILLEGAL_ARGUMENT_ERROR for style out of range). + * @return + * the current dateTimeFormat for the the specified style, or + * empty string in case of error. The UnicodeString reference, + * or the contents of the string, may no longer be valid if + * setDateTimeFormat is called, or the DateTimePatternGenerator + * object is deleted. + * @draft ICU 71 + */ + const UnicodeString& getDateTimeFormat(UDateFormatStyle style, + UErrorCode& status) const; +#endif /* U_HIDE_DRAFT_API */ +#endif /* #if !UCONFIG_NO_FORMATTING */ + /** * Return the best pattern matching the input skeleton. It is guaranteed to * have all of the fields in the skeleton. 
@@ -545,8 +605,7 @@ class U_I18N_API DateTimePatternGenerator : public UObject { */ DateTimePatternGenerator& operator=(const DateTimePatternGenerator& other); - // TODO(ticket:13619): re-enable when UDATPG_NARROW no longer in draft mode. - // static const int32_t UDATPG_WIDTH_COUNT = UDATPG_NARROW + 1; + static const int32_t UDATPG_WIDTH_COUNT = UDATPG_NARROW + 1; Locale pLocale; // pattern locale FormatParser *fp; @@ -554,9 +613,8 @@ class U_I18N_API DateTimePatternGenerator : public UObject { DistanceInfo *distanceInfo; PatternMap *patternMap; UnicodeString appendItemFormats[UDATPG_FIELD_COUNT]; - // TODO(ticket:13619): [3] -> UDATPG_WIDTH_COUNT - UnicodeString fieldDisplayNames[UDATPG_FIELD_COUNT][3]; - UnicodeString dateTimeFormat; + UnicodeString fieldDisplayNames[UDATPG_FIELD_COUNT][UDATPG_WIDTH_COUNT]; + UnicodeString dateTimeFormat[4]; UnicodeString decimal; DateTimeMatcher *skipMatcher; Hashtable *fAvailableFormatKeyHash; diff --git a/deps/icu-small/source/i18n/unicode/measunit.h b/deps/icu-small/source/i18n/unicode/measunit.h index 61da62e71f2271..b7e8e1676a41dc 100644 --- a/deps/icu-small/source/i18n/unicode/measunit.h +++ b/deps/icu-small/source/i18n/unicode/measunit.h @@ -77,14 +77,13 @@ enum UMeasureUnitComplexity { }; -#ifndef U_HIDE_DRAFT_API /** * Enumeration for SI and binary prefixes, e.g. "kilo-", "nano-", "mebi-". * * Enum values should be treated as opaque: use umeas_getPrefixPower() and * umeas_getPrefixBase() to find their corresponding values. * - * @draft ICU 69 + * @stable ICU 69 * @see umeas_getPrefixBase * @see umeas_getPrefixPower */ @@ -96,14 +95,14 @@ typedef enum UMeasurePrefix { * implementation detail and should not be relied upon: use * umeas_getPrefixPower() to obtain meaningful values. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_ONE = 30 + 0, /** * SI prefix: yotta, 10^24. 
* - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_YOTTA = UMEASURE_PREFIX_ONE + 24, @@ -119,133 +118,133 @@ typedef enum UMeasurePrefix { /** * SI prefix: zetta, 10^21. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_ZETTA = UMEASURE_PREFIX_ONE + 21, /** * SI prefix: exa, 10^18. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_EXA = UMEASURE_PREFIX_ONE + 18, /** * SI prefix: peta, 10^15. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_PETA = UMEASURE_PREFIX_ONE + 15, /** * SI prefix: tera, 10^12. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_TERA = UMEASURE_PREFIX_ONE + 12, /** * SI prefix: giga, 10^9. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_GIGA = UMEASURE_PREFIX_ONE + 9, /** * SI prefix: mega, 10^6. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_MEGA = UMEASURE_PREFIX_ONE + 6, /** * SI prefix: kilo, 10^3. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_KILO = UMEASURE_PREFIX_ONE + 3, /** * SI prefix: hecto, 10^2. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_HECTO = UMEASURE_PREFIX_ONE + 2, /** * SI prefix: deka, 10^1. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_DEKA = UMEASURE_PREFIX_ONE + 1, /** * SI prefix: deci, 10^-1. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_DECI = UMEASURE_PREFIX_ONE + -1, /** * SI prefix: centi, 10^-2. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_CENTI = UMEASURE_PREFIX_ONE + -2, /** * SI prefix: milli, 10^-3. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_MILLI = UMEASURE_PREFIX_ONE + -3, /** * SI prefix: micro, 10^-6. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_MICRO = UMEASURE_PREFIX_ONE + -6, /** * SI prefix: nano, 10^-9. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_NANO = UMEASURE_PREFIX_ONE + -9, /** * SI prefix: pico, 10^-12. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_PICO = UMEASURE_PREFIX_ONE + -12, /** * SI prefix: femto, 10^-15. 
* - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_FEMTO = UMEASURE_PREFIX_ONE + -15, /** * SI prefix: atto, 10^-18. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_ATTO = UMEASURE_PREFIX_ONE + -18, /** * SI prefix: zepto, 10^-21. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_ZEPTO = UMEASURE_PREFIX_ONE + -21, /** * SI prefix: yocto, 10^-24. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_YOCTO = UMEASURE_PREFIX_ONE + -24, @@ -270,7 +269,7 @@ typedef enum UMeasurePrefix { /** * Binary prefix: kibi, 1024^1. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_KIBI = UMEASURE_PREFIX_INTERNAL_ONE_BIN + 1, @@ -286,49 +285,49 @@ typedef enum UMeasurePrefix { /** * Binary prefix: mebi, 1024^2. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_MEBI = UMEASURE_PREFIX_INTERNAL_ONE_BIN + 2, /** * Binary prefix: gibi, 1024^3. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_GIBI = UMEASURE_PREFIX_INTERNAL_ONE_BIN + 3, /** * Binary prefix: tebi, 1024^4. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_TEBI = UMEASURE_PREFIX_INTERNAL_ONE_BIN + 4, /** * Binary prefix: pebi, 1024^5. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_PEBI = UMEASURE_PREFIX_INTERNAL_ONE_BIN + 5, /** * Binary prefix: exbi, 1024^6. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_EXBI = UMEASURE_PREFIX_INTERNAL_ONE_BIN + 6, /** * Binary prefix: zebi, 1024^7. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_ZEBI = UMEASURE_PREFIX_INTERNAL_ONE_BIN + 7, /** * Binary prefix: yobi, 1024^8. * - * @draft ICU 69 + * @stable ICU 69 */ UMEASURE_PREFIX_YOBI = UMEASURE_PREFIX_INTERNAL_ONE_BIN + 8, @@ -347,7 +346,7 @@ typedef enum UMeasurePrefix { * base is 10 for SI prefixes (kilo, micro) and 1024 for binary prefixes (kibi, * mebi). 
* - * @draft ICU 69 + * @stable ICU 69 */ U_CAPI int32_t U_EXPORT2 umeas_getPrefixBase(UMeasurePrefix unitPrefix); @@ -355,12 +354,10 @@ U_CAPI int32_t U_EXPORT2 umeas_getPrefixBase(UMeasurePrefix unitPrefix); * Returns the exponent of the factor associated with the given unit prefix, for * example 3 for kilo, -6 for micro, 1 for kibi, 2 for mebi, 3 for gibi. * - * @draft ICU 69 + * @stable ICU 69 */ U_CAPI int32_t U_EXPORT2 umeas_getPrefixPower(UMeasurePrefix unitPrefix); -#endif // U_HIDE_DRAFT_API - /** * A unit such as length, mass, volume, currency, etc. A unit is * coupled with a numeric amount to produce a Measure. @@ -481,7 +478,6 @@ class U_I18N_API MeasureUnit: public UObject { */ UMeasureUnitComplexity getComplexity(UErrorCode& status) const; -#ifndef U_HIDE_DRAFT_API /** * Creates a MeasureUnit which is this SINGLE unit augmented with the specified prefix. * For example, UMEASURE_PREFIX_KILO for "kilo", or UMEASURE_PREFIX_KIBI for "kibi". @@ -494,7 +490,7 @@ class U_I18N_API MeasureUnit: public UObject { * @param prefix The prefix, from UMeasurePrefix. * @param status Set if this is not a SINGLE unit or if another error occurs. * @return A new SINGLE unit. - * @draft ICU 69 + * @stable ICU 69 */ MeasureUnit withPrefix(UMeasurePrefix prefix, UErrorCode& status) const; @@ -510,10 +506,9 @@ class U_I18N_API MeasureUnit: public UObject { * @return The prefix of this SINGLE unit, from UMeasurePrefix. * @see umeas_getPrefixBase * @see umeas_getPrefixPower - * @draft ICU 69 + * @stable ICU 69 */ UMeasurePrefix getPrefix(UErrorCode& status) const; -#endif // U_HIDE_DRAFT_API /** * Creates a MeasureUnit which is this SINGLE unit augmented with the specified dimensionality @@ -989,23 +984,21 @@ class U_I18N_API MeasureUnit: public UObject { */ static MeasureUnit getKarat(); -#ifndef U_HIDE_DRAFT_API /** * Returns by pointer, unit of concentr: milligram-ofglucose-per-deciliter. * Caller owns returned value and must free it. 
* Also see {@link #getMilligramOfglucosePerDeciliter()}. * @param status ICU error code. - * @draft ICU 69 + * @stable ICU 69 */ static MeasureUnit *createMilligramOfglucosePerDeciliter(UErrorCode &status); /** * Returns by value, unit of concentr: milligram-ofglucose-per-deciliter. * Also see {@link #createMilligramOfglucosePerDeciliter()}. - * @draft ICU 69 + * @stable ICU 69 */ static MeasureUnit getMilligramOfglucosePerDeciliter(); -#endif /* U_HIDE_DRAFT_API */ /** * Returns by pointer, unit of concentr: milligram-per-deciliter. diff --git a/deps/icu-small/source/i18n/unicode/numberformatter.h b/deps/icu-small/source/i18n/unicode/numberformatter.h index ece433b55f09ea..711064ece8dbb3 100644 --- a/deps/icu-small/source/i18n/unicode/numberformatter.h +++ b/deps/icu-small/source/i18n/unicode/numberformatter.h @@ -22,6 +22,7 @@ #include "unicode/parseerr.h" #include "unicode/plurrule.h" #include "unicode/ucurr.h" +#include "unicode/unounclass.h" #include "unicode/unum.h" #include "unicode/unumberformatter.h" #include "unicode/uobject.h" @@ -640,6 +641,33 @@ class U_I18N_API Precision : public UMemory { */ static IncrementPrecision increment(double roundingIncrement); +#ifndef U_HIDE_DRAFT_API + /** + * Version of `Precision::increment()` that takes an integer at a particular power of 10. + * + * To round to the nearest 0.5 and display 2 fraction digits, with this function, you should write one of the following: + * + *

+     * Precision::incrementExact(5, -1).withMinFraction(2)
+     * Precision::incrementExact(50, -2).withMinFraction(2)
+     * Precision::incrementExact(50, -2)
+     * 
+ * + * This is analogous to ICU4J `Precision.increment(new BigDecimal("0.50"))`. + * + * This behavior is modeled after ECMA-402. For more information, see: + * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/NumberFormat/NumberFormat#roundingincrement + * + * @param mantissa + * The increment to which to round numbers. + * @param magnitude + * The power of 10 of the ones digit of the mantissa. + * @return A precision for chaining or passing to the NumberFormatter precision() setter. + * @draft ICU 71 + */ + static IncrementPrecision incrementExact(uint64_t mantissa, int16_t magnitude); +#endif // U_HIDE_DRAFT_API + /** * Show numbers rounded and padded according to the rules for the currency unit. The most common * rounding precision settings for currencies include Precision::fixedFraction(2), @@ -659,16 +687,14 @@ class U_I18N_API Precision : public UMemory { */ static CurrencyPrecision currency(UCurrencyUsage currencyUsage); -#ifndef U_HIDE_DRAFT_API /** * Configure how trailing zeros are displayed on numbers. For example, to hide trailing zeros * when the number is an integer, use UNUM_TRAILING_ZERO_HIDE_IF_WHOLE. * * @param trailingZeroDisplay Option to configure the display of trailing zeros. - * @draft ICU 69 + * @stable ICU 69 */ Precision trailingZeroDisplay(UNumberTrailingZeroDisplay trailingZeroDisplay) const; -#endif // U_HIDE_DRAFT_API private: enum PrecisionType { @@ -707,16 +733,23 @@ class U_I18N_API Precision : public UMemory { impl::digits_t fMaxSig; /** @internal (private) */ UNumberRoundingPriority fPriority; + /** + * Whether to retain trailing zeros based on the looser strategy. + * @internal (private) + */ + bool fRetain; } fracSig; /** @internal (private) */ struct IncrementSettings { // For RND_INCREMENT, RND_INCREMENT_ONE, and RND_INCREMENT_FIVE + // Note: This is a union, so we shouldn't own memory, since + // the default destructor would leak it. 
/** @internal (private) */ - double fIncrement; + uint64_t fIncrement; /** @internal (private) */ - impl::digits_t fMinFrac; + impl::digits_t fIncrementMagnitude; /** @internal (private) */ - impl::digits_t fMaxFrac; + impl::digits_t fMinFrac; } increment; UCurrencyUsage currencyUsage; // For RND_CURRENCY UErrorCode errorCode; // For RND_ERROR @@ -759,9 +792,10 @@ class U_I18N_API Precision : public UMemory { const FractionPrecision &base, int32_t minSig, int32_t maxSig, - UNumberRoundingPriority priority); + UNumberRoundingPriority priority, + bool retain); - static IncrementPrecision constructIncrement(double increment, int32_t minFrac); + static IncrementPrecision constructIncrement(uint64_t increment, impl::digits_t magnitude); static CurrencyPrecision constructCurrency(UCurrencyUsage usage); @@ -801,7 +835,6 @@ class U_I18N_API Precision : public UMemory { */ class U_I18N_API FractionPrecision : public Precision { public: -#ifndef U_HIDE_DRAFT_API /** * Override maximum fraction digits with maximum significant digits depending on the magnitude * of the number. See UNumberRoundingPriority. @@ -814,13 +847,12 @@ class U_I18N_API FractionPrecision : public Precision { * How to disambiguate between fraction digits and significant digits. * @return A precision for chaining or passing to the NumberFormatter precision() setter. * - * @draft ICU 69 + * @stable ICU 69 */ Precision withSignificantDigits( int32_t minSignificantDigits, int32_t maxSignificantDigits, UNumberRoundingPriority priority) const; -#endif // U_HIDE_DRAFT_API /** * Ensure that no less than this number of significant digits are retained when rounding @@ -1170,31 +1202,32 @@ class U_I18N_API Scale : public UMemory { namespace impl { -// Do not enclose entire StringProp with #ifndef U_HIDE_INTERNAL_API, needed for a protected field +// Do not enclose entire StringProp with #ifndef U_HIDE_INTERNAL_API, needed for a protected field. 
+// And do not enclose its class boilerplate within #ifndef U_HIDE_INTERNAL_API. /** * Manages NumberFormatterSettings::usage()'s char* instance on the heap. * @internal */ class U_I18N_API StringProp : public UMemory { -#ifndef U_HIDE_INTERNAL_API - public: + /** @internal */ + ~StringProp(); + /** @internal */ StringProp(const StringProp &other); /** @internal */ StringProp &operator=(const StringProp &other); +#ifndef U_HIDE_INTERNAL_API + /** @internal */ StringProp(StringProp &&src) U_NOEXCEPT; /** @internal */ StringProp &operator=(StringProp &&src) U_NOEXCEPT; - /** @internal */ - ~StringProp(); - /** @internal */ int16_t length() const { return fLength; @@ -2735,14 +2768,20 @@ class U_I18N_API FormattedNumber : public UMemory, public FormattedValue { */ MeasureUnit getOutputUnit(UErrorCode& status) const; -#ifndef U_HIDE_INTERNAL_API +#ifndef U_HIDE_DRAFT_API + /** - * Gets the gender of the formatted output. Returns "" when the gender is - * unknown, or for ungendered languages. + * Gets the noun class of the formatted output. Returns `OTHER` when the noun class + * is not supported yet. * - * @internal ICU 69 technology preview. + * @return `NounClass` + * @draft ICU 71. */ - const char *getGender(UErrorCode& status) const; + NounClass getNounClass(UErrorCode &status) const; + +#endif // U_HIDE_DRAFT_API + +#ifndef U_HIDE_INTERNAL_API /** * Gets the raw DecimalQuantity for plural rule selection. @@ -2758,6 +2797,18 @@ class U_I18N_API FormattedNumber : public UMemory, public FormattedValue { #endif /* U_HIDE_INTERNAL_API */ +#ifndef U_HIDE_DEPRECATED_API + + /** + * Gets the gender of the formatted output. Returns "" when the gender is + * unknown, or for ungendered languages. + * + * @deprecated This API is for ICU internal use only. 
+ */ + const char *getGender(UErrorCode &status) const; + +#endif /* U_HIDE_DEPRECATED_API */ + private: // Can't use LocalPointer because UFormattedNumberData is forward-declared const impl::UFormattedNumberData *fData; diff --git a/deps/icu-small/source/i18n/unicode/rbtz.h b/deps/icu-small/source/i18n/unicode/rbtz.h index 1eca70c338bf60..4fbf330cef1c7b 100644 --- a/deps/icu-small/source/i18n/unicode/rbtz.h +++ b/deps/icu-small/source/i18n/unicode/rbtz.h @@ -303,16 +303,14 @@ class U_I18N_API RuleBasedTimeZone : public BasicTimeZone { virtual void getTimeZoneRules(const InitialTimeZoneRule*& initial, const TimeZoneRule* trsrules[], int32_t& trscount, UErrorCode& status) const override; -#ifndef U_FORCE_HIDE_DRAFT_API /** * Get time zone offsets from local wall time. - * @draft ICU 69 + * @stable ICU 69 */ virtual void getOffsetFromLocal( UDate date, UTimeZoneLocalOption nonExistingTimeOpt, UTimeZoneLocalOption duplicatedTimeOpt, int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) const override; -#endif /* U_FORCE_HIDE_DRAFT_API */ private: void deleteRules(void); diff --git a/deps/icu-small/source/i18n/unicode/simpletz.h b/deps/icu-small/source/i18n/unicode/simpletz.h index f5c155de466923..f73d823ee58f1c 100644 --- a/deps/icu-small/source/i18n/unicode/simpletz.h +++ b/deps/icu-small/source/i18n/unicode/simpletz.h @@ -620,16 +620,14 @@ class U_I18N_API SimpleTimeZone: public BasicTimeZone { virtual void getOffset(UDate date, UBool local, int32_t& rawOffset, int32_t& dstOffset, UErrorCode& ec) const override; -#ifndef U_FORCE_HIDE_DRAFT_API /** * Get time zone offsets from local wall time. 
- * @draft ICU 69 + * @stable ICU 69 */ virtual void getOffsetFromLocal( UDate date, UTimeZoneLocalOption nonExistingTimeOpt, UTimeZoneLocalOption duplicatedTimeOpt, int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) const override; -#endif /* U_FORCE_HIDE_DRAFT_API */ /** * Returns the TimeZone's raw GMT offset (i.e., the number of milliseconds to add diff --git a/deps/icu-small/source/i18n/unicode/ucal.h b/deps/icu-small/source/i18n/unicode/ucal.h index 94abae83919a85..3a4fb69fc3ed9d 100644 --- a/deps/icu-small/source/i18n/unicode/ucal.h +++ b/deps/icu-small/source/i18n/unicode/ucal.h @@ -1617,25 +1617,23 @@ U_CAPI int32_t U_EXPORT2 ucal_getTimeZoneIDForWindowsID(const UChar* winid, int32_t len, const char* region, UChar* id, int32_t idCapacity, UErrorCode* status); -#ifndef U_FORCE_HIDE_DRAFT_API /** * Options used by ucal_getTimeZoneOffsetFromLocal and BasicTimeZone::getOffsetFromLocal() * to specify how to interpret an input time when it does not exist, or when it is ambiguous, * around a time zone transition. - * @draft ICU 69 + * @stable ICU 69 */ enum UTimeZoneLocalOption { -#ifndef U_HIDE_DRAFT_API /** * An input time is always interpreted as local time before * a time zone transition. - * @draft ICU 69 + * @stable ICU 69 */ UCAL_TZ_LOCAL_FORMER = 0x04, /** * An input time is always interpreted as local time after * a time zone transition. - * @draft ICU 69 + * @stable ICU 69 */ UCAL_TZ_LOCAL_LATTER = 0x0C, /** @@ -1644,7 +1642,7 @@ enum UTimeZoneLocalOption { * sides of a time zone transition are standard time, * or daylight saving time, the local time before the * transition is used. - * @draft ICU 69 + * @stable ICU 69 */ UCAL_TZ_LOCAL_STANDARD_FORMER = UCAL_TZ_LOCAL_FORMER | 0x01, /** @@ -1653,7 +1651,7 @@ enum UTimeZoneLocalOption { * sides of a time zone transition are standard time, * or daylight saving time, the local time after the * transition is used. 
- * @draft ICU 69 + * @stable ICU 69 */ UCAL_TZ_LOCAL_STANDARD_LATTER = UCAL_TZ_LOCAL_LATTER | 0x01, /** @@ -1662,7 +1660,7 @@ enum UTimeZoneLocalOption { * sides of a time zone transition are standard time, * or daylight saving time, the local time before the * transition is used. - * @draft ICU 69 + * @stable ICU 69 */ UCAL_TZ_LOCAL_DAYLIGHT_FORMER = UCAL_TZ_LOCAL_FORMER | 0x03, /** @@ -1671,19 +1669,11 @@ enum UTimeZoneLocalOption { * sides of a time zone transition are standard time, * or daylight saving time, the local time after the * transition is used. - * @draft ICU 69 + * @stable ICU 69 */ UCAL_TZ_LOCAL_DAYLIGHT_LATTER = UCAL_TZ_LOCAL_LATTER | 0x03, -#else /* U_HIDE_DRAFT_API */ - /** - * Dummy value to prevent empty enum if U_HIDE_DRAFT_API. - * This will go away when draft conditionals are removed. - * @internal - */ - UCAL_TZ_LOCAL_NONE = 0, -#endif /* U_HIDE_DRAFT_API */ }; -typedef enum UTimeZoneLocalOption UTimeZoneLocalOption; /**< @draft ICU 69 */ +typedef enum UTimeZoneLocalOption UTimeZoneLocalOption; /**< @stable ICU 69 */ /** * Returns the time zone raw and GMT offset for the given moment @@ -1710,7 +1700,7 @@ typedef enum UTimeZoneLocalOption UTimeZoneLocalOption; /**< @draft ICU 69 */ * typically one hour. * If the status is set to one of the error code, the value set is unspecified. * @param status A pointer to a UErrorCode to receive any errors. 
-* @draft ICU 69 +* @stable ICU 69 */ U_CAPI void U_EXPORT2 ucal_getTimeZoneOffsetFromLocal( @@ -1718,7 +1708,6 @@ ucal_getTimeZoneOffsetFromLocal( UTimeZoneLocalOption nonExistingTimeOpt, UTimeZoneLocalOption duplicatedTimeOpt, int32_t* rawOffset, int32_t* dstOffset, UErrorCode* status); -#endif /* U_FORCE_HIDE_DRAFT_API */ #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/unicode/ucol.h b/deps/icu-small/source/i18n/unicode/ucol.h index 6d22eb6069ec87..24963312216941 100644 --- a/deps/icu-small/source/i18n/unicode/ucol.h +++ b/deps/icu-small/source/i18n/unicode/ucol.h @@ -397,7 +397,7 @@ typedef enum { * @param status A pointer to a UErrorCode to receive any errors * @return A pointer to a UCollator, or 0 if an error occurred. * @see ucol_openRules - * @see ucol_safeClone + * @see ucol_clone * @see ucol_close * @stable ICU 2.0 */ @@ -425,7 +425,7 @@ ucol_open(const char *loc, UErrorCode *status); * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case * of error - please use status argument to check for errors. * @see ucol_open - * @see ucol_safeClone + * @see ucol_clone * @see ucol_close * @stable ICU 2.0 */ @@ -521,7 +521,7 @@ ucol_getContractionsAndExpansions( const UCollator *coll, * @param coll The UCollator to close. * @see ucol_open * @see ucol_openRules - * @see ucol_safeClone + * @see ucol_clone * @stable ICU 2.0 */ U_CAPI void U_EXPORT2 @@ -985,7 +985,6 @@ ucol_getShortDefinitionString(const UCollator *coll, * * @deprecated ICU 54 */ - U_DEPRECATED int32_t U_EXPORT2 ucol_normalizeShortDefinitionString(const char *source, char *destination, @@ -1310,6 +1309,20 @@ U_DEPRECATED void U_EXPORT2 ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status); #endif /* U_HIDE_DEPRECATED_API */ +/** + * Thread safe cloning operation. The result is a clone of a given collator. 
+ * @param coll collator to be cloned + * @param status to indicate whether the operation went on smoothly or there were errors + * @return pointer to the new clone + * @see ucol_open + * @see ucol_openRules + * @see ucol_close + * @stable ICU 71 + */ +U_CAPI UCollator* U_EXPORT2 ucol_clone(const UCollator *coll, UErrorCode *status); + +#ifndef U_HIDE_DEPRECATED_API + /** * Thread safe cloning operation. The result is a clone of a given collator. * @param coll collator to be cloned @@ -1325,21 +1338,20 @@ ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *stat * If *pBufferSize is not enough for a stack-based safe clone, * new memory will be allocated. * @param status to indicate whether the operation went on smoothly or there were errors - * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any - * allocations were necessary. + * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used + * if pBufferSize != NULL and any allocations were necessary * @return pointer to the new clone * @see ucol_open * @see ucol_openRules * @see ucol_close - * @stable ICU 2.0 + * @deprecated ICU 71 Use ucol_clone() instead. */ -U_CAPI UCollator* U_EXPORT2 +U_DEPRECATED UCollator* U_EXPORT2 ucol_safeClone(const UCollator *coll, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status); -#ifndef U_HIDE_DEPRECATED_API /** default memory size for the new clone. * @deprecated ICU 52. Do not rely on ucol_safeClone() cloning into any provided buffer. diff --git a/deps/icu-small/source/i18n/unicode/udatpg.h b/deps/icu-small/source/i18n/unicode/udatpg.h index efe4357bfeecd2..684a905e42602e 100644 --- a/deps/icu-small/source/i18n/unicode/udatpg.h +++ b/deps/icu-small/source/i18n/unicode/udatpg.h @@ -492,6 +492,11 @@ udatpg_getFieldDisplayName(const UDateTimePatternGenerator *dtpg, * for those two skeletons, so the result is put together with this pattern, * resulting in "d-MMM h:mm". 
* + * There are four DateTimeFormats in a UDateTimePatternGenerator object, + * corresponding to date styles UDAT_FULL..UDAT_SHORT. This method sets + * all of them to the specified pattern. To set them individually, see + * udatpg_setDateTimeFormatForStyle. + * * @param dtpg a pointer to UDateTimePatternGenerator. * @param dtFormat * message format pattern, here {1} will be replaced by the date @@ -505,6 +510,12 @@ udatpg_setDateTimeFormat(const UDateTimePatternGenerator *dtpg, /** * Getter corresponding to setDateTimeFormat. + * + * There are four DateTimeFormats in a UDateTimePatternGenerator object, + * corresponding to date styles UDAT_FULL..UDAT_SHORT. This method gets + * the style for UDAT_MEDIUM (the default). To get them individually, see + * udatpg_getDateTimeFormatForStyle. + * * @param dtpg a pointer to UDateTimePatternGenerator. * @param pLength A pointer that will receive the length of the format * @return dateTimeFormat. @@ -514,6 +525,70 @@ U_CAPI const UChar * U_EXPORT2 udatpg_getDateTimeFormat(const UDateTimePatternGenerator *dtpg, int32_t *pLength); +#if !UCONFIG_NO_FORMATTING +#ifndef U_HIDE_DRAFT_API +/** + * dateTimeFormats are message patterns used to compose combinations of date + * and time patterns. There are four length styles, corresponding to the + * inferred style of the date pattern; these are UDateFormatStyle values: + * - UDAT_FULL (for date pattern with weekday and long month), else + * - UDAT_LONG (for a date pattern with long month), else + * - UDAT_MEDIUM (for a date pattern with abbreviated month), else + * - UDAT_SHORT (for any other date pattern). + * For details on dateTimeFormats, see + * https://www.unicode.org/reports/tr35/tr35-dates.html#dateTimeFormats. + * The default pattern in the root locale for all styles is "{1} {0}". + * + * @param udtpg + * a pointer to the UDateTimePatternGenerator + * @param style + * one of UDAT_FULL..UDAT_SHORT. Error if out of range. 
+ * @param dateTimeFormat + * the new dateTimeFormat to set for the specified style + * @param length + * the length of dateTimeFormat, or -1 if unknown and pattern + * is null-terminated + * @param pErrorCode + * a pointer to the UErrorCode (in/out parameter); if no failure + * status is already set, it will be set according to result of the + * function (e.g. U_ILLEGAL_ARGUMENT_ERROR for style out of range). + * @draft ICU 71 + */ +U_CAPI void U_EXPORT2 +udatpg_setDateTimeFormatForStyle(UDateTimePatternGenerator *udtpg, + UDateFormatStyle style, + const UChar *dateTimeFormat, int32_t length, + UErrorCode *pErrorCode); + +/** + * Getter corresponding to udatpg_setDateTimeFormatForStyle. + * + * @param udtpg + * a pointer to the UDateTimePatternGenerator + * @param style + * one of UDAT_FULL..UDAT_SHORT. Error if out of range. + * @param pLength + * a pointer that will receive the length of the format. May be NULL + * if length is not desired. + * @param pErrorCode + * a pointer to the UErrorCode (in/out parameter); if no failure + * status is already set, it will be set according to result of the + * function (e.g. U_ILLEGAL_ARGUMENT_ERROR for style out of range). + * @return + * pointer to the current dateTimeFormat (0 terminated) for the specified + * style, or empty string in case of error. The pointer and its contents + * may no longer be valid if udatpg_setDateTimeFormat is called, or + * udatpg_setDateTimeFormatForStyle for the same style is called, or the + * UDateTimePatternGenerator object is closed. + * @draft ICU 71 + */ +U_CAPI const UChar* U_EXPORT2 +udatpg_getDateTimeFormatForStyle(const UDateTimePatternGenerator *udtpg, + UDateFormatStyle style, int32_t *pLength, + UErrorCode *pErrorCode); +#endif /* U_HIDE_DRAFT_API */ +#endif /* #if !UCONFIG_NO_FORMATTING */ + /** * The decimal value is used in formatting fractions of seconds. 
If the * skeleton contains fractional seconds, then this is used with the diff --git a/deps/icu-small/source/i18n/unicode/unounclass.h b/deps/icu-small/source/i18n/unicode/unounclass.h new file mode 100644 index 00000000000000..1721dbd584fc4c --- /dev/null +++ b/deps/icu-small/source/i18n/unicode/unounclass.h @@ -0,0 +1,43 @@ +// © 2022 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef __UNOUNCLASS_H__ +#define __UNOUNCLASS_H__ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/uversion.h" + +U_NAMESPACE_BEGIN + +#ifndef U_HIDE_DRAFT_API + +/** + * Represents all the grammatical noun classes that are supported by CLDR. + * + * @draft ICU 71. + */ +enum NounClass { + OTHER = 0, + NEUTER = 1, + FEMININE = 2, + MASCULINE = 3, + ANIMATE = 4, + INANIMATE = 5, + PERSONAL = 6, + COMMON = 7, +}; + +#endif // U_HIDE_DRAFT_API + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_FORMATTING */ + +#endif /* U_SHOW_CPLUSPLUS_API */ + +#endif // __UNOUNCLASS_H__ diff --git a/deps/icu-small/source/i18n/unicode/unum.h b/deps/icu-small/source/i18n/unicode/unum.h index 14f76168b61b5f..863695591ab2a5 100644 --- a/deps/icu-small/source/i18n/unicode/unum.h +++ b/deps/icu-small/source/i18n/unicode/unum.h @@ -303,23 +303,21 @@ typedef enum UNumberFormatRoundingMode { * @stable ICU 4.8 */ UNUM_ROUND_UNNECESSARY, -#ifndef U_HIDE_DRAFT_API /** * Rounds ties toward the odd number. - * @draft ICU 69 + * @stable ICU 69 */ UNUM_ROUND_HALF_ODD, /** * Rounds ties toward +∞. - * @draft ICU 69 + * @stable ICU 69 */ UNUM_ROUND_HALF_CEILING, /** * Rounds ties toward -∞. - * @draft ICU 69 + * @stable ICU 69 */ UNUM_ROUND_HALF_FLOOR, -#endif // U_HIDE_DRAFT_API } UNumberFormatRoundingMode; /** The possible number format pad positions. 
@@ -401,13 +399,24 @@ typedef enum UNumberFormatFields { UNUM_MEASURE_UNIT_FIELD, /** @stable ICU 64 */ UNUM_COMPACT_FIELD, +#ifndef U_HIDE_DRAFT_API + /** + * Approximately sign. In ICU 70, this was categorized under the generic SIGN field. + * @draft ICU 71 + */ + UNUM_APPROXIMATELY_SIGN_FIELD, +#endif // U_HIDE_DRAFT_API #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal UNumberFormatFields value. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - UNUM_FIELD_COUNT = UNUM_SIGN_FIELD + 3 +#ifndef U_HIDE_DRAFT_API + UNUM_FIELD_COUNT = UNUM_COMPACT_FIELD + 2 +#else // U_HIDE_DRAFT_API (for UNUM_APPROXIMATELY_SIGN_FIELD) + UNUM_FIELD_COUNT = UNUM_COMPACT_FIELD + 1 +#endif // U_HIDE_DRAFT_API (for UNUM_APPROXIMATELY_SIGN_FIELD) #endif /* U_HIDE_DEPRECATED_API */ } UNumberFormatFields; diff --git a/deps/icu-small/source/i18n/unicode/unumberformatter.h b/deps/icu-small/source/i18n/unicode/unumberformatter.h index cb980cd94ddfc8..58a75baf073c43 100644 --- a/deps/icu-small/source/i18n/unicode/unumberformatter.h +++ b/deps/icu-small/source/i18n/unicode/unumberformatter.h @@ -78,7 +78,6 @@ * */ -#ifndef U_FORCE_HIDE_DRAFT_API /** * An enum declaring how to resolve conflicts between maximum fraction digits and maximum * significant digits. @@ -115,24 +114,23 @@ * Here, RELAXED favors Max-Fraction and STRICT favors Max-Significant. Note that this larger * number caused the two modes to favor the opposite result. * - * @draft ICU 69 + * @stable ICU 69 */ typedef enum UNumberRoundingPriority { /** * Favor greater precision by relaxing one of the rounding constraints. * - * @draft ICU 69 + * @stable ICU 69 */ UNUM_ROUNDING_PRIORITY_RELAXED, /** * Favor adherence to all rounding constraints by producing lower precision. * - * @draft ICU 69 + * @stable ICU 69 */ UNUM_ROUNDING_PRIORITY_STRICT, } UNumberRoundingPriority; -#endif // U_FORCE_HIDE_DRAFT_API /** * An enum declaring how to render units, including currencies. 
Example outputs when formatting 123 USD and 123 @@ -435,21 +433,19 @@ typedef enum UNumberSignDisplay { */ UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO, -#ifndef U_HIDE_DRAFT_API /** * Same as AUTO, but do not show the sign on negative zero. * - * @draft ICU 69 + * @stable ICU 69 */ UNUM_SIGN_NEGATIVE, /** * Same as ACCOUNTING, but do not show the sign on negative zero. * - * @draft ICU 69 + * @stable ICU 69 */ UNUM_SIGN_ACCOUNTING_NEGATIVE, -#endif // U_HIDE_DRAFT_API // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API, // needed for unconditionalized struct MacroProps @@ -498,31 +494,29 @@ typedef enum UNumberDecimalSeparatorDisplay { UNUM_DECIMAL_SEPARATOR_COUNT } UNumberDecimalSeparatorDisplay; -#ifndef U_FORCE_HIDE_DRAFT_API /** * An enum declaring how to render trailing zeros. * * - UNUM_TRAILING_ZERO_AUTO: 0.90, 1.00, 1.10 * - UNUM_TRAILING_ZERO_HIDE_IF_WHOLE: 0.90, 1, 1.10 * - * @draft ICU 69 + * @stable ICU 69 */ typedef enum UNumberTrailingZeroDisplay { /** * Display trailing zeros according to the settings for minimum fraction and significant digits. * - * @draft ICU 69 + * @stable ICU 69 */ UNUM_TRAILING_ZERO_AUTO, /** * Same as AUTO, but hide trailing zeros after the decimal separator if they are all zero. * - * @draft ICU 69 + * @stable ICU 69 */ UNUM_TRAILING_ZERO_HIDE_IF_WHOLE, } UNumberTrailingZeroDisplay; -#endif // U_FORCE_HIDE_DRAFT_API struct UNumberFormatter; /** diff --git a/deps/icu-small/source/i18n/unicode/vtzone.h b/deps/icu-small/source/i18n/unicode/vtzone.h index e7d2f515410ee1..ecf335bbe3428c 100644 --- a/deps/icu-small/source/i18n/unicode/vtzone.h +++ b/deps/icu-small/source/i18n/unicode/vtzone.h @@ -264,16 +264,14 @@ class U_I18N_API VTimeZone : public BasicTimeZone { virtual void getOffset(UDate date, UBool local, int32_t& rawOffset, int32_t& dstOffset, UErrorCode& ec) const override; -#ifndef U_FORCE_HIDE_DRAFT_API /** * Get time zone offsets from local wall time. 
- * @draft ICU 69 + * @stable ICU 69 */ virtual void getOffsetFromLocal( UDate date, UTimeZoneLocalOption nonExistingTimeOpt, UTimeZoneLocalOption duplicatedTimeOpt, int32_t& rawOffset, int32_t& dstOffset, UErrorCode& status) const override; -#endif /* U_FORCE_HIDE_DRAFT_API */ /** * Sets the TimeZone's raw GMT offset (i.e., the number of milliseconds to add diff --git a/deps/icu-small/source/i18n/units_complexconverter.cpp b/deps/icu-small/source/i18n/units_complexconverter.cpp index 78cefbf7ebb733..ecbe3c787941a7 100644 --- a/deps/icu-small/source/i18n/units_complexconverter.cpp +++ b/deps/icu-small/source/i18n/units_complexconverter.cpp @@ -183,7 +183,7 @@ MaybeStackVector ComplexUnitsConverter::convert(double quantity, } else { quantity = remainder; } - } + } } applyRounder(intValues, quantity, rounder, status); @@ -210,7 +210,6 @@ MaybeStackVector ComplexUnitsConverter::convert(double quantity, } } - // Transfer values into result and return: for(int32_t i = 0, n = unitsConverters_.length(); i < n; ++i) { U_ASSERT(tmpResult[i] != nullptr); @@ -224,6 +223,12 @@ MaybeStackVector ComplexUnitsConverter::convert(double quantity, void ComplexUnitsConverter::applyRounder(MaybeStackArray &intValues, double &quantity, icu::number::impl::RoundingImpl *rounder, UErrorCode &status) const { + if (uprv_isInfinite(quantity) || uprv_isNaN(quantity)) { + // Inf and NaN can't be rounded, and calculating `carry` below is known + // to fail on Gentoo on HPPA and OpenSUSE on riscv64. Nothing to do. + return; + } + if (rounder == nullptr) { // Nothing to do for the quantity. 
return; diff --git a/deps/icu-small/source/i18n/units_complexconverter.h b/deps/icu-small/source/i18n/units_complexconverter.h index 5c669b45ddd7df..d56ce8d4ce378f 100644 --- a/deps/icu-small/source/i18n/units_complexconverter.h +++ b/deps/icu-small/source/i18n/units_complexconverter.h @@ -108,13 +108,15 @@ class U_I18N_API ComplexUnitsConverter : public UMemory { MaybeStackVector convert(double quantity, icu::number::impl::RoundingImpl *rounder, UErrorCode &status) const; - private: + // TODO(ICU-21937): Make it private after submitting the public units conversion API. MaybeStackVector unitsConverters_; + // TODO(ICU-21937): Make it private after submitting the public units conversion API. // Individual units of mixed units, sorted big to small, with indices // indicating the requested output mixed unit order. MaybeStackVector units_; + private: // Sorts units_, which must be populated before calling this, and populates // unitsConverters_. void init(const MeasureUnitImpl &inputUnit, const ConversionRates &ratesInfo, UErrorCode &status); diff --git a/deps/icu-small/source/i18n/units_converter.cpp b/deps/icu-small/source/i18n/units_converter.cpp index 7e946e584bb76a..82b8eea3d8cf6c 100644 --- a/deps/icu-small/source/i18n/units_converter.cpp +++ b/deps/icu-small/source/i18n/units_converter.cpp @@ -9,6 +9,7 @@ #include "cmemory.h" #include "double-conversion-string-to-double.h" #include "measunit_impl.h" +#include "putilimp.h" #include "uassert.h" #include "unicode/errorcode.h" #include "unicode/localpointer.h" @@ -588,10 +589,7 @@ double UnitsConverter::convert(double inputValue) const { if (conversionRate_.reciprocal) { if (result == 0) { - // TODO: demonstrate the resulting behaviour in tests... and figure - // out desired behaviour. (Theoretical result should be infinity, - // not 0.) 
- return 0.0; + return uprv_getInfinity(); } result = 1.0 / result; } @@ -603,10 +601,7 @@ double UnitsConverter::convertInverse(double inputValue) const { double result = inputValue; if (conversionRate_.reciprocal) { if (result == 0) { - // TODO: demonstrate the resulting behaviour in tests... and figure - // out desired behaviour. (Theoretical result should be infinity, - // not 0.) - return 0.0; + return uprv_getInfinity(); } result = 1.0 / result; } diff --git a/deps/icu-small/source/i18n/units_router.h b/deps/icu-small/source/i18n/units_router.h index b3300f7e27737a..d9fcffb2aa9e26 100644 --- a/deps/icu-small/source/i18n/units_router.h +++ b/deps/icu-small/source/i18n/units_router.h @@ -30,8 +30,6 @@ namespace units { struct RouteResult : UMemory { // A list of measures: a single measure for single units, multiple measures // for mixed units. - // - // TODO(icu-units/icu#21): figure out the right mixed unit API. MaybeStackVector measures; // The output unit for this RouteResult. This may be a MIXED unit - for diff --git a/deps/icu-small/source/i18n/uspoof_conf.cpp b/deps/icu-small/source/i18n/uspoof_conf.cpp index 04081cabfb0738..172c0711afbad3 100644 --- a/deps/icu-small/source/i18n/uspoof_conf.cpp +++ b/deps/icu-small/source/i18n/uspoof_conf.cpp @@ -63,23 +63,24 @@ U_NAMESPACE_USE // at the same time // -SPUString::SPUString(UnicodeString *s) { - fStr = s; +SPUString::SPUString(LocalPointer s) { + fStr = std::move(s); fCharOrStrTableIndex = 0; } SPUString::~SPUString() { - delete fStr; } -SPUStringPool::SPUStringPool(UErrorCode &status) : fVec(NULL), fHash(NULL) { - fVec = new UVector(status); - if (fVec == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; +SPUStringPool::SPUStringPool(UErrorCode &status) : fVec(nullptr), fHash(nullptr) { + LocalPointer vec(new UVector(status), status); + if (U_FAILURE(status)) { return; } + vec->setDeleter( + [](void *obj) {delete (SPUString *)obj;}); + fVec = vec.orphan(); fHash = uhash_open(uhash_hashUnicodeString, // key 
hash function uhash_compareUnicodeString, // Key Comparator NULL, // Value Comparator @@ -88,11 +89,6 @@ SPUStringPool::SPUStringPool(UErrorCode &status) : fVec(NULL), fHash(NULL) { SPUStringPool::~SPUStringPool() { - int i; - for (i=fVec->size()-1; i>=0; i--) { - SPUString *s = static_cast(fVec->elementAt(i)); - delete s; - } delete fVec; uhash_close(fHash); } @@ -135,18 +131,21 @@ void SPUStringPool::sort(UErrorCode &status) { SPUString *SPUStringPool::addString(UnicodeString *src, UErrorCode &status) { + LocalPointer lpSrc(src); + if (U_FAILURE(status)) { + return nullptr; + } SPUString *hashedString = static_cast(uhash_get(fHash, src)); - if (hashedString != NULL) { - delete src; - } else { - hashedString = new SPUString(src); - if (hashedString == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uhash_put(fHash, src, hashedString, &status); - fVec->addElementX(hashedString, status); + if (hashedString != nullptr) { + return hashedString; + } + LocalPointer spuStr(new SPUString(std::move(lpSrc)), status); + hashedString = spuStr.getAlias(); + fVec->adoptElement(spuStr.orphan(), status); + if (U_FAILURE(status)) { + return nullptr; } + uhash_put(fHash, src, hashedString, &status); return hashedString; } diff --git a/deps/icu-small/source/i18n/uspoof_conf.h b/deps/icu-small/source/i18n/uspoof_conf.h index 600d7ea42a430d..1eeecdfd5e40d4 100644 --- a/deps/icu-small/source/i18n/uspoof_conf.h +++ b/deps/icu-small/source/i18n/uspoof_conf.h @@ -39,11 +39,12 @@ U_NAMESPACE_BEGIN // Instances of SPUString exist during the compilation process only. struct SPUString : public UMemory { - UnicodeString *fStr; // The actual string. - int32_t fCharOrStrTableIndex; // Index into the final runtime data for this - // string (or, for length 1, the single string char - // itself, there being no string table entry for it.) - SPUString(UnicodeString *s); + LocalPointer fStr; // The actual string. 
+ int32_t fCharOrStrTableIndex; // Index into the final runtime data for this + // string (or, for length 1, the single string char + // itself, there being no string table entry for it.) + + SPUString(LocalPointer s); ~SPUString(); }; diff --git a/deps/icu-small/source/i18n/uspoof_impl.cpp b/deps/icu-small/source/i18n/uspoof_impl.cpp index b283d813210d9d..f96826f86ccc12 100644 --- a/deps/icu-small/source/i18n/uspoof_impl.cpp +++ b/deps/icu-small/source/i18n/uspoof_impl.cpp @@ -945,7 +945,7 @@ uspoof_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *ou uint32_t magic = ds->readUInt32(spoofDH->fMagic); ds->writeUInt32((uint32_t *)&outputDH->fMagic, magic); - if (outputDH->fFormatVersion != spoofDH->fFormatVersion) { + if (inBytes != outBytes) { uprv_memcpy(outputDH->fFormatVersion, spoofDH->fFormatVersion, sizeof(spoofDH->fFormatVersion)); } // swap starting at fLength diff --git a/deps/icu-small/source/i18n/vtzone.cpp b/deps/icu-small/source/i18n/vtzone.cpp index 9111e08848f99a..06f0b84c0f5d06 100644 --- a/deps/icu-small/source/i18n/vtzone.cpp +++ b/deps/icu-small/source/i18n/vtzone.cpp @@ -24,14 +24,6 @@ U_NAMESPACE_BEGIN -// This is the deleter that will be use to remove TimeZoneRule -U_CDECL_BEGIN -static void U_CALLCONV -deleteTimeZoneRule(void* obj) { - delete (TimeZoneRule*) obj; -} -U_CDECL_END - // Smybol characters used by RFC2445 VTIMEZONE static const UChar COLON = 0x3A; /* : */ static const UChar SEMICOLON = 0x3B; /* ; */ @@ -976,22 +968,19 @@ VTimeZone::VTimeZone(const VTimeZone& source) if (source.vtzlines != nullptr) { UErrorCode status = U_ZERO_ERROR; int32_t size = source.vtzlines->size(); - vtzlines = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, size, status); - if (vtzlines == nullptr) { + LocalPointer lpVtzLines( + new UVector(uprv_deleteUObject, uhash_compareUnicodeString, size, status), status); + if (U_FAILURE(status)) { return; } - if (U_SUCCESS(status)) { - for (int32_t i = 0; i < size; i++) { - 
UnicodeString *line = (UnicodeString*)source.vtzlines->elementAt(i); - vtzlines->addElementX(line->clone(), status); - if (U_FAILURE(status)) { - break; - } + for (int32_t i = 0; i < size; i++) { + UnicodeString *line = ((UnicodeString*)source.vtzlines->elementAt(i))->clone(); + lpVtzLines->adoptElement(line, status); + if (U_FAILURE(status) || line == nullptr) { + return; } } - if (U_FAILURE(status) && vtzlines != nullptr) { - delete vtzlines; - } + vtzlines = lpVtzLines.orphan(); } } @@ -1020,23 +1009,25 @@ VTimeZone::operator=(const VTimeZone& right) { } if (vtzlines != nullptr) { delete vtzlines; + vtzlines = nullptr; } if (right.vtzlines != nullptr) { UErrorCode status = U_ZERO_ERROR; int32_t size = right.vtzlines->size(); - vtzlines = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, size, status); - if (vtzlines != nullptr && U_SUCCESS(status)) { + LocalPointer lpVtzLines( + new UVector(uprv_deleteUObject, uhash_compareUnicodeString, size, status), status); + if (U_SUCCESS(status)) { for (int32_t i = 0; i < size; i++) { - UnicodeString *line = (UnicodeString*)right.vtzlines->elementAt(i); - vtzlines->addElementX(line->clone(), status); + LocalPointer line( + ((UnicodeString*)right.vtzlines->elementAt(i))->clone(), status); + lpVtzLines->adoptElement(line.orphan(), status); if (U_FAILURE(status)) { break; } } - } - if (U_FAILURE(status) && vtzlines != nullptr) { - delete vtzlines; - vtzlines = nullptr; + if (U_SUCCESS(status)) { + vtzlines = lpVtzLines.orphan(); + } } } tzurl = right.tzurl; @@ -1272,10 +1263,9 @@ VTimeZone::getTimeZoneRules(const InitialTimeZoneRule*& initial, void VTimeZone::load(VTZReader& reader, UErrorCode& status) { - vtzlines = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, DEFAULT_VTIMEZONE_LINES, status); - if (vtzlines == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - } + U_ASSERT(vtzlines == nullptr); + LocalPointer lpVtzLines( + new UVector(uprv_deleteUObject, uhash_compareUnicodeString, 
DEFAULT_VTIMEZONE_LINES, status), status); if (U_FAILURE(status)) { return; } @@ -1290,14 +1280,10 @@ VTimeZone::load(VTZReader& reader, UErrorCode& status) { // end of file if (start && line.startsWith(ICAL_END_VTIMEZONE, -1)) { LocalPointer element(new UnicodeString(line), status); + lpVtzLines->adoptElement(element.orphan(), status); if (U_FAILURE(status)) { - goto cleanupVtzlines; - } - vtzlines->addElementX(element.getAlias(), status); - if (U_FAILURE(status)) { - goto cleanupVtzlines; + return; } - element.orphan(); // on success, vtzlines owns the object. success = TRUE; } break; @@ -1312,14 +1298,10 @@ VTimeZone::load(VTZReader& reader, UErrorCode& status) { if (start) { if (line.length() > 0) { LocalPointer element(new UnicodeString(line), status); + lpVtzLines->adoptElement(element.orphan(), status); if (U_FAILURE(status)) { - goto cleanupVtzlines; + return; } - vtzlines->addElementX(element.getAlias(), status); - if (U_FAILURE(status)) { - goto cleanupVtzlines; - } - element.orphan(); // on success, vtzlines owns the object. } } line.remove(); @@ -1335,28 +1317,20 @@ VTimeZone::load(VTZReader& reader, UErrorCode& status) { if (start) { if (line.startsWith(ICAL_END_VTIMEZONE, -1)) { LocalPointer element(new UnicodeString(line), status); + lpVtzLines->adoptElement(element.orphan(), status); if (U_FAILURE(status)) { - goto cleanupVtzlines; - } - vtzlines->addElementX(element.getAlias(), status); - if (U_FAILURE(status)) { - goto cleanupVtzlines; + return; } - element.orphan(); // on success, vtzlines owns the object. success = TRUE; break; } } else { if (line.startsWith(ICAL_BEGIN_VTIMEZONE, -1)) { LocalPointer element(new UnicodeString(line), status); + lpVtzLines->adoptElement(element.orphan(), status); if (U_FAILURE(status)) { - goto cleanupVtzlines; - } - vtzlines->addElementX(element.getAlias(), status); - if (U_FAILURE(status)) { - goto cleanupVtzlines; + return; } - element.orphan(); // on success, vtzlines owns the object. 
line.remove(); start = TRUE; eol = FALSE; @@ -1371,14 +1345,10 @@ VTimeZone::load(VTZReader& reader, UErrorCode& status) { if (U_SUCCESS(status)) { status = U_INVALID_STATE_ERROR; } - goto cleanupVtzlines; + return; } + vtzlines = lpVtzLines.orphan(); parse(status); - return; - -cleanupVtzlines: - delete vtzlines; - vtzlines = nullptr; } // parser state @@ -1398,8 +1368,6 @@ VTimeZone::parse(UErrorCode& status) { status = U_INVALID_STATE_ERROR; return; } - InitialTimeZoneRule *initialRule = nullptr; - RuleBasedTimeZone *rbtz = nullptr; // timezone ID UnicodeString tzid; @@ -1418,28 +1386,16 @@ VTimeZone::parse(UErrorCode& status) { UnicodeString name; // RFC2445 prop name UnicodeString value; // RFC2445 prop value - UVector *dates = nullptr; // list of RDATE or RRULE strings - UVector *rules = nullptr; // list of TimeZoneRule instances - int32_t finalRuleIdx = -1; int32_t finalRuleCount = 0; - rules = new UVector(status); - if (rules == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - } - if (U_FAILURE(status)) { - goto cleanupParse; - } - // Set the deleter to remove TimeZoneRule vectors to avoid memory leaks due to unowned TimeZoneRules. - rules->setDeleter(deleteTimeZoneRule); + // Set the deleter on rules to remove TimeZoneRule vectors to avoid memory leaks due to unowned TimeZoneRules. + UVector rules(uprv_deleteUObject, nullptr, status); - dates = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status); - if (dates == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - } + // list of RDATE or RRULE strings + UVector dates(uprv_deleteUObject, uhash_compareUnicodeString, status); if (U_FAILURE(status)) { - goto cleanupParse; + return; } for (n = 0; n < vtzlines->size(); n++) { @@ -1469,18 +1425,18 @@ VTimeZone::parse(UErrorCode& status) { // can be any value. 
lastmod = parseDateTimeString(value, 0, status); if (U_FAILURE(status)) { - goto cleanupParse; + return; } } else if (name.compare(ICAL_BEGIN, -1) == 0) { UBool isDST = (value.compare(ICAL_DAYLIGHT, -1) == 0); if (value.compare(ICAL_STANDARD, -1) == 0 || isDST) { // tzid must be ready at this point if (tzid.length() == 0) { - goto cleanupParse; + return; } // initialize current zone properties - if (dates->size() != 0) { - dates->removeAllElements(); + if (dates.size() != 0) { + dates.removeAllElements(); } isRRULE = FALSE; from.remove(); @@ -1491,7 +1447,7 @@ VTimeZone::parse(UErrorCode& status) { } else { // BEGIN property other than STANDARD/DAYLIGHT // must not be there. - goto cleanupParse; + return; } } else if (name.compare(ICAL_END, -1) == 0) { break; @@ -1509,50 +1465,42 @@ VTimeZone::parse(UErrorCode& status) { } else if (name.compare(ICAL_RDATE, -1) == 0) { // RDATE mixed with RRULE is not supported if (isRRULE) { - goto cleanupParse; + return; } // RDATE value may contain multiple date delimited // by comma UBool nextDate = TRUE; int32_t dstart = 0; - UnicodeString *dstr = nullptr; + LocalPointer dstr; while (nextDate) { int32_t dend = value.indexOf(COMMA, dstart); if (dend == -1) { - dstr = new UnicodeString(value, dstart); + dstr.adoptInsteadAndCheckErrorCode(new UnicodeString(value, dstart), status); nextDate = FALSE; } else { - dstr = new UnicodeString(value, dstart, dend - dstart); - } - if (dstr == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - } else { - dates->addElementX(dstr, status); + dstr.adoptInsteadAndCheckErrorCode(new UnicodeString(value, dstart, dend - dstart), status); } + dates.adoptElement(dstr.orphan(), status); if (U_FAILURE(status)) { - goto cleanupParse; + return; } dstart = dend + 1; } } else if (name.compare(ICAL_RRULE, -1) == 0) { // RRULE mixed with RDATE is not supported - if (!isRRULE && dates->size() != 0) { - goto cleanupParse; + if (!isRRULE && dates.size() != 0) { + return; } isRRULE = true; LocalPointer 
element(new UnicodeString(value), status); + dates.adoptElement(element.orphan(), status); if (U_FAILURE(status)) { - goto cleanupParse; - } - dates->addElementX(element.getAlias(), status); - if (U_FAILURE(status)) { - goto cleanupParse; + return; } - element.orphan(); // on success, dates owns the object. } else if (name.compare(ICAL_END, -1) == 0) { // Mandatory properties if (dtstart.length() == 0 || from.length() == 0 || to.length() == 0) { - goto cleanupParse; + return; } // if zonename is not available, create one from tzid if (zonename.length() == 0) { @@ -1560,7 +1508,7 @@ VTimeZone::parse(UErrorCode& status) { } // create a time zone rule - TimeZoneRule *rule = nullptr; + LocalPointer rule; int32_t fromOffset = 0; int32_t toOffset = 0; int32_t rawOffset = 0; @@ -1571,7 +1519,7 @@ VTimeZone::parse(UErrorCode& status) { fromOffset = offsetStrToMillis(from, status); toOffset = offsetStrToMillis(to, status); if (U_FAILURE(status)) { - goto cleanupParse; + return; } if (dst) { @@ -1592,18 +1540,20 @@ VTimeZone::parse(UErrorCode& status) { // start time start = parseDateTimeString(dtstart, fromOffset, status); if (U_FAILURE(status)) { - goto cleanupParse; + return; } // Create the rule UDate actualStart = MAX_MILLIS; if (isRRULE) { - rule = createRuleByRRULE(zonename, rawOffset, dstSavings, start, dates, fromOffset, status); + rule.adoptInsteadAndCheckErrorCode( + createRuleByRRULE(zonename, rawOffset, dstSavings, start, &dates, fromOffset, status), status); } else { - rule = createRuleByRDATE(zonename, rawOffset, dstSavings, start, dates, fromOffset, status); + rule.adoptInsteadAndCheckErrorCode( + createRuleByRDATE(zonename, rawOffset, dstSavings, start, &dates, fromOffset, status), status); } - if (U_FAILURE(status) || rule == nullptr) { - goto cleanupParse; + if (U_FAILURE(status)) { + return; } else { UBool startAvail = rule->getFirstStart(fromOffset, 0, actualStart); if (startAvail && actualStart < firstStart) { @@ -1626,9 +1576,9 @@ 
VTimeZone::parse(UErrorCode& status) { } } } - rules->addElementX(rule, status); + rules.adoptElement(rule.orphan(), status); if (U_FAILURE(status)) { - goto cleanupParse; + return; } state = VTZ; } @@ -1636,28 +1586,31 @@ VTimeZone::parse(UErrorCode& status) { } } // Must have at least one rule - if (rules->size() == 0) { - goto cleanupParse; + if (rules.size() == 0) { + return; } // Create a initial rule getDefaultTZName(tzid, FALSE, zonename); - initialRule = new InitialTimeZoneRule(zonename, initialRawOffset, initialDSTSavings); - if (initialRule == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - goto cleanupParse; + LocalPointer initialRule( + new InitialTimeZoneRule(zonename, initialRawOffset, initialDSTSavings), status); + if (U_FAILURE(status)) { + return; } // Finally, create the RuleBasedTimeZone - rbtz = new RuleBasedTimeZone(tzid, initialRule); - if (rbtz == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - goto cleanupParse; + // C++ awkwardness on memory allocation failure: the constructor wont be run, meaning + // that initialRule wont be adopted/deleted, as it normally would be. 
+ LocalPointer rbtz( + new RuleBasedTimeZone(tzid, initialRule.getAlias()), status); + if (U_SUCCESS(status)) { + initialRule.orphan(); + } else { + return; } - initialRule = nullptr; // already adopted by RBTZ, no need to delete - for (n = 0; n < rules->size(); n++) { - TimeZoneRule *r = (TimeZoneRule*)rules->elementAt(n); + for (n = 0; n < rules.size(); n++) { + TimeZoneRule *r = (TimeZoneRule*)rules.elementAt(n); AnnualTimeZoneRule *atzrule = dynamic_cast(r); if (atzrule != nullptr) { if (atzrule->getEndYear() == AnnualTimeZoneRule::MAX_YEAR) { @@ -1669,18 +1622,18 @@ VTimeZone::parse(UErrorCode& status) { if (finalRuleCount > 2) { // Too many final rules status = U_ILLEGAL_ARGUMENT_ERROR; - goto cleanupParse; + return; } if (finalRuleCount == 1) { - if (rules->size() == 1) { + if (rules.size() == 1) { // Only one final rule, only governs the initial rule, // which is already initialized, thus, we do not need to // add this transition rule - rules->removeAllElements(); + rules.removeAllElements(); } else { // Normalize the final rule - AnnualTimeZoneRule *finalRule = (AnnualTimeZoneRule*)rules->elementAt(finalRuleIdx); + AnnualTimeZoneRule *finalRule = (AnnualTimeZoneRule*)rules.elementAt(finalRuleIdx); int32_t tmpRaw = finalRule->getRawOffset(); int32_t tmpDST = finalRule->getDSTSavings(); @@ -1688,11 +1641,11 @@ VTimeZone::parse(UErrorCode& status) { UDate finalStart, start; finalRule->getFirstStart(initialRawOffset, initialDSTSavings, finalStart); start = finalStart; - for (n = 0; n < rules->size(); n++) { + for (n = 0; n < rules.size(); n++) { if (finalRuleIdx == n) { continue; } - TimeZoneRule *r = (TimeZoneRule*)rules->elementAt(n); + TimeZoneRule *r = (TimeZoneRule*)rules.elementAt(n); UDate lastStart; r->getFinalStart(tmpRaw, tmpDST, lastStart); if (lastStart > start) { @@ -1704,78 +1657,58 @@ VTimeZone::parse(UErrorCode& status) { } } - TimeZoneRule *newRule = nullptr; + LocalPointer newRule; UnicodeString tznam; if (start == finalStart) { // Transform 
this into a single transition - newRule = new TimeArrayTimeZoneRule( - finalRule->getName(tznam), - finalRule->getRawOffset(), - finalRule->getDSTSavings(), - &finalStart, - 1, - DateTimeRule::UTC_TIME); + newRule.adoptInsteadAndCheckErrorCode( + new TimeArrayTimeZoneRule( + finalRule->getName(tznam), + finalRule->getRawOffset(), + finalRule->getDSTSavings(), + &finalStart, + 1, + DateTimeRule::UTC_TIME), + status); } else { // Update the end year int32_t y, m, d, dow, doy, mid; Grego::timeToFields(start, y, m, d, dow, doy, mid); - newRule = new AnnualTimeZoneRule( - finalRule->getName(tznam), - finalRule->getRawOffset(), - finalRule->getDSTSavings(), - *(finalRule->getRule()), - finalRule->getStartYear(), - y); + newRule.adoptInsteadAndCheckErrorCode( + new AnnualTimeZoneRule( + finalRule->getName(tznam), + finalRule->getRawOffset(), + finalRule->getDSTSavings(), + *(finalRule->getRule()), + finalRule->getStartYear(), + y), + status); } - if (newRule == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - goto cleanupParse; + if (U_FAILURE(status)) { + return; } - rules->removeElementAt(finalRuleIdx); - rules->addElementX(newRule, status); + rules.removeElementAt(finalRuleIdx); + rules.adoptElement(newRule.orphan(), status); if (U_FAILURE(status)) { - delete newRule; - goto cleanupParse; + return; } } } - while (!rules->isEmpty()) { - TimeZoneRule *tzr = (TimeZoneRule*)rules->orphanElementAt(0); + while (!rules.isEmpty()) { + TimeZoneRule *tzr = (TimeZoneRule*)rules.orphanElementAt(0); rbtz->addTransitionRule(tzr, status); if (U_FAILURE(status)) { - goto cleanupParse; + return; } } rbtz->complete(status); if (U_FAILURE(status)) { - goto cleanupParse; + return; } - delete rules; - delete dates; - tz = rbtz; + tz = rbtz.orphan(); setID(tzid); - return; - -cleanupParse: - if (rules != nullptr) { - while (!rules->isEmpty()) { - TimeZoneRule *r = (TimeZoneRule*)rules->orphanElementAt(0); - delete r; - } - delete rules; - } - if (dates != nullptr) { - delete dates; - } - 
if (initialRule != nullptr) { - delete initialRule; - } - if (rbtz != nullptr) { - delete rbtz; - } - return; } void @@ -1809,7 +1742,7 @@ VTimeZone::write(VTZWriter& writer, UErrorCode& status) const { icutzprop.append(u'['); icutzprop.append(icutzver); icutzprop.append(u']'); - customProps.addElementX(&icutzprop, status); + customProps.addElement(&icutzprop, status); } writeZone(writer, *tz, &customProps, status); } @@ -1827,34 +1760,35 @@ VTimeZone::write(UDate start, VTZWriter& writer, UErrorCode& status) const { // Extract rules applicable to dates after the start time getTimeZoneRulesAfter(start, initial, transitionRules, status); + LocalPointer lpInitial(initial); + LocalPointer lpTransitionRules(transitionRules); if (U_FAILURE(status)) { return; } // Create a RuleBasedTimeZone with the subset rule getID(tzid); - RuleBasedTimeZone rbtz(tzid, initial); - if (transitionRules != nullptr) { - while (!transitionRules->isEmpty()) { - TimeZoneRule *tr = (TimeZoneRule*)transitionRules->orphanElementAt(0); + RuleBasedTimeZone rbtz(tzid, lpInitial.orphan()); + if (lpTransitionRules.isValid()) { + U_ASSERT(transitionRules->hasDeleter()); // Assumed for U_FAILURE early return, below. 
+ while (!lpTransitionRules->isEmpty()) { + TimeZoneRule *tr = (TimeZoneRule*)lpTransitionRules->orphanElementAt(0); rbtz.addTransitionRule(tr, status); if (U_FAILURE(status)) { - goto cleanupWritePartial; + return; } } - delete transitionRules; - transitionRules = nullptr; } rbtz.complete(status); if (U_FAILURE(status)) { - goto cleanupWritePartial; + return; } if (olsonzid.length() > 0 && icutzver.length() > 0) { UnicodeString *icutzprop = new UnicodeString(ICU_TZINFO_PROP); if (icutzprop == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; - goto cleanupWritePartial; + return; } icutzprop->append(olsonzid); icutzprop->append((UChar)0x005B/*'['*/); @@ -1862,23 +1796,12 @@ VTimeZone::write(UDate start, VTZWriter& writer, UErrorCode& status) const { icutzprop->append(ICU_TZINFO_PARTIAL, -1); appendMillis(start, *icutzprop); icutzprop->append((UChar)0x005D/*']'*/); - customProps.addElementX(icutzprop, status); + customProps.adoptElement(icutzprop, status); if (U_FAILURE(status)) { - delete icutzprop; - goto cleanupWritePartial; + return; } } writeZone(writer, rbtz, &customProps, status); - return; - -cleanupWritePartial: - if (initial != nullptr) { - delete initial; - } - if (transitionRules != nullptr) { - U_ASSERT(transitionRules->hasDeleter()); - delete transitionRules; - } } void diff --git a/deps/icu-small/source/i18n/zonemeta.cpp b/deps/icu-small/source/i18n/zonemeta.cpp index b8afa4760f1823..e60215c9988e6d 100644 --- a/deps/icu-small/source/i18n/zonemeta.cpp +++ b/deps/icu-small/source/i18n/zonemeta.cpp @@ -97,21 +97,13 @@ deleteUCharString(void *obj) { uprv_free(entry); } -/** - * Deleter for UVector - */ -static void U_CALLCONV -deleteUVector(void *obj) { - delete (icu::UVector*) obj; -} - /** * Deleter for OlsonToMetaMappingEntry */ static void U_CALLCONV deleteOlsonToMetaMappingEntry(void *obj) { icu::OlsonToMetaMappingEntry *entry = (icu::OlsonToMetaMappingEntry*)obj; - uprv_free(entry); + delete entry; } U_CDECL_END @@ -477,11 +469,11 @@ 
ZoneMeta::getCanonicalCountry(const UnicodeString &tzid, UnicodeString &country, UErrorCode ec = U_ZERO_ERROR; if (singleZone) { if (!gSingleZoneCountries->contains((void*)region)) { - gSingleZoneCountries->addElementX((void*)region, ec); + gSingleZoneCountries->addElement((void*)region, ec); } } else { if (!gMultiZonesCountries->contains((void*)region)) { - gMultiZonesCountries->addElementX((void*)region, ec); + gMultiZonesCountries->addElement((void*)region, ec); } } } @@ -550,7 +542,7 @@ static void U_CALLCONV olsonToMetaInit(UErrorCode &status) { gOlsonToMeta = NULL; } else { uhash_setKeyDeleter(gOlsonToMeta, deleteUCharString); - uhash_setValueDeleter(gOlsonToMeta, deleteUVector); + uhash_setValueDeleter(gOlsonToMeta, uprv_deleteUObject); } } @@ -625,7 +617,7 @@ ZoneMeta::getMetazoneMappings(const UnicodeString &tzid) { UVector* ZoneMeta::createMetazoneMappings(const UnicodeString &tzid) { - UVector *mzMappings = NULL; + LocalPointer mzMappings; UErrorCode status = U_ZERO_ERROR; UnicodeString canonicalID; @@ -677,41 +669,32 @@ ZoneMeta::createMetazoneMappings(const UnicodeString &tzid) { continue; } - OlsonToMetaMappingEntry *entry = (OlsonToMetaMappingEntry*)uprv_malloc(sizeof(OlsonToMetaMappingEntry)); - if (entry == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; + LocalPointer entry(new OlsonToMetaMappingEntry, status); + if (U_FAILURE(status)) { break; } entry->mzid = mz_name; entry->from = from; entry->to = to; - if (mzMappings == NULL) { - mzMappings = new UVector(deleteOlsonToMetaMappingEntry, NULL, status); + if (mzMappings.isNull()) { + mzMappings.adoptInsteadAndCheckErrorCode( + new UVector(deleteOlsonToMetaMappingEntry, nullptr, status), status); if (U_FAILURE(status)) { - delete mzMappings; - mzMappings = NULL; - uprv_free(entry); break; } } - mzMappings->addElementX(entry, status); + mzMappings->adoptElement(entry.orphan(), status); if (U_FAILURE(status)) { break; } } ures_close(mz); - if (U_FAILURE(status)) { - if (mzMappings != NULL) { - delete 
mzMappings; - mzMappings = NULL; - } - } } } ures_close(rb); - return mzMappings; + return U_SUCCESS(status) ? mzMappings.orphan() : nullptr; } UnicodeString& U_EXPORT2 @@ -775,6 +758,7 @@ static void U_CALLCONV initAvailableMetaZoneIDs () { // No valueDeleter, because the vector maintain the value objects gMetaZoneIDs = new UVector(NULL, uhash_compareUChars, status); if (U_FAILURE(status) || gMetaZoneIDs == NULL) { + delete gMetaZoneIDs; gMetaZoneIDs = NULL; uhash_close(gMetaZoneIDTable); gMetaZoneIDTable = NULL; @@ -792,20 +776,22 @@ static void U_CALLCONV initAvailableMetaZoneIDs () { } const char *mzID = ures_getKey(res.getAlias()); int32_t len = static_cast(uprv_strlen(mzID)); - UChar *uMzID = (UChar*)uprv_malloc(sizeof(UChar) * (len + 1)); - if (uMzID == NULL) { + LocalMemory uMzID((UChar*)uprv_malloc(sizeof(UChar) * (len + 1))); + if (uMzID.isNull()) { status = U_MEMORY_ALLOCATION_ERROR; break; } - u_charsToUChars(mzID, uMzID, len); + u_charsToUChars(mzID, uMzID.getAlias(), len); uMzID[len] = 0; - UnicodeString *usMzID = new UnicodeString(uMzID); - if (uhash_get(gMetaZoneIDTable, usMzID) == NULL) { - gMetaZoneIDs->addElementX((void *)uMzID, status); - uhash_put(gMetaZoneIDTable, (void *)usMzID, (void *)uMzID, &status); - } else { - uprv_free(uMzID); - delete usMzID; + LocalPointer usMzID(new UnicodeString(uMzID.getAlias()), status); + if (U_FAILURE(status)) { + break; + } + if (uhash_get(gMetaZoneIDTable, usMzID.getAlias()) == NULL) { + // Note: gMetaZoneIDTable adopts its keys, but not its values. + // gMetaZoneIDs adopts its values. 
+ uhash_put(gMetaZoneIDTable, usMzID.orphan(), uMzID.getAlias(), &status); + gMetaZoneIDs->adoptElement(uMzID.orphan(), status); } } ures_close(bundle); diff --git a/deps/icu-small/source/i18n/zonemeta.h b/deps/icu-small/source/i18n/zonemeta.h index f21399342b9e67..dd4fec957fedae 100644 --- a/deps/icu-small/source/i18n/zonemeta.h +++ b/deps/icu-small/source/i18n/zonemeta.h @@ -18,11 +18,11 @@ U_NAMESPACE_BEGIN -typedef struct OlsonToMetaMappingEntry { +struct OlsonToMetaMappingEntry : public UMemory { const UChar *mzid; // const because it's a reference to a resource bundle string. UDate from; UDate to; -} OlsonToMetaMappingEntry; +}; class UVector; class TimeZone; diff --git a/deps/icu-small/source/stubdata/BUILD b/deps/icu-small/source/stubdata/BUILD.bazel similarity index 100% rename from deps/icu-small/source/stubdata/BUILD rename to deps/icu-small/source/stubdata/BUILD.bazel diff --git a/deps/icu-small/source/tools/icuexportdata/icuexportdata.cpp b/deps/icu-small/source/tools/icuexportdata/icuexportdata.cpp index ef933676115e1b..7431ac74ab86f4 100644 --- a/deps/icu-small/source/tools/icuexportdata/icuexportdata.cpp +++ b/deps/icu-small/source/tools/icuexportdata/icuexportdata.cpp @@ -3,6 +3,7 @@ #include #include +#include #include "toolutil.h" #include "uoptions.h" #include "cmemory.h" @@ -11,8 +12,10 @@ #include "unicode/uchar.h" #include "unicode/errorcode.h" #include "unicode/uniset.h" +#include "unicode/uscript.h" #include "unicode/putil.h" #include "unicode/umutablecptrie.h" +#include "ucase.h" #include "writesrc.h" U_NAMESPACE_USE @@ -27,6 +30,75 @@ UBool haveCopyright = TRUE; UCPTrieType trieType = UCPTRIE_TYPE_SMALL; const char* destdir = ""; +// Mask constants for modified values in the Script CodePointTrie, values are logically 12-bits. 
+int16_t DATAEXPORT_SCRIPT_X_WITH_COMMON = 0x0400; +int16_t DATAEXPORT_SCRIPT_X_WITH_INHERITED = 0x0800; +int16_t DATAEXPORT_SCRIPT_X_WITH_OTHER = 0x0c00; + +// TODO(ICU-21821): Replace this with a call to a library function +int32_t scxCodePoints[] = { + 7415, 7377, 7380, 7387, 7390, 7391, 7394, 7395, 7396, 7397, + 7398, 7399, 7400, 7403, 7404, 7406, 7407, 7408, 7409, 113824, + 113825, 113826, 113827, 834, 837, 7616, 7617, 12294, 12350, 12351, + 12688, 12689, 12690, 12691, 12692, 12693, 12694, 12695, 12696, 12697, + 12698, 12699, 12700, 12701, 12702, 12703, 12736, 12737, 12738, 12739, + 12740, 12741, 12742, 12743, 12744, 12745, 12746, 12747, 12748, 12749, + 12750, 12751, 12752, 12753, 12754, 12755, 12756, 12757, 12758, 12759, + 12760, 12761, 12762, 12763, 12764, 12765, 12766, 12767, 12768, 12769, + 12770, 12771, 12832, 12833, 12834, 12835, 12836, 12837, 12838, 12839, + 12840, 12841, 12842, 12843, 12844, 12845, 12846, 12847, 12848, 12849, + 12850, 12851, 12852, 12853, 12854, 12855, 12856, 12857, 12858, 12859, + 12860, 12861, 12862, 12863, 12864, 12865, 12866, 12867, 12868, 12869, + 12870, 12871, 12928, 12929, 12930, 12931, 12932, 12933, 12934, 12935, + 12936, 12937, 12938, 12939, 12940, 12941, 12942, 12943, 12944, 12945, + 12946, 12947, 12948, 12949, 12950, 12951, 12952, 12953, 12954, 12955, + 12956, 12957, 12958, 12959, 12960, 12961, 12962, 12963, 12964, 12965, + 12966, 12967, 12968, 12969, 12970, 12971, 12972, 12973, 12974, 12975, + 12976, 12992, 12993, 12994, 12995, 12996, 12997, 12998, 12999, 13000, + 13001, 13002, 13003, 13055, 13144, 13145, 13146, 13147, 13148, 13149, + 13150, 13151, 13152, 13153, 13154, 13155, 13156, 13157, 13158, 13159, + 13160, 13161, 13162, 13163, 13164, 13165, 13166, 13167, 13168, 13179, + 13180, 13181, 13182, 13183, 13280, 13281, 13282, 13283, 13284, 13285, + 13286, 13287, 13288, 13289, 13290, 13291, 13292, 13293, 13294, 13295, + 13296, 13297, 13298, 13299, 13300, 13301, 13302, 13303, 13304, 13305, + 13306, 13307, 13308, 13309, 13310, 
119648, 119649, 119650, 119651, 119652, + 119653, 119654, 119655, 119656, 119657, 119658, 119659, 119660, 119661, 119662, + 119663, 119664, 119665, 127568, 127569, 867, 868, 869, 870, 871, + 872, 873, 874, 875, 876, 877, 878, 879, 7418, 7674, + 66272, 66273, 66274, 66275, 66276, 66277, 66278, 66279, 66280, 66281, + 66282, 66283, 66284, 66285, 66286, 66287, 66288, 66289, 66290, 66291, + 66292, 66293, 66294, 66295, 66296, 66297, 66298, 66299, 1748, 64830, + 64831, 1611, 1612, 1613, 1614, 1615, 1616, 1617, 1618, 1619, + 1620, 1621, 1648, 65010, 65021, 7381, 7382, 7384, 7393, 7402, + 7405, 7413, 7414, 43249, 12330, 12331, 12332, 12333, 43471, 65794, + 65847, 65848, 65849, 65850, 65851, 65852, 65853, 65854, 65855, 1156, + 1159, 11843, 42607, 1157, 1158, 1155, 7672, 7379, 7411, 7416, + 7417, 7401, 7383, 7385, 7388, 7389, 7392, 43251, 4347, 3046, + 3047, 3048, 3049, 3050, 3051, 3052, 3053, 3054, 3055, 3056, + 3057, 3058, 3059, 70401, 70403, 70459, 70460, 73680, 73681, 73683, + 2790, 2791, 2792, 2793, 2794, 2795, 2796, 2797, 2798, 2799, + 2662, 2663, 2664, 2665, 2666, 2667, 2668, 2669, 2670, 2671, + 42752, 42753, 42754, 42755, 42756, 42757, 42758, 42759, 12337, 12338, + 12339, 12340, 12341, 12441, 12442, 12443, 12444, 12448, 12540, 65392, + 65438, 65439, 3302, 3303, 3304, 3305, 3306, 3307, 3308, 3309, + 3310, 3311, 8239, 68338, 6146, 6147, 6149, 1564, 1632, 1633, + 1634, 1635, 1636, 1637, 1638, 1639, 1640, 1641, 2534, 2535, + 2536, 2537, 2538, 2539, 2540, 2541, 2542, 2543, 4160, 4161, + 4162, 4163, 4164, 4165, 4166, 4167, 4168, 4169, 65792, 65793, + 65799, 65800, 65801, 65802, 65803, 65804, 65805, 65806, 65807, 65808, + 65809, 65810, 65811, 65812, 65813, 65814, 65815, 65816, 65817, 65818, + 65819, 65820, 65821, 65822, 65823, 65824, 65825, 65826, 65827, 65828, + 65829, 65830, 65831, 65832, 65833, 65834, 65835, 65836, 65837, 65838, + 65839, 65840, 65841, 65842, 65843, 7412, 8432, 12348, 12349, 43310, + 7376, 7378, 5941, 5942, 2406, 2407, 2408, 2409, 2410, 2411, + 2412, 2413, 
2414, 2415, 12291, 12307, 12316, 12317, 12318, 12319, + 12336, 12343, 65093, 65094, 1548, 1563, 12289, 12290, 12296, 12297, + 12298, 12299, 12300, 12301, 12302, 12303, 12304, 12305, 12308, 12309, + 12310, 12311, 12312, 12313, 12314, 12315, 12539, 65377, 65378, 65379, + 65380, 65381, 7386, 1567, 7410, 1600, 43062, 43063, 43064, 43065, + 2386, 2385, 43059, 43060, 43061, 43056, 43057, 43058, 2404, 2405 + }; + void handleError(ErrorCode& status, const char* context) { if (status.isFailure()) { std::cerr << "Error: " << context << ": " << status.errorName() << std::endl; @@ -96,6 +168,110 @@ void dumpEnumeratedProperty(UProperty uproperty, FILE* f) { usrc_writeUCPTrie(f, shortPropName, utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML); } +void dumpScriptExtensions(FILE* f) { + IcuToolErrorCode status("icuexportdata: dumpScriptExtensions"); + + fputs("[[script_extensions]]\n", f); + const char* scxFullPropName = u_getPropertyName(UCHAR_SCRIPT_EXTENSIONS, U_LONG_PROPERTY_NAME); + const char* scxShortPropName = u_getPropertyName(UCHAR_SCRIPT_EXTENSIONS, U_SHORT_PROPERTY_NAME); + fprintf(f, "long_name = \"%s\"\n", scxFullPropName); + if (scxShortPropName) fprintf(f, "short_name = \"%s\"\n", scxShortPropName); + + // We want to use 16 bits for our exported trie of sc/scx data because we + // need 12 bits to match the 12 bits of data stored for sc/scx in the trie + // in the uprops.icu data file. + UCPTrieValueWidth scWidth = UCPTRIE_VALUE_BITS_16; + + // Create a mutable UCPTrie builder populated with Script property values data. + const UCPMap* scInvMap = u_getIntPropertyMap(UCHAR_SCRIPT, status); + handleError(status, scxFullPropName); + LocalUMutableCPTriePointer builder(umutablecptrie_fromUCPMap(scInvMap, status)); + handleError(status, scxFullPropName); + + // The values for the output scx companion array. + // Invariant is that all subvectors are distinct. 
+ std::vector< std::vector > outputDedupVec; + + // The sc/scx companion array is an array of arrays (of script codes) + fputs("script_code_array = [\n", f); + for(const UChar32 cp : scxCodePoints) { + // Get the Script value + uint32_t scVal = umutablecptrie_get(builder.getAlias(), cp); + // Get the Script_Extensions value (array of Script codes) + const int32_t SCX_ARRAY_CAPACITY = 32; + UScriptCode scxValArray[SCX_ARRAY_CAPACITY]; + int32_t numScripts = uscript_getScriptExtensions(cp, scxValArray, SCX_ARRAY_CAPACITY, status); + handleError(status, scxFullPropName); + + // Convert the scx array into a vector + std::vector scxValVec; + for(int i = 0; i < numScripts; i++) { + scxValVec.push_back(scxValArray[i]); + } + // Ensure that it is sorted + std::sort(scxValVec.begin(), scxValVec.end()); + // Copy the Script value into the first position of the scx array only + // if we have the "other" case (Script value is not Common nor Inherited). + // This offers faster access when users want only the Script value. + if (scVal != USCRIPT_COMMON && scVal != USCRIPT_INHERITED) { + scxValVec.insert(scxValVec.begin(), scVal); + } + + // See if there is already an scx value array matching the newly built one. + // If there is, then use its index. + // If not, then append the new value array. 
+ bool isScxValUnique = true; + size_t outputIndex = 0; + for (outputIndex = 0; outputIndex < outputDedupVec.size(); outputIndex++) { + if (outputDedupVec[outputIndex] == scxValVec) { + isScxValUnique = false; + break; + } + } + + if (isScxValUnique) { + outputDedupVec.push_back(scxValVec); + usrc_writeArray(f, " [", scxValVec.data(), 16, scxValVec.size(), " ", "],\n"); + } + + // We must update the value in the UCPTrie for the code point to contain: + // 9..0 the Script code in the lower 10 bits when 11..10 is 0, else it is + // the index into the companion array + // 11..10 the same higher-order 2 bits in the trie in uprops.icu indicating whether + // 3: other + // 2: Script=Inherited + // 1: Script=Common + // 0: Script=value in 9..0 (N/A because we are in this loop to create the companion array for non-0 cases) + uint16_t mask = 0; + if (scVal == USCRIPT_COMMON) { + mask = DATAEXPORT_SCRIPT_X_WITH_COMMON; + } else if (scVal == USCRIPT_INHERITED) { + mask = DATAEXPORT_SCRIPT_X_WITH_INHERITED; + } else { + mask = DATAEXPORT_SCRIPT_X_WITH_OTHER; + } + + // The new trie value is the index into the new array with the high order bits set + uint32_t newScVal = outputIndex | mask; + + // Update the code point in the mutable trie builder with the trie value + umutablecptrie_set(builder.getAlias(), cp, newScVal, status); + handleError(status, scxFullPropName); + } + fputs("]\n\n", f); // Print the TOML close delimiter for the outer array. + + // Convert from mutable trie builder to immutable trie. 
+ LocalUCPTriePointer utrie(umutablecptrie_buildImmutable( + builder.getAlias(), + trieType, + scWidth, + status)); + handleError(status, scxFullPropName); + + fputs("[script_extensions.code_point_trie]\n", f); + usrc_writeUCPTrie(f, scxShortPropName, utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML); +} + FILE* prepareOutputFile(const char* basename) { IcuToolErrorCode status("icuexportdata"); CharString outFileName; @@ -158,45 +334,42 @@ static UOption options[]={ UOPTION_QUIET, }; -int main(int argc, char* argv[]) { - U_MAIN_INIT_ARGS(argc, argv); - - /* preset then read command line options */ - options[OPT_DESTDIR].value=u_getDataDirectory(); - argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options); - - if(options[OPT_VERSION].doesOccur) { - printf("icuexportdata version %s, ICU tool to dump data files for external consumers\n", - U_ICU_DATA_VERSION); - printf("%s\n", U_COPYRIGHT_STRING); - exit(0); - } - - /* error handling, printing usage message */ - if(argc<0) { - fprintf(stderr, - "error in command line argument \"%s\"\n", - argv[-argc]); - } else if(argc<2) { - argc=-1; - } - - /* get the options values */ - haveCopyright = options[OPT_COPYRIGHT].doesOccur; - destdir = options[OPT_DESTDIR].value; - VERBOSE = options[OPT_VERBOSE].doesOccur; - QUIET = options[OPT_QUIET].doesOccur; +void printHelp(FILE* stdfile, const char* program) { + fprintf(stdfile, + "usage: %s -m mode [-options] [--all | properties...]\n" + "\tdump Unicode property data to .toml files\n" + "options:\n" + "\t-h or -? 
or --help this usage text\n" + "\t-V or --version show a version message\n" + "\t-m or --mode mode: currently only 'uprops' and 'ucase', but more may be added\n" + "\t --trie-type set the trie type (small or fast, default small)\n" + "\t-d or --destdir destination directory, followed by the path\n" + "\t --all write out all properties known to icuexportdata\n" + "\t --index write an _index.toml summarizing all data exported\n" + "\t-c or --copyright include a copyright notice\n" + "\t-v or --verbose Turn on verbose output\n" + "\t-q or --quiet do not display warnings and progress\n", + program); +} +int exportUprops(int argc, char* argv[]) { // Load list of Unicode properties std::vector propNames; for (int i=1; i(i); const char* propName = u_getPropertyName(uprop, U_SHORT_PROPERTY_NAME); if (propName == NULL) { @@ -207,47 +380,10 @@ int main(int argc, char* argv[]) { } if (propName != NULL) { propNames.push_back(propName); + } else { + std::cerr << "Warning: Could not find name for: " << uprop << std::endl; } - } - } - - if (propNames.empty() - || options[OPT_HELP_H].doesOccur - || options[OPT_HELP_QUESTION_MARK].doesOccur - || !options[OPT_MODE].doesOccur) { - FILE *stdfile=argc<0 ? stderr : stdout; - fprintf(stdfile, - "usage: %s -m uprops [-options] [--all | properties...]\n" - "\tdump Unicode property data to .toml files\n" - "options:\n" - "\t-h or -? or --help this usage text\n" - "\t-V or --version show a version message\n" - "\t-m or --mode mode: currently only 'uprops', but more may be added\n" - "\t --trie-type set the trie type (small or fast, default small)\n" - "\t-d or --destdir destination directory, followed by the path\n" - "\t --all write out all properties known to icuexportdata\n" - "\t --index write an _index.toml summarizing all data exported\n" - "\t-c or --copyright include a copyright notice\n" - "\t-v or --verbose Turn on verbose output\n" - "\t-q or --quiet do not display warnings and progress\n", - argv[0]); - return argc<0 ? 
U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; - } - - const char* mode = options[OPT_MODE].value; - if (uprv_strcmp(mode, "uprops") != 0) { - fprintf(stderr, "Invalid option for --mode (must be uprops)\n"); - return U_ILLEGAL_ARGUMENT_ERROR; - } - - if (options[OPT_TRIE_TYPE].doesOccur) { - if (uprv_strcmp(options[OPT_TRIE_TYPE].value, "fast") == 0) { - trieType = UCPTRIE_TYPE_FAST; - } else if (uprv_strcmp(options[OPT_TRIE_TYPE].value, "small") == 0) { - trieType = UCPTRIE_TYPE_SMALL; - } else { - fprintf(stderr, "Invalid option for --trie-type (must be small or fast)\n"); - return U_ILLEGAL_ARGUMENT_ERROR; + i++; } } @@ -272,6 +408,8 @@ int main(int argc, char* argv[]) { dumpBinaryProperty(propEnum, f); } else if (UCHAR_INT_START <= propEnum && propEnum <= UCHAR_INT_LIMIT) { dumpEnumeratedProperty(propEnum, f); + } else if (propEnum == UCHAR_SCRIPT_EXTENSIONS) { + dumpScriptExtensions(f); } else { std::cerr << "Don't know how to write property: " << propEnum << std::endl; return U_INTERNAL_PROGRAM_ERROR; @@ -293,3 +431,134 @@ int main(int argc, char* argv[]) { return 0; } + +struct AddRangeHelper { + UMutableCPTrie* ucptrie; +}; + +static UBool U_CALLCONV +addRangeToUCPTrie(const void* context, UChar32 start, UChar32 end, uint32_t value) { + IcuToolErrorCode status("addRangeToUCPTrie"); + UMutableCPTrie* ucptrie = ((const AddRangeHelper*) context)->ucptrie; + umutablecptrie_setRange(ucptrie, start, end, value, status); + handleError(status, "setRange"); + + return TRUE; +} + +int exportCase(int argc, char* argv[]) { + if (argc > 1) { + fprintf(stderr, "ucase mode does not expect additional arguments\n"); + return U_ILLEGAL_ARGUMENT_ERROR; + } + (void) argv; // Suppress unused variable warning + + IcuToolErrorCode status("icuexportdata"); + LocalUMutableCPTriePointer builder(umutablecptrie_open(0, 0, status)); + handleError(status, "exportCase"); + + int32_t exceptionsLength, unfoldLength; + const UCaseProps *caseProps = ucase_getSingleton(&exceptionsLength, 
&unfoldLength); + const UTrie2* caseTrie = &caseProps->trie; + + AddRangeHelper helper = { builder.getAlias() }; + utrie2_enum(caseTrie, NULL, addRangeToUCPTrie, &helper); + + UCPTrieValueWidth width = UCPTRIE_VALUE_BITS_16; + LocalUCPTriePointer utrie(umutablecptrie_buildImmutable( + builder.getAlias(), + trieType, + width, + status)); + handleError(status, "exportCase"); + + FILE* f = prepareOutputFile("ucase"); + + UVersionInfo versionInfo; + u_getUnicodeVersion(versionInfo); + char uvbuf[U_MAX_VERSION_STRING_LENGTH]; + u_versionToString(versionInfo, uvbuf); + fprintf(f, "icu_version = \"%s\"\nunicode_version = \"%s\"\n\n", + U_ICU_VERSION, + uvbuf); + + fputs("[ucase.code_point_trie]\n", f); + usrc_writeUCPTrie(f, "case_trie", utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML); + fputs("\n", f); + + const char* indent = " "; + const char* suffix = "\n]\n"; + + fputs("[ucase.exceptions]\n", f); + const char* exceptionsPrefix = "exceptions = [\n "; + int32_t exceptionsWidth = 16; + usrc_writeArray(f, exceptionsPrefix, caseProps->exceptions, exceptionsWidth, + exceptionsLength, indent, suffix); + fputs("\n", f); + + fputs("[ucase.unfold]\n", f); + const char* unfoldPrefix = "unfold = [\n "; + int32_t unfoldWidth = 16; + usrc_writeArray(f, unfoldPrefix, caseProps->unfold, unfoldWidth, + unfoldLength, indent, suffix); + + return 0; +} + +int main(int argc, char* argv[]) { + U_MAIN_INIT_ARGS(argc, argv); + + /* preset then read command line options */ + options[OPT_DESTDIR].value=u_getDataDirectory(); + argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options); + + if(options[OPT_VERSION].doesOccur) { + printf("icuexportdata version %s, ICU tool to dump data files for external consumers\n", + U_ICU_DATA_VERSION); + printf("%s\n", U_COPYRIGHT_STRING); + exit(0); + } + + /* error handling, printing usage message */ + if(argc<0) { + fprintf(stderr, + "error in command line argument \"%s\"\n", + argv[-argc]); + } + + if (argc < 0 + || options[OPT_HELP_H].doesOccur + || 
options[OPT_HELP_QUESTION_MARK].doesOccur + || !options[OPT_MODE].doesOccur) { + FILE *stdfile=argc<0 ? stderr : stdout; + printHelp(stdfile, argv[0]); + return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; + } + + /* get the options values */ + haveCopyright = options[OPT_COPYRIGHT].doesOccur; + destdir = options[OPT_DESTDIR].value; + VERBOSE = options[OPT_VERBOSE].doesOccur; + QUIET = options[OPT_QUIET].doesOccur; + + if (options[OPT_TRIE_TYPE].doesOccur) { + if (uprv_strcmp(options[OPT_TRIE_TYPE].value, "fast") == 0) { + trieType = UCPTRIE_TYPE_FAST; + } else if (uprv_strcmp(options[OPT_TRIE_TYPE].value, "small") == 0) { + trieType = UCPTRIE_TYPE_SMALL; + } else { + fprintf(stderr, "Invalid option for --trie-type (must be small or fast)\n"); + return U_ILLEGAL_ARGUMENT_ERROR; + } + } + + const char* mode = options[OPT_MODE].value; + if (uprv_strcmp(mode, "uprops") == 0) { + return exportUprops(argc, argv); + } else if (uprv_strcmp(mode, "ucase") == 0) { + return exportCase(argc, argv); + } + + fprintf(stderr, "Invalid option for --mode (must be uprops or ucase)\n"); + return U_ILLEGAL_ARGUMENT_ERROR; +} diff --git a/deps/icu-small/source/tools/toolutil/BUILD b/deps/icu-small/source/tools/toolutil/BUILD.bazel similarity index 100% rename from deps/icu-small/source/tools/toolutil/BUILD rename to deps/icu-small/source/tools/toolutil/BUILD.bazel diff --git a/deps/icu-small/source/tools/toolutil/toolutil.cpp b/deps/icu-small/source/tools/toolutil/toolutil.cpp index 1fc68aa69c84ff..a9dc37377a840a 100644 --- a/deps/icu-small/source/tools/toolutil/toolutil.cpp +++ b/deps/icu-small/source/tools/toolutil/toolutil.cpp @@ -228,18 +228,19 @@ uprv_compareGoldenFiles( std::ifstream ifs(goldenFilePath, std::ifstream::in); int32_t pos = 0; char c; - while ((c = ifs.get()) != std::char_traits::eof() && pos < bufferLen) { + while (ifs.get(c) && pos < bufferLen) { if (c != buffer[pos]) { // Files differ at this position - return pos; + break; } pos++; } - if (pos < bufferLen || 
c != std::char_traits::eof()) { - // Files are different lengths - return pos; + if (pos == bufferLen && ifs.eof()) { + // Files are same lengths + pos = -1; } - return -1; + ifs.close(); + return pos; } /*U_CAPI UDate U_EXPORT2 diff --git a/tools/icu/current_ver.dep b/tools/icu/current_ver.dep index d135e99146e041..9d2ca52eda4153 100644 --- a/tools/icu/current_ver.dep +++ b/tools/icu/current_ver.dep @@ -1,6 +1,6 @@ [ { - "url": "https://github.com/unicode-org/icu/releases/download/release-70-1/icu4c-70_1-src.tgz", - "md5": "65287befec8116d79af23a58aa50c60d" + "url": "https://github.com/unicode-org/icu/releases/download/release-71-1/icu4c-71_1-src.tgz", + "md5": "e06ffc96f59762bd3c929b217445aaec" } ]