Skip to content

Commit

Permalink
deps: update icu to 76.1
Browse files: browse the repository at this point in the history
PR-URL: nodejs#55551
Reviewed-By: Michaël Zasso <targos@protonmail.com>
Reviewed-By: Richard Lau <rlau@redhat.com>
  • Loading branch information
nodejs-github-bot authored and tpoisseau committed Nov 21, 2024
1 parent b88cc39 commit 8cbd3f8
Show file tree
Hide file tree
Showing 414 changed files with 42,422 additions and 16,007 deletions.
30 changes: 29 additions & 1 deletion deps/icu-small/LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,34 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

----------------------------------------------------------------------

JSON parsing library (nlohmann/json)

File: vendor/json/upstream/single_include/nlohmann/json.hpp (only for ICU4C)

MIT License

Copyright (c) 2013-2022 Niels Lohmann

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

----------------------------------------------------------------------

File: aclocal.m4 (only for ICU4C)
Section: pkg.m4 - Macros to locate and utilise pkg-config.

Expand Down Expand Up @@ -473,7 +501,7 @@ File: config.guess (only for ICU4C)

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful, but
Expand Down
4 changes: 2 additions & 2 deletions deps/icu-small/README-FULL-ICU.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
ICU sources - auto generated by shrink-icu-src.py

This directory contains the ICU subset used by --with-intl=full-icu
It is a strict subset of ICU 75 source files with the following exception(s):
* deps/icu-small/source/data/in/icudt75l.dat.bz2 : compressed data file
It is a strict subset of ICU 76 source files with the following exception(s):
* deps/icu-small/source/data/in/icudt76l.dat.bz2 : compressed data file


To rebuild this directory, see ../../tools/icu/README.md
Expand Down
2 changes: 1 addition & 1 deletion deps/icu-small/source/common/appendable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Appendable::~Appendable() {}
UBool
Appendable::appendCodePoint(UChar32 c) {
if(c<=0xffff) {
return appendCodeUnit((char16_t)c);
return appendCodeUnit(static_cast<char16_t>(c));
} else {
return appendCodeUnit(U16_LEAD(c)) && appendCodeUnit(U16_TRAIL(c));
}
Expand Down
56 changes: 28 additions & 28 deletions deps/icu-small/source/common/bmpset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
int32_t trail=start&0x3f; // Named for UTF-8 2-byte trail byte with lower 6 bits.

// Set one bit indicating an all-one block.
uint32_t bits=(uint32_t)1<<lead;
uint32_t bits = static_cast<uint32_t>(1) << lead;
if((start+1)==limit) { // Single-character shortcut.
table[trail]|=bits;
return;
Expand All @@ -100,9 +100,9 @@ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
++lead;
}
if(lead<limitLead) {
bits=~(((unsigned)1<<lead)-1);
bits = ~((static_cast<unsigned>(1) << lead) - 1);
if(limitLead<0x20) {
bits&=((unsigned)1<<limitLead)-1;
bits &= (static_cast<unsigned>(1) << limitLead) - 1;
}
for(trail=0; trail<64; ++trail) {
table[trail]|=bits;
Expand All @@ -111,7 +111,7 @@ static void set32x64Bits(uint32_t table[64], int32_t start, int32_t limit) {
// limit<=0x800. If limit==0x800 then limitLead=32 and limitTrail=0.
// In that case, bits=1<<limitLead is undefined but the bits value
// is not used because trail<limitTrail is already false.
bits=(uint32_t)1<<((limitLead == 0x20) ? (limitLead - 1) : limitLead);
bits = static_cast<uint32_t>(1) << ((limitLead == 0x20) ? (limitLead - 1) : limitLead);
for(trail=0; trail<limitTrail; ++trail) {
table[trail]|=bits;
}
Expand Down Expand Up @@ -290,22 +290,22 @@ int32_t BMPSet::findCodePoint(UChar32 c, int32_t lo, int32_t hi) const {

UBool
BMPSet::contains(UChar32 c) const {
if((uint32_t)c<=0xff) {
return (UBool)latin1Contains[c];
} else if((uint32_t)c<=0x7ff) {
return (UBool)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0);
} else if((uint32_t)c<0xd800 || (c>=0xe000 && c<=0xffff)) {
if (static_cast<uint32_t>(c) <= 0xff) {
return latin1Contains[c];
} else if (static_cast<uint32_t>(c) <= 0x7ff) {
return (table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0;
} else if (static_cast<uint32_t>(c) < 0xd800 || (c >= 0xe000 && c <= 0xffff)) {
int lead=c>>12;
uint32_t twoBits=(bmpBlockBits[(c>>6)&0x3f]>>lead)&0x10001;
if(twoBits<=1) {
// All 64 code points with the same bits 15..6
// are either in the set or not.
return (UBool)twoBits;
return twoBits;
} else {
// Look up the code point in its 4k block of code points.
return containsSlow(c, list4kStarts[lead], list4kStarts[lead+1]);
}
} else if((uint32_t)c<=0x10ffff) {
} else if (static_cast<uint32_t>(c) <= 0x10ffff) {
// surrogate or supplementary code point
return containsSlow(c, list4kStarts[0xd], list4kStarts[0x11]);
} else {
Expand All @@ -332,7 +332,7 @@ BMPSet::span(const char16_t *s, const char16_t *limit, USetSpanCondition spanCon
break;
}
} else if(c<=0x7ff) {
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) == 0) {
break;
}
} else if(c<0xd800 || c>=0xe000) {
Expand Down Expand Up @@ -372,7 +372,7 @@ BMPSet::span(const char16_t *s, const char16_t *limit, USetSpanCondition spanCon
break;
}
} else if(c<=0x7ff) {
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0) {
break;
}
} else if(c<0xd800 || c>=0xe000) {
Expand Down Expand Up @@ -421,7 +421,7 @@ BMPSet::spanBack(const char16_t *s, const char16_t *limit, USetSpanCondition spa
break;
}
} else if(c<=0x7ff) {
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))==0) {
if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) == 0) {
break;
}
} else if(c<0xd800 || c>=0xe000) {
Expand Down Expand Up @@ -464,7 +464,7 @@ BMPSet::spanBack(const char16_t *s, const char16_t *limit, USetSpanCondition spa
break;
}
} else if(c<=0x7ff) {
if((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) {
if ((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0) {
break;
}
} else if(c<0xd800 || c>=0xe000) {
Expand Down Expand Up @@ -527,7 +527,7 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
b=*s;
} while(U8_IS_SINGLE(b));
}
length=(int32_t)(limit-s);
length = static_cast<int32_t>(limit - s);
}

if(spanCondition!=USET_SPAN_NOT_CONTAINED) {
Expand All @@ -547,7 +547,7 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
* the truncated sequence.
*/
b=*(limit-1);
if((int8_t)b<0) {
if (static_cast<int8_t>(b) < 0) {
// b>=0x80: lead or trail byte
if(b<0xc0) {
// single trail byte, check for preceding 3- or 4-byte lead byte
Expand Down Expand Up @@ -602,15 +602,15 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
if(b>=0xe0) {
if(b<0xf0) {
if( /* handle U+0000..U+FFFF inline */
(t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
(t2=(uint8_t)(s[1]-0x80)) <= 0x3f
(t1 = static_cast<uint8_t>(s[0] - 0x80)) <= 0x3f &&
(t2 = static_cast<uint8_t>(s[1] - 0x80)) <= 0x3f
) {
b&=0xf;
uint32_t twoBits=(bmpBlockBits[t1]>>b)&0x10001;
if(twoBits<=1) {
// All 64 code points with this lead byte and middle trail byte
// are either in the set or not.
if(twoBits!=(uint32_t)spanCondition) {
if (twoBits != static_cast<uint32_t>(spanCondition)) {
return s-1;
}
} else {
Expand All @@ -624,12 +624,12 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
continue;
}
} else if( /* handle U+10000..U+10FFFF inline */
(t1=(uint8_t)(s[0]-0x80)) <= 0x3f &&
(t2=(uint8_t)(s[1]-0x80)) <= 0x3f &&
(t3=(uint8_t)(s[2]-0x80)) <= 0x3f
(t1 = static_cast<uint8_t>(s[0] - 0x80)) <= 0x3f &&
(t2 = static_cast<uint8_t>(s[1] - 0x80)) <= 0x3f &&
(t3 = static_cast<uint8_t>(s[2] - 0x80)) <= 0x3f
) {
// Give an illegal sequence the same value as the result of contains(FFFD).
UChar32 c=((UChar32)(b-0xf0)<<18)|((UChar32)t1<<12)|(t2<<6)|t3;
UChar32 c = (static_cast<UChar32>(b - 0xf0) << 18) | (static_cast<UChar32>(t1) << 12) | (t2 << 6) | t3;
if( ( (0x10000<=c && c<=0x10ffff) ?
containsSlow(c, list4kStarts[0x10], list4kStarts[0x11]) :
containsFFFD
Expand All @@ -643,9 +643,9 @@ BMPSet::spanUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanConditi
} else {
if( /* handle U+0000..U+07FF inline */
b>=0xc0 &&
(t1=(uint8_t)(*s-0x80)) <= 0x3f
(t1 = static_cast<uint8_t>(*s - 0x80)) <= 0x3f
) {
if((USetSpanCondition)((table7FF[t1]&((uint32_t)1<<(b&0x1f)))!=0) != spanCondition) {
if (static_cast<USetSpanCondition>((table7FF[t1] & (static_cast<uint32_t>(1) << (b & 0x1f))) != 0) != spanCondition) {
return s-1;
}
++s;
Expand Down Expand Up @@ -711,7 +711,7 @@ BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCon
c=utf8_prevCharSafeBody(s, 0, &length, b, -3);
// c is a valid code point, not ASCII, not a surrogate
if(c<=0x7ff) {
if((USetSpanCondition)((table7FF[c&0x3f]&((uint32_t)1<<(c>>6)))!=0) != spanCondition) {
if (static_cast<USetSpanCondition>((table7FF[c & 0x3f] & (static_cast<uint32_t>(1) << (c >> 6))) != 0) != spanCondition) {
return prev+1;
}
} else if(c<=0xffff) {
Expand All @@ -720,7 +720,7 @@ BMPSet::spanBackUTF8(const uint8_t *s, int32_t length, USetSpanCondition spanCon
if(twoBits<=1) {
// All 64 code points with the same bits 15..6
// are either in the set or not.
if(twoBits!=(uint32_t)spanCondition) {
if (twoBits != static_cast<uint32_t>(spanCondition)) {
return prev+1;
}
} else {
Expand Down
2 changes: 1 addition & 1 deletion deps/icu-small/source/common/bmpset.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ class BMPSet : public UMemory {
};

inline UBool BMPSet::containsSlow(UChar32 c, int32_t lo, int32_t hi) const {
return (UBool)(findCodePoint(c, lo, hi) & 1);
return findCodePoint(c, lo, hi) & 1;
}

U_NAMESPACE_END
Expand Down
18 changes: 9 additions & 9 deletions deps/icu-small/source/common/brkeng.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ UnhandledEngine::findBreaks( UText *text,
if (U_FAILURE(status)) return 0;
utext_setNativeIndex(text, startPos);
UChar32 c = utext_current32(text);
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
while (static_cast<int32_t>(utext_getNativeIndex(text)) < endPos && fHandled->contains(c)) {
utext_next32(text); // TODO: recast loop to work with post-increment operations.
c = utext_current32(text);
}
Expand Down Expand Up @@ -146,7 +146,7 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c, const char* locale) {
Mutex m(&gBreakEngineMutex);
int32_t i = fEngines->size();
while (--i >= 0) {
lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
lbe = static_cast<const LanguageBreakEngine*>(fEngines->elementAt(i));
if (lbe != nullptr && lbe->handles(c, locale)) {
return lbe;
}
Expand Down Expand Up @@ -259,7 +259,7 @@ ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
CharString ext;
const char16_t *extStart = u_memrchr(dictfname, 0x002e, dictnlength); // last dot
if (extStart != nullptr) {
int32_t len = (int32_t)(extStart - dictfname);
int32_t len = static_cast<int32_t>(extStart - dictfname);
ext.appendInvariantChars(UnicodeString(false, extStart + 1, dictnlength - len - 1), status);
dictnlength = len;
}
Expand All @@ -269,18 +269,18 @@ ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext.data(), dictnbuf.data(), &status);
if (U_SUCCESS(status)) {
// build trie
const uint8_t *data = (const uint8_t *)udata_getMemory(file);
const int32_t *indexes = (const int32_t *)data;
const uint8_t* data = static_cast<const uint8_t*>(udata_getMemory(file));
const int32_t* indexes = reinterpret_cast<const int32_t*>(data);
const int32_t offset = indexes[DictionaryData::IX_STRING_TRIE_OFFSET];
const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
DictionaryMatcher *m = nullptr;
if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
const int32_t transform = indexes[DictionaryData::IX_TRANSFORM];
const char *characters = (const char *)(data + offset);
const char* characters = reinterpret_cast<const char*>(data + offset);
m = new BytesDictionaryMatcher(characters, transform, file);
}
else if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
const char16_t *characters = (const char16_t *)(data + offset);
const char16_t* characters = reinterpret_cast<const char16_t*>(data + offset);
m = new UCharsDictionaryMatcher(characters, file);
}
if (m == nullptr) {
Expand Down Expand Up @@ -337,12 +337,12 @@ int32_t BreakEngineWrapper::findBreaks(
// extends towards the start or end of the text, depending on 'reverse'.

utext_setNativeIndex(text, startPos);
int32_t start = (int32_t)utext_getNativeIndex(text);
int32_t start = static_cast<int32_t>(utext_getNativeIndex(text));
int32_t current;
int32_t rangeStart;
int32_t rangeEnd;
UChar32 c = utext_current32(text);
while((current = (int32_t)utext_getNativeIndex(text)) < endPos && delegate->handles(c)) {
while ((current = static_cast<int32_t>(utext_getNativeIndex(text))) < endPos && delegate->handles(c)) {
utext_next32(text); // TODO: recast loop for postincrement
c = utext_current32(text);
}
Expand Down
4 changes: 2 additions & 2 deletions deps/icu-small/source/common/brkiter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
// Get the actual string
brkfname = ures_getString(brkName, &size, &status);
U_ASSERT((size_t)size<sizeof(fnbuff));
if ((size_t)size>=sizeof(fnbuff)) {
if (static_cast<size_t>(size) >= sizeof(fnbuff)) {
size=0;
if (U_SUCCESS(status)) {
status = U_BUFFER_OVERFLOW_ERROR;
Expand All @@ -99,7 +99,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st
char16_t* extStart=u_strchr(brkfname, 0x002e);
int len = 0;
if (extStart != nullptr){
len = (int)(extStart-brkfname);
len = static_cast<int>(extStart - brkfname);
u_UCharsToChars(extStart+1, ext, sizeof(ext)); // nul terminates the buff
u_UCharsToChars(brkfname, fnbuff, len);
}
Expand Down
10 changes: 5 additions & 5 deletions deps/icu-small/source/common/bytesinkutil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return false;
}
return appendChange((int32_t)(limit - s), s16, s16Length, sink, edits, errorCode);
return appendChange(static_cast<int32_t>(limit - s), s16, s16Length, sink, edits, errorCode);
}

void
Expand All @@ -81,15 +81,15 @@ ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *
namespace {

// See unicode/utf8.h U8_APPEND_UNSAFE().
inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }
inline uint8_t getTwoByteLead(UChar32 c) { return static_cast<uint8_t>((c >> 6) | 0xc0); }
inline uint8_t getTwoByteTrail(UChar32 c) { return static_cast<uint8_t>((c & 0x3f) | 0x80); }

} // namespace

void
ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
U_ASSERT(0x80 <= c && c <= 0x7ff); // 2-byte UTF-8
char s8[2] = { (char)getTwoByteLead(c), (char)getTwoByteTrail(c) };
char s8[2] = {static_cast<char>(getTwoByteLead(c)), static_cast<char>(getTwoByteTrail(c))};
sink.Append(s8, 2);
}

Expand All @@ -114,7 +114,7 @@ ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
return false;
}
int32_t length = (int32_t)(limit - s);
int32_t length = static_cast<int32_t>(limit - s);
if (length > 0) {
appendNonEmptyUnchanged(s, length, sink, options, edits);
}
Expand Down
2 changes: 1 addition & 1 deletion deps/icu-small/source/common/bytesinkutil.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class U_COMMON_API ByteSinkUtil {
/** The few bytes at [src, nextSrc[ were mapped/changed to valid code point c. */
static inline void appendCodePoint(const uint8_t *src, const uint8_t *nextSrc, UChar32 c,
ByteSink &sink, Edits *edits = nullptr) {
appendCodePoint((int32_t)(nextSrc - src), c, sink, edits);
appendCodePoint(static_cast<int32_t>(nextSrc - src), c, sink, edits);
}

/** Append the two-byte character (U+0080..U+07FF). */
Expand Down
Loading

0 comments on commit 8cbd3f8

Please sign in to comment.