Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified eng/prebuilts/wasm/icudt.dat
Binary file not shown.
Binary file modified eng/prebuilts/wasm/icudt_CJK.dat
Binary file not shown.
Binary file modified eng/prebuilts/wasm/icudt_EFIGS.dat
Binary file not shown.
Binary file modified eng/prebuilts/wasm/icudt_no_CJK.dat
Binary file not shown.
3 changes: 2 additions & 1 deletion icu-filters/icudt_CJK.json
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,8 @@
"-/*/*",
"+/collations/default",
"+/collations/standard",
"+/collations/private-kana"
"+/collations/private-kana",
"-/UCARules"
]
},
{
Expand Down
3 changes: 2 additions & 1 deletion icu-filters/icudt_EFIGS.json
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,8 @@
"rules": [
"-/*/*",
"+/collations/default",
"+/collations/standard"
"+/collations/standard",
"-/UCARules"
]
},
{
Expand Down
3 changes: 2 additions & 1 deletion icu-filters/icudt_mobile.json
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,8 @@
"-/*/*",
"+/collations/default",
"+/collations/standard",
"+/collations/private-kana"
"+/collations/private-kana",
"-/UCARules"
]
},
{
Expand Down
3 changes: 2 additions & 1 deletion icu-filters/icudt_no_CJK.json
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,8 @@
"rules": [
"-/*/*",
"+/collations/default",
"+/collations/standard"
"+/collations/standard",
"-/UCARules"
]
},
{
Expand Down
3 changes: 2 additions & 1 deletion icu-filters/icudt_wasm.json
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,8 @@
"-/*/*",
"+/collations/default",
"+/collations/standard",
"+/collations/private-kana"
"+/collations/private-kana",
"-/UCARules"
]
},
{
Expand Down
34 changes: 30 additions & 4 deletions icu/icu4c/source/common/caniter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -430,16 +430,42 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
}

/**
 * Entry point for collecting the equivalents of a segment.
 * Builds a fresh `visited` table for this call and delegates the actual work
 * to collectEquivalentsRecursive(), which receives that table by reference.
 *
 * @param fillinResult table handed through to the recursive helper
 * @param segment      start of the UTF-16 text to expand
 * @param segLen       number of code units in segment
 * @param status       ICU error code; if it is a failure after the table is
 *                     constructed, nothing else is done
 * @return the helper's result, or a null pointer when status is a failure
 */
Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status) {
    Hashtable seenSegments(status);
    return U_FAILURE(status)
        ? nullptr
        : collectEquivalentsRecursive(fillinResult, segment, segLen, seenSegments, status);
}

Hashtable *CanonicalIterator::collectEquivalentsRecursive(Hashtable *fillinResult, const UChar *segment, int32_t segLen, Hashtable &visited, UErrorCode &status) {

if (U_FAILURE(status)) {
return NULL;
}

//if (PROGRESS) printf("Adding: %s\n", UToS(Tr(segment)));

if (segment == NULL || segLen <= 0) {
return fillinResult;
}

UnicodeString toPut(segment, segLen);

fillinResult->put(toPut, new UnicodeString(toPut), status);
// Stop expanding when this exact segment text is already recorded in `visited`;
// the caller's table is returned unchanged in that case.
if (visited.containsKey(toPut)) {
return fillinResult;
}

// Record the segment in `visited` before expanding it.
// NOTE(review): the value stored here is a null pointer. ICU's uhash documents
// storing a NULL value as equivalent to removing the key, so this call
// presumably never retains `toPut` and the containsKey() check above would
// never fire - confirm. The import guard in collationruleparser.cpp stores a
// non-null sentinel for exactly this reason.
// NOTE(review): before switching to a non-null marker, verify that skipping a
// segment reached a second time via a different path does not drop results;
// `visited` is shared by the whole traversal and is never cleared in the
// visible code.
visited.put(toPut, nullptr, status);
if (U_FAILURE(status)) {
return NULL;
}

UnicodeString *initialValue = new UnicodeString(toPut);
if (initialValue == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
fillinResult->put(toPut, initialValue, status);

UnicodeSet starts;

Expand All @@ -457,7 +483,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UCh
UChar32 cp2 = iter.getCodepoint();
Hashtable remainder(status);
remainder.setValueDeleter(uprv_deleteUObject);
if (extract(&remainder, cp2, segment, segLen, i, status) == NULL) {
if (extract(&remainder, cp2, segment, segLen, i, visited, status) == NULL) {
continue;
}

Expand Down Expand Up @@ -497,7 +523,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UCh
* (with canonical rearrangement!)
* If so, take the remainder, and return the equivalents
*/
Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, const UChar *segment, int32_t segLen, int32_t segmentPos, Hashtable &visited, UErrorCode &status) {
//Hashtable *CanonicalIterator::extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status) {
//if (PROGRESS) printf(" extract: %s, ", UToS(Tr(UnicodeString(comp))));
//if (PROGRESS) printf("%s, %i\n", UToS(Tr(segment)), segmentPos);
Expand Down Expand Up @@ -578,7 +604,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
return NULL;
}

return getEquivalents2(fillinResult, temp.getBuffer()+inputLen, temp.length()-inputLen, status);
return collectEquivalentsRecursive(fillinResult, temp.getBuffer()+inputLen, temp.length()-inputLen, visited, status);
}

U_NAMESPACE_END
Expand Down
3 changes: 2 additions & 1 deletion icu/icu4c/source/common/unicode/caniter.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ class U_COMMON_API CanonicalIterator U_FINAL : public UObject {

//Set getEquivalents2(String segment);
Hashtable *getEquivalents2(Hashtable *fillinResult, const char16_t *segment, int32_t segLen, UErrorCode &status);
Hashtable *collectEquivalentsRecursive(Hashtable *fillinResult, const char16_t *segment, int32_t segLen, Hashtable &visited, UErrorCode &status);
//Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status);

/**
Expand All @@ -198,7 +199,7 @@ class U_COMMON_API CanonicalIterator U_FINAL : public UObject {
* If so, take the remainder, and return the equivalents
*/
//Set extract(int comp, String segment, int segmentPos, StringBuffer buffer);
Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const char16_t *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const char16_t *segment, int32_t segLen, int32_t segmentPos, Hashtable &visited, UErrorCode &status);
//Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);

void cleanPieces();
Expand Down
46 changes: 45 additions & 1 deletion icu/icu4c/source/i18n/collationbuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "unicode/utf16.h"
#include "unicode/uversion.h"
#include "cmemory.h"
#include "hash.h"
#include "collation.h"
#include "collationbuilder.h"
#include "collationdata.h"
Expand Down Expand Up @@ -203,7 +204,8 @@ CollationBuilder::CollationBuilder(const CollationTailoring *b, UBool icu4xMode,
icu4xMode(icu4xMode),
errorReason(NULL),
cesLength(0),
rootPrimaryIndexes(errorCode), nodes(errorCode) {
rootPrimaryIndexes(errorCode), nodes(errorCode),
closureVisited(nullptr) {
nfcImpl.ensureCanonIterData(errorCode);
if(U_FAILURE(errorCode)) {
errorReason = "CollationBuilder fields initialization failed";
Expand All @@ -225,6 +227,7 @@ CollationBuilder::CollationBuilder(const CollationTailoring *b, UErrorCode &erro

// Releases the objects owned by the builder.
CollationBuilder::~CollationBuilder() {
delete dataBuilder;
// closureVisited starts out as nullptr and is only allocated on demand by
// addOnlyClosure(); deleting a null pointer is a no-op.
delete closureVisited;
}

CollationTailoring *
Expand Down Expand Up @@ -1119,6 +1122,47 @@ CollationBuilder::addOnlyClosure(const UnicodeString &nfdPrefix, const UnicodeSt
UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return ce32; }

if(closureVisited == nullptr) {
UErrorCode status = U_ZERO_ERROR;
closureVisited = new Hashtable(status);
if(closureVisited == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return ce32;
}
if(U_FAILURE(status)) {
delete closureVisited;
closureVisited = nullptr;
errorCode = status;
return ce32;
}
}

UnicodeString closureKey(nfdPrefix);
closureKey.append((UChar)0);
closureKey.append(nfdString);
// Skip if we are already in the middle of processing this prefix/string pair.
if(closureVisited->containsKey(closureKey)) {
return ce32;
}

UErrorCode tableStatus = U_ZERO_ERROR;
// Mark this prefix/string pair as being processed.
// An integer marker is stored instead of a null pointer: ICU's hash table
// treats put(key, nullptr) as a removal, which would leave the table empty
// and make the containsKey() check above ineffective.
closureVisited->puti(closureKey, 1, tableStatus);
if(U_FAILURE(tableStatus)) {
    errorCode = tableStatus;
    return ce32;
}
// Scope guard: removes the marker again on every exit path of this function,
// so the table only ever holds pairs that are currently being processed.
struct ClosureKeyCleanup {
    Hashtable *table;
    UnicodeString key;
    ClosureKeyCleanup(Hashtable *t, const UnicodeString &k)
            : table(t), key(k) {}
    // The guard is tied to one table entry; copying it would remove the entry twice.
    ClosureKeyCleanup(const ClosureKeyCleanup &) = delete;
    ClosureKeyCleanup &operator=(const ClosureKeyCleanup &) = delete;
    ~ClosureKeyCleanup() {
        if(table != nullptr) {
            table->removei(key);
        }
    }
} cleanup(closureVisited, closureKey);

// Map from canonically equivalent input to the CEs. (But not from the all-NFD input.)
if(nfdPrefix.isEmpty()) {
CanonicalIterator stringIter(nfdString, errorCode);
Expand Down
2 changes: 2 additions & 0 deletions icu/icu4c/source/i18n/collationbuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class CEFinalizer;
class CollationDataBuilder;
class Normalizer2;
class Normalizer2Impl;
class Hashtable;

class U_I18N_API CollationBuilder : public CollationRuleParser::Sink {
public:
Expand Down Expand Up @@ -309,6 +310,7 @@ class U_I18N_API CollationBuilder : public CollationRuleParser::Sink {

int64_t ces[Collation::MAX_EXPANSION_LENGTH];
int32_t cesLength;
Hashtable *closureVisited;

/**
* Indexes of nodes with root primary weights, sorted by primary.
Expand Down
86 changes: 82 additions & 4 deletions icu/icu4c/source/i18n/collationruleparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
#include "unicode/uloc.h"
#include "unicode/unistr.h"
#include "unicode/utf16.h"
#include <stdio.h>
#include <string>
#include "charstr.h"
#include "cmemory.h"
#include "collation.h"
Expand All @@ -32,6 +34,7 @@
#include "collationsettings.h"
#include "collationtailoring.h"
#include "cstring.h"
#include "hash.h"
#include "patternprops.h"
#include "uassert.h"
#include "uvectr32.h"
Expand Down Expand Up @@ -59,12 +62,13 @@ CollationRuleParser::CollationRuleParser(const CollationData *base, UErrorCode &
: nfd(*Normalizer2::getNFDInstance(errorCode)),
nfc(*Normalizer2::getNFCInstance(errorCode)),
rules(NULL), baseData(base), settings(NULL),
parseError(NULL), errorReason(NULL),
sink(NULL), importer(NULL),
ruleIndex(0) {
parseError(NULL), errorReason(NULL),
sink(NULL), importer(NULL),
ruleIndex(0), importRecursionGuard(nullptr) {
}

// Frees the import recursion guard table. The member is initialized to nullptr
// by the constructor and only allocated when an [import] setting is parsed;
// deleting a null pointer is a no-op.
CollationRuleParser::~CollationRuleParser() {
delete importRecursionGuard;
}

void
Expand Down Expand Up @@ -641,8 +645,82 @@ CollationRuleParser::parseSetting(UErrorCode &errorCode) {
if(importer == NULL) {
setParseError("[import langTag] is not supported", errorCode);
} else {
const char *resolvedType = (length > 0 ? collationType : "standard");

// Build a canonical, lower-case, hyphen-separated form of the import base ID
// so that differently spelled references to the same locale share one key.
char normalizedBase[ULOC_FULLNAME_CAPACITY];
UErrorCode normalizeStatus = U_ZERO_ERROR;
int32_t normalizedBaseLength = uloc_toLanguageTag(baseID, normalizedBase,
                                                  ULOC_FULLNAME_CAPACITY,
                                                  false, &normalizeStatus);
// Fall back to the raw base ID when the conversion fails, yields nothing, or
// fills the buffer completely. The last case is reported only as
// U_STRING_NOT_TERMINATED_WARNING, which U_FAILURE() does not catch, and would
// leave normalizedBase without a terminating NUL for the loop below.
if(U_FAILURE(normalizeStatus) || normalizedBaseLength <= 0 ||
        normalizedBaseLength >= ULOC_FULLNAME_CAPACITY) {
    normalizeStatus = U_ZERO_ERROR;
    uprv_strncpy(normalizedBase, baseID, ULOC_FULLNAME_CAPACITY - 1);
    normalizedBase[ULOC_FULLNAME_CAPACITY - 1] = 0;
}
// Fold case and unify separators in place.
for(char *p = normalizedBase; *p != 0; ++p) {
    if(*p == '_') {
        *p = '-';
    } else {
        *p = static_cast<char>(uprv_tolower(*p));
    }
}

std::string normalizedType(resolvedType);
for(char &ch : normalizedType) {
if(ch == '_') {
ch = '-';
} else {
ch = static_cast<char>(uprv_tolower(ch));
}
}

if(importRecursionGuard == nullptr) {
UErrorCode tableStatus = U_ZERO_ERROR;
importRecursionGuard = new Hashtable(tableStatus);
if(importRecursionGuard == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
if(U_FAILURE(tableStatus)) {
delete importRecursionGuard;
importRecursionGuard = nullptr;
errorCode = tableStatus;
return;
}
}

UnicodeString importKey(normalizedBase, -1, US_INV);
importKey.append((UChar)0);
importKey.append(UnicodeString(normalizedType.c_str(), -1, US_INV));

// If this base/type pair is already registered in the guard table, the import
// is skipped: a message is written to stderr, ruleIndex is moved to j, and the
// function returns without touching errorCode.
// NOTE(review): this writes to stderr unconditionally from library code and
// reports no parse error to the caller - confirm that a silent skip is the
// intended contract.
UBool alreadyInFlight = importRecursionGuard->containsKey(importKey);
if(alreadyInFlight) {
fprintf(stderr, "[collationruleparser] recursive import detected for %s / %s\n",
normalizedBase, normalizedType.c_str());
ruleIndex = j;
return;
}

UErrorCode tableStatus = U_ZERO_ERROR;
// Store a non-null sentinel so the hash table retains the key.
importRecursionGuard->put(importKey, const_cast<CollationRuleParser *>(this), tableStatus);
if(U_FAILURE(tableStatus)) {
errorCode = tableStatus;
return;
}
struct ImportKeyCleanup {
Hashtable *table;
UnicodeString key;
ImportKeyCleanup(Hashtable *t, const UnicodeString &k)
: table(t), key(k) {}
~ImportKeyCleanup() {
if(table != nullptr) {
table->remove(key);
}
}
} cleanup(importRecursionGuard, importKey);
UnicodeString importedRules;
importer->getRules(baseID, length > 0 ? collationType : "standard",
importer->getRules(baseID, resolvedType,
importedRules, errorReason, errorCode);
if(U_FAILURE(errorCode)) {
if(errorReason == NULL) {
Expand Down
3 changes: 3 additions & 0 deletions icu/icu4c/source/i18n/collationruleparser.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ U_NAMESPACE_BEGIN
struct CollationData;
struct CollationTailoring;

class Hashtable;
class Locale;
class Normalizer2;

Expand Down Expand Up @@ -188,6 +189,8 @@ class U_I18N_API CollationRuleParser : public UMemory {
Sink *sink;
Importer *importer;

Hashtable *importRecursionGuard;

int32_t ruleIndex;
};

Expand Down