Skip to content

Commit

Permalink
Cherry-pick: ICU-20250 make UnicodeSet(intprop=value) faster
Browse files Browse the repository at this point in the history
Url: unicode-org/icu@98f9170
GitHub pull request: unicode-org/icu#278

Changes:
- fastpath for UnicodeSet.add(new last range)
- fewer UnicodeSet memory allocations:
  initial internal list array, exponential array growth,
  allocate strings list/set only when first one is added
- faster CodePointTrie.getRange(): fewer calls to filter function
- revert UnicodeSet(intprop=value) from trie ranges to range starts + lookup
- cache per-int-prop range starts: fewer lookups

Bug: 119388329
Test: CtsIcuTestCases
Change-Id: I75b87d4574c183a171b0743a14f156f02cdcc43e
  • Loading branch information
gvictor committed Nov 19, 2018
1 parent 58cfed0 commit d762fd8
Show file tree
Hide file tree
Showing 21 changed files with 1,075 additions and 784 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
// License & terms of use: http://www.unicode.org/copyright.html#License
package android.icu.impl;

import android.icu.lang.UCharacter;
import android.icu.lang.UProperty;
import android.icu.text.UnicodeSet;

/**
Expand All @@ -11,13 +13,16 @@
* @hide Only a subset of ICU is exposed in Android
*/
public final class CharacterPropertiesImpl {
private static final int NUM_INCLUSIONS = UCharacterProperty.SRC_COUNT +
UProperty.INT_LIMIT - UProperty.INT_START;

/**
* A set of all characters _except_ the second through last characters of
* certain ranges. These ranges are ranges of characters whose
* properties are all exactly alike, e.g. CJK Ideographs from
* U+4E00 to U+9FA5.
*/
private static final UnicodeSet inclusions[] = new UnicodeSet[UCharacterProperty.SRC_COUNT];
private static final UnicodeSet inclusions[] = new UnicodeSet[NUM_INCLUSIONS];

/** For {@link UnicodeSet#setDefaultXSymbolTable}. */
public static synchronized void clear() {
Expand All @@ -26,7 +31,7 @@ public static synchronized void clear() {
}
}

private static synchronized UnicodeSet getInclusionsForSource(int src) {
private static UnicodeSet getInclusionsForSource(int src) {
if (inclusions[src] == null) {
UnicodeSet incl = new UnicodeSet();
switch(src) {
Expand Down Expand Up @@ -73,16 +78,48 @@ private static synchronized UnicodeSet getInclusionsForSource(int src) {
// We do not freeze() the set because we only iterate over it,
// rather than testing contains(),
// so the extra time and memory to optimize that are not necessary.
inclusions[src] = incl;
inclusions[src] = incl.compact();
}
return inclusions[src];
}

/**
 * Returns the inclusions set for one integer-valued property, building and
 * caching it on first use.
 *
 * <p>The result is stored in {@code inclusions} at index
 * {@code UCharacterProperty.SRC_COUNT + prop - UProperty.INT_START}.
 * It is built by walking every code point of the property's source
 * inclusions set and recording each code point whose property value
 * differs from the value seen immediately before it.
 *
 * <p>This method is not synchronized itself; the visible caller,
 * {@code getInclusionsForProperty}, is.
 *
 * @param prop an integer property selector; asserted to lie in
 *             {@code [UProperty.INT_START, UProperty.INT_LIMIT)}
 * @return the cached, compacted set -- callers must not modify it
 */
private static UnicodeSet getIntPropInclusions(int prop) {
    assert UProperty.INT_START <= prop && prop < UProperty.INT_LIMIT;

    final int slot = UCharacterProperty.SRC_COUNT + prop - UProperty.INT_START;
    final UnicodeSet cached = inclusions[slot];
    if (cached != null) {
        return cached;
    }

    // Candidate code points come from the inclusions set of the property's data source.
    final UnicodeSet sourceIncl =
            getInclusionsForSource(UCharacterProperty.INSTANCE.getSource(prop));

    // Seeded with the range 0..0; the running value starts at 0 as well.
    final UnicodeSet starts = new UnicodeSet(0, 0);
    int lastValue = 0;
    final int rangeCount = sourceIncl.getRangeCount();
    for (int r = 0; r < rangeCount; ++r) {
        final int last = sourceIncl.getRangeEnd(r);
        int cp = sourceIncl.getRangeStart(r);
        while (cp <= last) {
            // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
            final int current = UCharacter.getIntPropertyValue(cp, prop);
            if (current != lastValue) {
                // The value changes here, so cp is recorded.
                starts.add(cp);
                lastValue = current;
            }
            ++cp;
        }
    }

    // Compact for caching.
    final UnicodeSet compacted = starts.compact();
    inclusions[slot] = compacted;
    return compacted;
}

/**
* Returns the cached inclusions set for the given property.
* The returned UnicodeSet is mutable but shared -- do not modify!
*/
public static UnicodeSet getInclusionsForProperty(int prop) {
int src = UCharacterProperty.INSTANCE.getSource(prop);
return getInclusionsForSource(src);
public static synchronized UnicodeSet getInclusionsForProperty(int prop) {
if (UProperty.INT_START <= prop && prop < UProperty.INT_LIMIT) {
return getIntPropInclusions(prop);
} else {
int src = UCharacterProperty.INSTANCE.getSource(prop);
return getInclusionsForSource(src);
}
}
}
Loading

0 comments on commit d762fd8

Please sign in to comment.