Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change icuexportdata trie format to improve normalizer performance #5813

Merged
merged 33 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
d6f37cb
Rearrange the trie value bits
hsivonen Nov 11, 2024
c7fb1bb
Get rid of normalization data supplements
hsivonen Nov 11, 2024
6fc18fc
Perform a trie lookup by UTF-16 code unit on the fast path
hsivonen Nov 11, 2024
82b90d6
Perform trie lookup with UTF-16 code unit in the composing case
hsivonen Nov 12, 2024
07ab6ec
Get rid of undecomposed_starter_valid
hsivonen Nov 12, 2024
9bbc2aa
Optimize UTF-8 error cases
hsivonen Nov 13, 2024
2a7f490
Add a marker value for Hangul syllables
hsivonen Nov 13, 2024
b3c9743
Cleanup
hsivonen Nov 13, 2024
6c21106
Add documentation for the trie value format
hsivonen Nov 13, 2024
1cea9d0
Sync data
hsivonen Nov 13, 2024
8fe9ff9
Sync the collator with the normalizer data changes
hsivonen Nov 13, 2024
505f1cf
Update icu_harfbuzz
hsivonen Nov 14, 2024
380df55
Merge branch 'main' into normalizerdata
hsivonen Nov 14, 2024
e5faa23
Mention trie-value-format.md in various places
hsivonen Nov 14, 2024
852834c
Avoid a doc comment where a normal comment is needed
hsivonen Nov 14, 2024
29435a2
Merge branch 'main' into normalizerdata
hsivonen Nov 14, 2024
d8c0e55
Merge branch 'main' into normalizerdata
hsivonen Nov 18, 2024
0f76b27
Merge branch 'main' into normalizerdata
hsivonen Nov 27, 2024
7c37c7a
Merge branch 'main' into normalizerdata
hsivonen Dec 5, 2024
4f7ab10
Make transliterator tests compile
hsivonen Dec 5, 2024
db70f68
Update components/normalizer/trie-value-format.md
hsivonen Dec 11, 2024
fa0f538
Make the documentation for singleton decomposition more precise
hsivonen Dec 11, 2024
849c1c9
Merge branch 'main' into normalizerdata
hsivonen Dec 11, 2024
1d9d656
Correct the remark about the REPLACEMENT CHARACTER in properties.rs
hsivonen Dec 11, 2024
d710462
Correct the remark about the REPLACEMENT CHARACTER in properties.rs e…
hsivonen Dec 11, 2024
70c0706
Remove the remark about the REPLACEMENT CHARACTER in properties.rs as…
hsivonen Dec 11, 2024
677f00f
Merge branch 'main' into normalizerdata
sffc Dec 16, 2024
4534db0
Update ICU tag
sffc Dec 16, 2024
9f6f754
cargo make download-repo-sources
sffc Dec 16, 2024
d2b5c66
cargo make testdata; cargo make bakeddata
sffc Dec 16, 2024
b23c598
Address clippy lint
hsivonen Dec 17, 2024
abf8b9c
Merge branch 'main' into normalizerdata
hsivonen Dec 17, 2024
28e024d
Merge branch 'main' into normalizerdata
hsivonen Dec 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions components/collator/src/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ use crate::provider::CollationTailoringV1Marker;
use crate::{AlternateHandling, CollatorOptions, MaxVariable, ResolvedCollatorOptions, Strength};
use core::cmp::Ordering;
use core::convert::TryFrom;
use icu_normalizer::provider::CanonicalDecompositionDataV1Marker;
use icu_normalizer::provider::CanonicalDecompositionDataV2Marker;
use icu_normalizer::provider::CanonicalDecompositionTablesV1Marker;
use icu_normalizer::provider::DecompositionDataV1;
use icu_normalizer::provider::DecompositionDataV2;
use icu_normalizer::provider::DecompositionTablesV1;
use icu_normalizer::Decomposition;
use icu_provider::prelude::*;
Expand Down Expand Up @@ -220,7 +220,7 @@ pub struct Collator {
diacritics: DataPayload<CollationDiacriticsV1Marker>,
options: CollatorOptionsBitField,
reordering: Option<DataPayload<CollationReorderingV1Marker>>,
decompositions: DataPayload<CanonicalDecompositionDataV1Marker>,
decompositions: DataPayload<CanonicalDecompositionDataV2Marker>,
tables: DataPayload<CanonicalDecompositionTablesV1Marker>,
lithuanian_dot_above: bool,
}
Expand Down Expand Up @@ -276,7 +276,7 @@ impl Collator {
+ DataProvider<CollationJamoV1Marker>
+ DataProvider<CollationMetadataV1Marker>
+ DataProvider<CollationReorderingV1Marker>
+ DataProvider<CanonicalDecompositionDataV1Marker>
+ DataProvider<CanonicalDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ ?Sized,
{
Expand All @@ -296,7 +296,7 @@ impl Collator {
fn try_new_unstable_internal<D>(
provider: &D,
root: DataPayload<CollationRootV1Marker>,
decompositions: DataPayload<CanonicalDecompositionDataV1Marker>,
decompositions: DataPayload<CanonicalDecompositionDataV2Marker>,
tables: DataPayload<CanonicalDecompositionTablesV1Marker>,
jamo: DataPayload<CollationJamoV1Marker>,
special_primaries: impl FnOnce() -> Result<
Expand Down Expand Up @@ -364,7 +364,7 @@ pub struct CollatorBorrowed<'a> {
diacritics: &'a CollationDiacriticsV1<'a>,
options: CollatorOptionsBitField,
reordering: Option<&'a CollationReorderingV1<'a>>,
decompositions: &'a DecompositionDataV1<'a>,
decompositions: &'a DecompositionDataV2<'a>,
tables: &'a DecompositionTablesV1<'a>,
lithuanian_dot_above: bool,
}
Expand All @@ -381,7 +381,7 @@ impl CollatorBorrowed<'static> {

let provider = &crate::provider::Baked;
let decompositions =
icu_normalizer::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_DATA_V1_MARKER;
icu_normalizer::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_DATA_V2_MARKER;
let tables =
icu_normalizer::provider::Baked::SINGLETON_CANONICAL_DECOMPOSITION_TABLES_V1_MARKER;
let root = crate::provider::Baked::SINGLETON_COLLATION_ROOT_V1_MARKER;
Expand Down
189 changes: 84 additions & 105 deletions components/collator/src/elements.rs

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions components/collator/tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ const _: () = {

icu_normalizer_data::impl_canonical_compositions_v1_marker!(TestingProvider);
icu_normalizer_data::impl_non_recursive_decomposition_supplement_v1_marker!(TestingProvider);
icu_normalizer_data::impl_canonical_decomposition_data_v1_marker!(TestingProvider);
icu_normalizer_data::impl_canonical_decomposition_data_v2_marker!(TestingProvider);
icu_normalizer_data::impl_canonical_decomposition_tables_v1_marker!(TestingProvider);
icu_normalizer_data::impl_compatibility_decomposition_supplement_v1_marker!(TestingProvider);
icu_normalizer_data::impl_compatibility_decomposition_data_v2_marker!(TestingProvider);
icu_normalizer_data::impl_compatibility_decomposition_tables_v1_marker!(TestingProvider);
icu_normalizer_data::impl_uts46_decomposition_supplement_v1_marker!(TestingProvider);
icu_normalizer_data::impl_uts46_decomposition_data_v2_marker!(TestingProvider);
};

type StackString = arraystring::ArrayString<arraystring::typenum::U32>;
Expand Down
10 changes: 5 additions & 5 deletions components/experimental/src/transliterate/compile/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ impl Direction {
/// {$AE} [:Lowercase:] → Ae;
/// {$OE} [:Lowercase:] → Oe;
/// {$UE} [:Lowercase:] → Ue;
///
///
/// $AE → AE;
/// $OE → OE;
/// $UE → UE;
Expand Down Expand Up @@ -232,8 +232,8 @@ impl RuleCollection {
+ DataProvider<ScriptWithExtensionsPropertyV1Marker>
+ DataProvider<XidStartV1Marker>,
NP: ?Sized
+ DataProvider<CanonicalDecompositionDataV1Marker>
+ DataProvider<CompatibilityDecompositionSupplementV1Marker>
+ DataProvider<CanonicalDecompositionDataV2Marker>
+ DataProvider<CompatibilityDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ DataProvider<CompatibilityDecompositionTablesV1Marker>
+ DataProvider<CanonicalCompositionsV1Marker>,
Expand Down Expand Up @@ -414,8 +414,8 @@ macro_rules! redirect {
}

redirect!(
CanonicalDecompositionDataV1Marker,
CompatibilityDecompositionSupplementV1Marker,
CanonicalDecompositionDataV2Marker,
CompatibilityDecompositionDataV2Marker,
CanonicalDecompositionTablesV1Marker,
CompatibilityDecompositionTablesV1Marker,
CanonicalCompositionsV1Marker
Expand Down
30 changes: 14 additions & 16 deletions components/experimental/src/transliterate/transliterator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ struct ComposingTransliterator(ComposingNormalizer);
impl ComposingTransliterator {
fn try_nfc<P>(provider: &P) -> Result<Self, DataError>
where
P: DataProvider<CanonicalDecompositionDataV1Marker>
P: DataProvider<CanonicalDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ DataProvider<CanonicalCompositionsV1Marker>
+ ?Sized,
Expand All @@ -63,8 +63,7 @@ impl ComposingTransliterator {

fn try_nfkc<P>(provider: &P) -> Result<Self, DataError>
where
P: DataProvider<CanonicalDecompositionDataV1Marker>
+ DataProvider<CompatibilityDecompositionSupplementV1Marker>
P: DataProvider<CompatibilityDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ DataProvider<CompatibilityDecompositionTablesV1Marker>
+ DataProvider<CanonicalCompositionsV1Marker>
Expand All @@ -90,7 +89,7 @@ struct DecomposingTransliterator(DecomposingNormalizer);
impl DecomposingTransliterator {
fn try_nfd<P>(provider: &P) -> Result<Self, DataError>
where
P: DataProvider<CanonicalDecompositionDataV1Marker>
P: DataProvider<CanonicalDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ ?Sized,
{
Expand All @@ -101,8 +100,7 @@ impl DecomposingTransliterator {

fn try_nfkd<P>(provider: &P) -> Result<Self, DataError>
where
P: DataProvider<CanonicalDecompositionDataV1Marker>
+ DataProvider<CompatibilityDecompositionSupplementV1Marker>
P: DataProvider<CompatibilityDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ DataProvider<CompatibilityDecompositionTablesV1Marker>
+ ?Sized,
Expand Down Expand Up @@ -279,8 +277,8 @@ impl Transliterator {
) -> Result<Self, DataError>
where
PT: DataProvider<TransliteratorRulesV1Marker> + ?Sized,
PN: DataProvider<CanonicalDecompositionDataV1Marker>
+ DataProvider<CompatibilityDecompositionSupplementV1Marker>
PN: DataProvider<CanonicalDecompositionDataV2Marker>
+ DataProvider<CompatibilityDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ DataProvider<CompatibilityDecompositionTablesV1Marker>
+ DataProvider<CanonicalCompositionsV1Marker>
Expand Down Expand Up @@ -391,8 +389,8 @@ impl Transliterator {
) -> Result<Transliterator, DataError>
where
PT: DataProvider<TransliteratorRulesV1Marker> + ?Sized,
PN: DataProvider<CanonicalDecompositionDataV1Marker>
+ DataProvider<CompatibilityDecompositionSupplementV1Marker>
PN: DataProvider<CanonicalDecompositionDataV2Marker>
+ DataProvider<CompatibilityDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ DataProvider<CompatibilityDecompositionTablesV1Marker>
+ DataProvider<CanonicalCompositionsV1Marker>
Expand All @@ -415,8 +413,8 @@ impl Transliterator {
) -> Result<Transliterator, DataError>
where
PT: DataProvider<TransliteratorRulesV1Marker> + ?Sized,
PN: DataProvider<CanonicalDecompositionDataV1Marker>
+ DataProvider<CompatibilityDecompositionSupplementV1Marker>
PN: DataProvider<CanonicalDecompositionDataV2Marker>
+ DataProvider<CompatibilityDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ DataProvider<CompatibilityDecompositionTablesV1Marker>
+ DataProvider<CanonicalCompositionsV1Marker>
Expand Down Expand Up @@ -451,8 +449,8 @@ impl Transliterator {
) -> Result<DataPayload<TransliteratorRulesV1Marker>, DataError>
where
PT: DataProvider<TransliteratorRulesV1Marker> + ?Sized,
PN: DataProvider<CanonicalDecompositionDataV1Marker>
+ DataProvider<CompatibilityDecompositionSupplementV1Marker>
PN: DataProvider<CanonicalDecompositionDataV2Marker>
+ DataProvider<CompatibilityDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ DataProvider<CompatibilityDecompositionTablesV1Marker>
+ DataProvider<CanonicalCompositionsV1Marker>
Expand Down Expand Up @@ -500,8 +498,8 @@ impl Transliterator {
normalizer_provider: &P,
) -> Option<Result<InternalTransliterator, DataError>>
where
P: DataProvider<CanonicalDecompositionDataV1Marker>
+ DataProvider<CompatibilityDecompositionSupplementV1Marker>
P: DataProvider<CanonicalDecompositionDataV2Marker>
+ DataProvider<CompatibilityDecompositionDataV2Marker>
+ DataProvider<CanonicalDecompositionTablesV1Marker>
+ DataProvider<CompatibilityDecompositionTablesV1Marker>
+ DataProvider<CanonicalCompositionsV1Marker>
Expand Down
Loading
Loading