Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace DataLocale by LanguageIdentifier on DataRequest #4984

Closed
wants to merge 20 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
37 changes: 14 additions & 23 deletions components/calendar/src/any_calendar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1073,32 +1073,23 @@ impl AnyCalendarKind {
.and_then(Self::get_for_bcp47_value)
}

/// Extract the calendar component from a [`DataLocale`]
///
/// Returns `None` if the calendar is not specified or unknown. If you prefer an error, use
/// [`CalendarError::unknown_any_calendar_kind`].
fn get_for_data_locale(l: &DataLocale) -> Option<Self> {
l.get_unicode_ext(&key!("ca"))
.and_then(|v| Self::get_for_bcp47_value(&v))
}

// Do not make public, this will eventually need fallback
// data from the provider
fn from_data_locale_with_fallback(l: &DataLocale) -> Self {
if let Some(kind) = Self::get_for_data_locale(l) {
kind
} else {
let lang = l.language();
if lang == language!("th") {
Self::Buddhist
} else if lang == language!("sa") {
Self::IslamicUmmAlQura
} else if lang == language!("af") || lang == language!("ir") {
Self::Persian
} else {
Self::Gregorian
}
}
l.keywords
.get(&key!("ca"))
.and_then(Self::get_for_bcp47_value)
.unwrap_or_else(|| {
if l.id.language == language!("th") {
Self::Buddhist
} else if l.id.language == language!("sa") {
Self::IslamicUmmAlQura
} else if l.id.language == language!("af") || l.id.language == language!("ir") {
Self::Persian
} else {
Self::Gregorian
}
})
}
}

Expand Down
6 changes: 3 additions & 3 deletions components/calendar/src/week_of.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ impl WeekCalculator {
DataProvider::<WeekDataV1Marker>::load(
&provider.as_downcasting(),
DataRequest {
locale,
langid: &locale.id,
..Default::default()
},
)
Expand All @@ -85,7 +85,7 @@ impl WeekCalculator {
DataProvider::<WeekDataV1Marker>::load(
&provider.as_deserializing(),
DataRequest {
locale,
langid: &locale.id,
..Default::default()
},
)
Expand All @@ -102,7 +102,7 @@ impl WeekCalculator {
{
provider
.load(DataRequest {
locale,
langid: &locale.id,
..Default::default()
})
.and_then(DataResponse::take_payload)
Expand Down
1 change: 1 addition & 0 deletions components/collator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ all-features = true
[dependencies]
displaydoc = { workspace = true }
icu_collections = { workspace = true }
icu_locale_core = { workspace = true }
icu_normalizer = { workspace = true }
icu_properties = { workspace = true }
icu_provider = { workspace = true, features = ["macros"] }
Expand Down
29 changes: 11 additions & 18 deletions components/collator/benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,7 @@
use criterion::{black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};

use icu::collator::*;
use icu::locale::Locale;
use icu_provider::DataLocale;

fn to_data_locale(locale_str: &str) -> DataLocale {
locale_str
.parse::<Locale>()
.expect("Failed to parse locale")
.into()
}
use icu::locale::locale;

pub fn collator_with_locale(criterion: &mut Criterion) {
// Load file content in reverse order vector.
Expand Down Expand Up @@ -99,36 +91,36 @@ pub fn collator_with_locale(criterion: &mut Criterion) {
Strength::Identical,
];
let performance_parameters = [
(to_data_locale("en_US"), vec![&content_latin], &all_strength),
(to_data_locale("da_DK"), vec![&content_latin], &all_strength),
(to_data_locale("fr_CA"), vec![&content_latin], &all_strength),
(locale!("en_US"), vec![&content_latin], &all_strength),
(locale!("da_DK"), vec![&content_latin], &all_strength),
(locale!("fr_CA"), vec![&content_latin], &all_strength),
(
to_data_locale("ja_JP"),
locale!("ja_JP"),
vec![&content_latin, &content_jp_h, &content_jp_k, &content_asian],
&all_strength,
),
(
to_data_locale("zh-u-co-pinyin"),
locale!("zh-u-co-pinyin"),
vec![&content_latin, &content_chinese],
&all_strength,
), // zh_CN
(
to_data_locale("zh-u-co-stroke"),
locale!("zh-u-co-stroke"),
vec![&content_latin, &content_chinese],
&all_strength,
), // zh_TW
(
to_data_locale("ru_RU"),
locale!("ru_RU"),
vec![&content_latin, &content_russian],
&all_strength,
),
(
to_data_locale("th"),
locale!("th"),
vec![&content_latin, &content_thai],
&all_strength,
),
(
to_data_locale("ko_KR"),
locale!("ko_KR"),
vec![&content_latin, &content_korean],
&all_strength,
),
Expand All @@ -137,6 +129,7 @@ pub fn collator_with_locale(criterion: &mut Criterion) {
let (locale_under_bench, files_under_bench, benched_strength) = perf_parameter;

let mut group = criterion.benchmark_group(locale_under_bench.to_string());
let locale_under_bench = locale_under_bench.into();

for content_under_bench in files_under_bench {
let (file_name, elements) = black_box(content_under_bench);
Expand Down
49 changes: 43 additions & 6 deletions components/collator/src/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use crate::provider::CollationSpecialPrimariesV1Marker;
use crate::{AlternateHandling, CollatorOptions, MaxVariable, ResolvedCollatorOptions, Strength};
use core::cmp::Ordering;
use core::convert::TryFrom;
use icu_locale_core::extensions::unicode::key;
use icu_normalizer::provider::CanonicalDecompositionDataV1Marker;
use icu_normalizer::provider::CanonicalDecompositionTablesV1Marker;
use icu_normalizer::Decomposition;
Expand Down Expand Up @@ -145,25 +146,61 @@ impl Collator {
+ ?Sized,
{
let req = DataRequest {
locale,
..Default::default()
langid: &locale.id,
key_attributes: &locale
.keywords
.get(&key!("co"))
.map(DataKeyAttributes::from_unicode_value)
.unwrap_or_default(),
metadata: {
let mut metadata = DataRequestMetadata::default();
metadata.silent = true;
metadata
},
};

let metadata_payload: DataPayload<crate::provider::CollationMetadataV1Marker> =
provider.load(req)?.take_payload()?;
let metadata_payload: DataPayload<crate::provider::CollationMetadataV1Marker> = provider
.load(req)
.or_else(|_| {
provider.load(DataRequest {
langid: req.langid,
..Default::default()
})
})?
.take_payload()?;
Comment on lines +162 to +170
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion, here and elsewhere: perhaps refactor to make more clear why you are making two requests

Suggested change
let metadata_payload: DataPayload<crate::provider::CollationMetadataV1Marker> = provider
.load(req)
.or_else(|_| {
provider.load(DataRequest {
langid: req.langid,
..Default::default()
})
})?
.take_payload()?;
let metadata_payload: DataPayload<crate::provider::CollationMetadataV1Marker> = provider
.load(req_specific)
.or_else(|_| {
provider.load(req_default)
})?
.take_payload()?;


let metadata = metadata_payload.get();

let tailoring: Option<DataPayload<crate::provider::CollationDataV1Marker>> =
if metadata.tailored() {
Some(provider.load(req)?.take_payload()?)
Some(
provider
.load(req)
.or_else(|_| {
provider.load(DataRequest {
langid: req.langid,
..Default::default()
})
})?
.take_payload()?,
)
} else {
None
};

let reordering: Option<DataPayload<crate::provider::CollationReorderingV1Marker>> =
if metadata.reordering() {
Some(provider.load(req)?.take_payload()?)
Some(
provider
.load(req)
.or_else(|_| {
provider.load(DataRequest {
langid: req.langid,
..Default::default()
})
})?
.take_payload()?,
)
} else {
None
};
Expand Down
4 changes: 0 additions & 4 deletions components/collator/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,6 @@ fn data_ce_to_primary(data_ce: u64, c: char) -> u32 {
#[icu_provider::data_struct(marker(
CollationDataV1Marker,
"collator/data@1",
extension_key = "co",
fallback_by = "collation",
fallback_supplement = "collation"
))]
Expand Down Expand Up @@ -232,7 +231,6 @@ impl<'data> CollationDataV1<'data> {
#[icu_provider::data_struct(marker(
CollationDiacriticsV1Marker,
"collator/dia@1",
extension_key = "co",
fallback_by = "collation",
fallback_supplement = "collation",
))]
Expand Down Expand Up @@ -276,7 +274,6 @@ pub struct CollationJamoV1<'data> {
#[icu_provider::data_struct(marker(
CollationReorderingV1Marker,
"collator/reord@1",
extension_key = "co",
fallback_by = "collation",
fallback_supplement = "collation"
))]
Expand Down Expand Up @@ -366,7 +363,6 @@ impl<'data> CollationReorderingV1<'data> {
#[icu_provider::data_struct(marker(
CollationMetadataV1Marker,
"collator/meta@1",
extension_key = "co",
fallback_by = "collation",
fallback_supplement = "collation"
))]
Expand Down
Loading
Loading