Commit f2b687c

Merge branch 'main' into tiny

robertbastian committed Jul 1, 2024
2 parents a13b83b + a65c5f5

Showing 13 changed files with 132 additions and 71 deletions.
components/calendar/src/islamic.rs (2 additions & 2 deletions)

@@ -2149,7 +2149,7 @@ mod test {
         }
     }

-    #[ignore]
+    #[ignore] // slow
     #[test]
     fn test_days_in_provided_year_observational() {
         let calendar = IslamicObservational::new();
@@ -2179,7 +2179,7 @@ mod test {
         );
     }

-    #[ignore]
+    #[ignore] // slow
     #[test]
     fn test_days_in_provided_year_ummalqura() {
         let calendar = IslamicUmmAlQura::new();
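Note: #[ignore]d tests are still compiled, so marking these slow calendar tests ignored keeps the default cargo test run fast while they remain runnable on demand with cargo test -- --ignored (or alongside everything else with -- --include-ignored).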
components/collator/tests/tests.rs (2 additions & 34 deletions)

@@ -622,16 +622,12 @@ fn test_ja_chooon_kigoo() {
     }
 }

-// TODO: This test should eventually test fallback
-// TODO: Test Swedish and Chinese also, since they have unusual
-// variant defaults. (But are currently not part of the test data.)
-#[ignore]
 #[test]
 fn test_region_fallback() {
     // There's no explicit fi-FI data.
     let locale: Locale = "fi-u-co-standard".parse().unwrap();

-    // let locale = locale!("fi-FI").into();
+    let locale = locale!("fi-FI");

     let collator = Collator::try_new(&locale.into(), CollatorOptions::new()).unwrap();
     assert_eq!(collator.compare("ä", "z"), Ordering::Greater);
@@ -694,8 +690,8 @@ fn test_vi() {
     }
 }

-#[ignore]
 #[test]
+// See DatagenProvider test_zh_non_baked for gb2312 and big5han tests
 fn test_zh() {
     // Note: ㄅ is Bopomofo.

@@ -735,20 +731,6 @@ fn test_zh() {
         assert_eq!(collator.compare("佰", "ㄅ"), Ordering::Less);
         assert_eq!(collator.compare("不", "把"), Ordering::Greater);
     }
-    {
-        let locale: Locale = "zh-u-co-gb2312".parse().unwrap();
-        let collator = Collator::try_new(&locale.into(), CollatorOptions::new()).unwrap();
-        assert_eq!(collator.compare("艾", "a"), Ordering::Greater);
-        assert_eq!(collator.compare("佰", "a"), Ordering::Greater);
-        assert_eq!(collator.compare("ㄅ", "a"), Ordering::Greater);
-        assert_eq!(collator.compare("ㄅ", "ж"), Ordering::Greater);
-        assert_eq!(collator.compare("艾", "佰"), Ordering::Less);
-        // In GB2312 proper, Bopomofo comes before Han, but the
-        // collation leaves Bopomofo unreordered, so it comes after.
-        assert_eq!(collator.compare("艾", "ㄅ"), Ordering::Less);
-        assert_eq!(collator.compare("佰", "ㄅ"), Ordering::Less);
-        assert_eq!(collator.compare("不", "把"), Ordering::Greater);
-    }
     {
         let locale: Locale = "zh-u-co-stroke".parse().unwrap();
         let collator = Collator::try_new(&locale.into(), CollatorOptions::new()).unwrap();
@@ -785,18 +767,6 @@ fn test_zh() {
         assert_eq!(collator.compare("佰", "ㄅ"), Ordering::Less);
         assert_eq!(collator.compare("不", "把"), Ordering::Less);
     }
-    {
-        let locale: Locale = "zh-u-co-big5han".parse().unwrap();
-        let collator = Collator::try_new(&locale.into(), CollatorOptions::new()).unwrap();
-        assert_eq!(collator.compare("艾", "a"), Ordering::Greater);
-        assert_eq!(collator.compare("佰", "a"), Ordering::Greater);
-        assert_eq!(collator.compare("ㄅ", "a"), Ordering::Greater);
-        assert_eq!(collator.compare("ㄅ", "ж"), Ordering::Less);
-        assert_eq!(collator.compare("艾", "佰"), Ordering::Less);
-        assert_eq!(collator.compare("艾", "ㄅ"), Ordering::Less);
-        assert_eq!(collator.compare("佰", "ㄅ"), Ordering::Less);
-        assert_eq!(collator.compare("不", "把"), Ordering::Less);
-    }
     // TODO: Test script and region aliases
 }

@@ -1044,7 +1014,6 @@ fn test_tr_primary() {
     }
 }

-#[ignore]
 #[test]
 fn test_lt_tertiary() {
     let left = [
@@ -1079,7 +1048,6 @@ fn test_lt_tertiary() {
     }
 }

-#[ignore]
 #[test]
 fn test_lt_primary() {
     let left = ["ž"];
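Note: the deleted gb2312 and big5han blocks are not lost. As the new comment on test_zh says, they move to the DatagenProvider test test_zh_non_baked, added in provider/bikeshed/src/collator/mod.rs below, where the data they need is still available. Meanwhile test_region_fallback, test_lt_tertiary, and test_lt_primary lose their #[ignore] and run by default again.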
components/datetime/src/helpers.rs (2 additions & 2 deletions)

@@ -38,7 +38,7 @@ macro_rules! size_test {
            };
        }
        #[test]
-        #[cfg_attr(not(icu4x_run_size_tests), ignore)]
+        #[cfg_attr(not(icu4x_run_size_tests), ignore)] // Doesn't work on arbitrary Rust versions
        fn $id() {
            let size = core::mem::size_of::<$ty>();
            let success = match option_env!("CI_TOOLCHAIN") {
@@ -67,7 +67,7 @@ macro_rules! size_test {
            };
        }
        #[test]
-        #[cfg_attr(not(icu4x_run_size_tests), ignore)]
+        #[cfg_attr(not(icu4x_run_size_tests), ignore)] // Doesn't work on arbitrary Rust versions
        fn $id() {
            let size = core::mem::size_of::<$ty>();
            let expected = $size;
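Note: icu4x_run_size_tests is a compile-time cfg rather than a cargo feature, so these size assertions stay ignored unless the cfg reaches the compiler, presumably something along the lines of

RUSTFLAGS="--cfg icu4x_run_size_tests" cargo test -p icu_datetime

The new comment records why they are off by default: the expected sizes only hold on the pinned CI toolchain, not on arbitrary Rust versions.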
components/experimental/src/dimension/provider/pattern_key.rs (9 additions & 1 deletion)

@@ -123,7 +123,7 @@ impl AsULE for PatternKey {
             PatternKey::Decimal(value) => {
                 let sign = if value < 0 { 0b0010_0000 } else { 0 };
                 debug_assert!(value > -32 && value < 32);
-                (0b01 << 6) | sign | (value as u8 & 0b0001_1111)
+                (0b01 << 6) | sign | (value.unsigned_abs() & 0b0001_1111)
             }
             PatternKey::Power { power, count } => {
                 let power_bits = {
@@ -230,6 +230,14 @@ fn test_pattern_key_ule() {
             count: CompoundCount::Two,
         }
     );
+
+    let decimal_neg_1 = PatternKey::Decimal(-1);
+    let decimal_neg_1_ule = decimal_neg_1.to_unaligned();
+    assert_eq!(decimal_neg_1_ule.0, 0b0110_0001);
+
+    let decimal_neg_1 = PatternKey::from_unaligned(decimal_neg_1_ule);
+    assert_eq!(decimal_neg_1, PatternKey::Decimal(-1));
+
     // Test invalid bytes
     let unvalidated_bytes = [0b1100_0000];
     assert_eq!(
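The one-line change in the AsULE impl above is a real encoding fix, not a cleanup: for a negative value, value as u8 keeps the two's-complement bit pattern, so the masked low five bits hold garbage instead of the magnitude. A self-contained sketch of the byte layout (function name hypothetical; the layout itself is read off the diff):

// One byte: a 0b01 tag in the top two bits, a sign flag at 0b0010_0000,
// and the magnitude in the low five bits.
fn encode_decimal(value: i8) -> u8 {
    debug_assert!(value > -32 && value < 32);
    let sign = if value < 0 { 0b0010_0000 } else { 0 };
    (0b01 << 6) | sign | (value.unsigned_abs() & 0b0001_1111)
}

fn main() {
    // Matches the new round-trip test: Decimal(-1) encodes as 0b0110_0001.
    assert_eq!(encode_decimal(-1), 0b0110_0001);
    // The old `value as u8` form yielded 0b0111_1111 here, because
    // -1 as u8 is 0xFF, and 0xFF & 0b0001_1111 is 31, not 1.
    assert_eq!(encode_decimal(1), 0b0100_0001);
}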
(person names formatter tests)

@@ -110,7 +110,6 @@ mod tests {
     }

     #[test]
-    #[ignore] // not clear from the spec whether this test is correct
     fn test_multi_3_initial_should_still_only_be_2() -> Result<(), PersonNamesFormatterError> {
         let mut person_data = LiteMap::new();
         person_data.insert(
@@ -127,7 +126,9 @@
         };
         let result =
             super::derive_missing_initials(&person_name, &requested_field, "{0}.", "{0} {1}");
-        assert_eq!(result, "M. J.");
+
+        // TODO(#3077): broken, this should be equal
+        assert_ne!(result, "M. J.");
         Ok(())
     }
 }
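Note the pattern used in place of the removed #[ignore]: the known-wrong expectation is written as assert_ne! with a TODO naming the tracking issue (#3077), so the test keeps running and will start failing, and thus demand cleanup, the moment the underlying bug is fixed. The new collator test below applies the same trick for #5136.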
components/segmenter/tests/css_word_break.rs (1 addition & 1 deletion)

@@ -154,7 +154,7 @@ fn wordbreak_keepall() {
 }

 #[test]
-#[cfg_attr(not(feature = "lstm"), ignore)]
+#[cfg(feature = "lstm")]
 fn wordbreak_keepall_lstm() {
     // from css/css-text/word-break/word-break-keep-all-003.html
     let s = "และและ";
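Note that this changes how the test is gated, not just its annotation: #[cfg_attr(not(feature = "lstm"), ignore)] still compiles the test body without the feature and merely reports it as ignored, whereas #[cfg(feature = "lstm")] removes the test from the binary entirely, so nothing in it needs to compile, or can be run by mistake, when the lstm feature is off.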
provider/baked/src/export.rs (12 additions & 13 deletions)

@@ -154,9 +154,12 @@ pub struct BakedExporter {
     pretty: bool,
     use_separate_crates: bool,
     use_internal_fallback: bool,
-    // Temporary storage for put_payload: marker -> (bake -> {data id})
+    // Temporary storage for put_payload: marker -> (payload -> {data id})
     data: Mutex<
-        HashMap<DataMarkerInfo, BTreeMap<SyncTokenStream, HashSet<DataIdentifierCow<'static>>>>,
+        HashMap<
+            DataMarkerInfo,
+            HashMap<DataPayload<ExportMarker>, HashSet<DataIdentifierCow<'static>>>,
+        >,
     >,
     /// (marker, file name) pairs to wire up in mod.rs. This is populated by `flush` and consumed by `close`.
     impl_data: Mutex<BTreeMap<DataMarkerInfo, SyncTokenStream>>,
@@ -355,14 +358,12 @@ impl DataExporter for BakedExporter {
         id: DataIdentifierBorrowed,
         payload: &DataPayload<ExportMarker>,
     ) -> Result<(), DataError> {
-        let payload = payload.tokenize(&self.dependencies);
-        let payload = payload.to_string();
         self.data
             .lock()
             .expect("poison")
             .entry(marker)
             .or_default()
-            .entry(payload)
+            .entry(payload.clone())
             .or_default()
             .insert(id.into_owned());
         Ok(())
@@ -466,10 +467,8 @@

         let ids_to_idents = deduplicated_values
             .iter()
-            .flat_map(|(bake, ids)| {
-                let bake = bake.parse::<TokenStream>().unwrap();
-
-                let mut idents = ids
+            .flat_map(|(payload, ids)| {
+                let ident = ids
                     .iter()
                     .map(|id| {
                         format!("_{}_{}", id.marker_attributes.as_str(), id.locale)
@@ -483,11 +482,11 @@
                         })
                         .collect::<String>()
                 })
-                .collect::<Vec<_>>();
-                idents.sort();
-                let ident = proc_macro2::Ident::new(&idents[0], proc_macro2::Span::call_site());
+                .min()
+                .unwrap();
+                let ident = proc_macro2::Ident::new(&ident, proc_macro2::Span::call_site());

-                idents_to_bakes.push((ident.clone(), bake));
+                idents_to_bakes.push((ident.clone(), payload.tokenize(&self.dependencies)));
                 ids.iter().map(move |id| (id.clone(), ident.clone()))
             })
             .collect();
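This flush/close logic previously deduplicated payloads by their stringified bake (the SyncTokenStream key) and picked the representative ident by collecting and sorting. It now keys the map on the payload itself, which the new Eq and Hash impls on DataPayload<ExportMarker> in provider/core (below) make possible, tokenizes each distinct payload exactly once, and picks the ident with .min(), just as deterministic but without the intermediate Vec. A minimal sketch of the deduplication shape, with stand-in types for the ICU4X ones:

use std::collections::{HashMap, HashSet};

// Stand-ins for DataMarkerInfo, DataPayload<ExportMarker>, and DataIdentifierCow.
#[derive(PartialEq, Eq, Hash)]
struct Payload(&'static str);

fn main() {
    let mut data: HashMap<&str, HashMap<Payload, HashSet<&str>>> = HashMap::new();

    // put_payload: ids carrying identical payloads collapse into one entry.
    for (marker, payload, id) in [
        ("AndListV1", Payload("pattern-a"), "_en"),
        ("AndListV1", Payload("pattern-a"), "_en_GB"), // duplicate payload
        ("AndListV1", Payload("pattern-b"), "_fr"),
    ] {
        data.entry(marker)
            .or_default()
            .entry(payload)
            .or_default()
            .insert(id);
    }

    // flush: each distinct payload is baked once, under the smallest ident
    // derived from the ids that share it.
    let dedup = &data["AndListV1"];
    assert_eq!(dedup.len(), 2);
    assert_eq!(*dedup[&Payload("pattern-a")].iter().min().unwrap(), "_en");
}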
provider/bikeshed/src/collator/mod.rs (45 additions & 0 deletions)

@@ -285,3 +285,48 @@ collation_provider!(
     ),
     toml_data
 );
+
+#[test]
+
+fn test_zh_non_baked() {
+    use core::cmp::Ordering;
+    use icu::collator::{Collator, CollatorOptions};
+    use icu_provider_adapters::fallback::LocaleFallbackProvider;
+
+    let provider =
+        LocaleFallbackProvider::try_new_unstable(DatagenProvider::new_testing()).unwrap();
+
+    // Note: ㄅ is Bopomofo.
+    {
+        let locale = "zh-u-co-gb2312".parse().unwrap();
+        let collator =
+            Collator::try_new_unstable(&provider, &locale, CollatorOptions::new()).unwrap();
+        assert_eq!(collator.compare("艾", "a"), Ordering::Greater);
+        assert_eq!(collator.compare("佰", "a"), Ordering::Greater);
+        assert_eq!(collator.compare("ㄅ", "a"), Ordering::Greater);
+        assert_eq!(collator.compare("ㄅ", "ж"), Ordering::Greater);
+
+        // TODO(#5136): broken, these should be equal
+        assert_ne!(collator.compare("艾", "佰"), Ordering::Less);
+        // In GB2312 proper, Bopomofo comes before Han, but the
+        // collation leaves Bopomofo unreordered, so it comes after.
+        assert_ne!(collator.compare("艾", "ㄅ"), Ordering::Less);
+        assert_ne!(collator.compare("佰", "ㄅ"), Ordering::Less);
+        assert_ne!(collator.compare("不", "把"), Ordering::Greater);
+    }
+    {
+        let locale = "zh-u-co-big5han".parse().unwrap();
+        let collator =
+            Collator::try_new_unstable(&provider, &locale, CollatorOptions::new()).unwrap();
+        assert_eq!(collator.compare("艾", "a"), Ordering::Greater);
+        assert_eq!(collator.compare("佰", "a"), Ordering::Greater);
+        assert_eq!(collator.compare("ㄅ", "a"), Ordering::Greater);
+        assert_eq!(collator.compare("不", "把"), Ordering::Less);
+
+        // TODO(#5136): broken, these should be equal
+        assert_ne!(collator.compare("ㄅ", "ж"), Ordering::Less);
+        assert_ne!(collator.compare("艾", "佰"), Ordering::Less);
+        assert_ne!(collator.compare("艾", "ㄅ"), Ordering::Less);
+        assert_ne!(collator.compare("佰", "ㄅ"), Ordering::Less);
+    }
+}
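Note that this test reads straight from DatagenProvider instead of baked data, so it goes through the unstable constructor, and the provider is explicitly wrapped in LocaleFallbackProvider: unlike the compiled-data constructors, try_new_unstable performs no locale fallback of its own, and the zh-u-co-* lookups here rely on it.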
provider/core/Cargo.toml (1 addition & 1 deletion)

@@ -70,7 +70,7 @@ deserialize_bincode_1 = ["serde", "dep:bincode", "std"]
 deserialize_postcard_1 = ["serde", "dep:postcard"]

 # Dependencies for running data generation
-export = ["serde", "dep:erased-serde", "dep:databake", "std", "sync"]
+export = ["serde", "dep:erased-serde", "dep:databake", "std", "sync", "dep:postcard"]

 [package.metadata.cargo-all-features]
 denylist = ["macros"]
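The export feature now lists dep:postcard because the export payload code below serializes through postcard flavors, both to report a payload's encoded size and to hash payloads; previously postcard was only pulled in through deserialize_postcard_1.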
provider/core/src/export/payload.rs (49 additions & 4 deletions)

@@ -6,7 +6,7 @@ use core::any::Any;

 use crate::dynutil::UpcastDataPayload;
 use crate::prelude::*;
-use alloc::boxed::Box;
+use alloc::sync::Arc;
 use databake::{Bake, CrateEnv, TokenStream};
 use yoke::trait_hack::YokeTraitHack;
 use yoke::*;
@@ -62,9 +62,9 @@
 }

 #[doc(hidden)] // macro
-#[derive(yoke::Yokeable)]
+#[derive(yoke::Yokeable, Clone)]
 pub struct ExportBox {
-    payload: Box<dyn ExportableDataPayload + Sync + Send>,
+    payload: Arc<dyn ExportableDataPayload + Sync + Send>,
 }

 impl PartialEq for ExportBox {
@@ -73,6 +73,8 @@ impl PartialEq for ExportBox {
     }
 }

+impl Eq for ExportBox {}
+
 impl core::fmt::Debug for ExportBox {
     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
         f.debug_struct("ExportBox")
@@ -90,7 +92,7 @@ where
 {
     fn upcast(other: DataPayload<M>) -> DataPayload<ExportMarker> {
         DataPayload::from_owned(ExportBox {
-            payload: Box::new(other),
+            payload: Arc::new(other),
         })
     }
 }
@@ -165,6 +167,49 @@ impl DataPayload<ExportMarker> {
     pub fn tokenize(&self, env: &CrateEnv) -> TokenStream {
         self.get().payload.bake_yoke(env)
     }
+
+    /// Returns the data size using postcard encoding
+    pub fn postcard_size(&self) -> usize {
+        use postcard::ser_flavors::{Flavor, Size};
+        let mut serializer = postcard::Serializer {
+            output: Size::default(),
+        };
+        let _infallible = self
+            .get()
+            .payload
+            .serialize_yoke(&mut <dyn erased_serde::Serializer>::erase(&mut serializer));
+
+        serializer.output.finalize().unwrap_or_default()
+    }
 }
+
+impl core::hash::Hash for DataPayload<ExportMarker> {
+    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
+        use postcard::ser_flavors::Flavor;
+
+        struct HashFlavor<'a, H>(&'a mut H);
+        impl<'a, H: core::hash::Hasher> Flavor for HashFlavor<'a, H> {
+            type Output = ();
+
+            fn try_push(&mut self, data: u8) -> postcard::Result<()> {
+                self.0.write_u8(data);
+                Ok(())
+            }
+
+            fn finalize(self) -> postcard::Result<Self::Output> {
+                Ok(())
+            }
+        }
+
+        let _infallible =
+            self.get()
+                .payload
+                .serialize_yoke(&mut <dyn erased_serde::Serializer>::erase(
+                    &mut postcard::Serializer {
+                        output: HashFlavor(state),
+                    },
+                ));
+    }
+}

 /// Marker type for [`ExportBox`].
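Two pieces here are worth spelling out. First, switching ExportBox from Box to Arc is what makes it (and hence DataPayload<ExportMarker>) cheaply cloneable, which put_payload in the baked exporter above now depends on. Second, the Hash impl never materializes a serialized buffer: a private postcard Flavor streams every encoded byte straight into the Hasher. The same trick works standalone through postcard's public serialize_with_flavor; a minimal sketch (names hypothetical):

use core::hash::Hasher;
use postcard::ser_flavors::Flavor;

// A postcard output "flavor" that feeds each serialized byte into a Hasher.
struct HashFlavor<'a, H>(&'a mut H);

impl<'a, H: Hasher> Flavor for HashFlavor<'a, H> {
    type Output = ();
    fn try_push(&mut self, data: u8) -> postcard::Result<()> {
        self.0.write_u8(data);
        Ok(())
    }
    fn finalize(self) -> postcard::Result<()> {
        Ok(())
    }
}

// Hash any Serialize type via its postcard encoding, with no intermediate Vec.
fn hash_via_postcard<T: serde::Serialize, H: Hasher>(value: &T, state: &mut H) {
    let _ = postcard::serialize_with_flavor(value, HashFlavor(state));
}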
tools/bakeddata-scripts/Cargo.toml (1 addition & 1 deletion)

@@ -11,7 +11,7 @@ edition = "2021"
 [dependencies]
 icu = { workspace = true, features = ["experimental"] }
 icu_provider = { workspace = true }
-icu_datagen = { workspace = true, features = ["baked_exporter", "fs_exporter", "rayon"] }
+icu_datagen = { workspace = true, features = ["baked_exporter", "rayon"] }
 icu_datagen_bikeshed = { workspace = true, features = ["networking", "experimental", "use_wasm"] }

 log = { workspace = true }