diff --git a/Cargo.lock b/Cargo.lock index 54eb86ed2e3..43cc082619b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1369,6 +1369,7 @@ dependencies = [ "iai", "icu", "icu_benchmark_macros", + "icu_provider", "litemap", "postcard", "potential_utf", diff --git a/components/experimental/src/duration/formatter.rs b/components/experimental/src/duration/formatter.rs index 4a19d5f2a50..aa633a1db35 100644 --- a/components/experimental/src/duration/formatter.rs +++ b/components/experimental/src/duration/formatter.rs @@ -147,7 +147,7 @@ impl DurationUnitFormatter { } } -impl From for icu_list::ListLength { +impl From for icu_list::ListFormatterOptions { fn from(style: BaseStyle) -> Self { // Section 1.1.13 // 1. Let lfOpts be OrdinaryObjectCreate(null). @@ -157,11 +157,12 @@ impl From for icu_list::ListLength { // a. Set listStyle to "short". // 5. Perform ! CreateDataPropertyOrThrow(lfOpts, "style", listStyle). // 6. Let lf be ! Construct(%ListFormat%, « durationFormat.[[Locale]], lfOpts »). - match style { + let length = match style { BaseStyle::Long => ListLength::Wide, BaseStyle::Short | BaseStyle::Digital => ListLength::Short, BaseStyle::Narrow => ListLength::Narrow, - } + }; + Self::default().with_length(length) } } @@ -194,11 +195,12 @@ impl DurationFormatter { })? .payload; + let temp_loc = locale.clone().into_locale(); Ok(Self { digital, options, unit: DurationUnitFormatter::try_new(locale, options)?, - list: ListFormatter::try_new_unit_with_length(locale, options.base.into())?, + list: ListFormatter::try_new_unit(temp_loc.into(), options.base.into())?, fdf: FixedDecimalFormatter::try_new(locale, Default::default())?, }) } @@ -223,13 +225,14 @@ impl DurationFormatter { })? 
.payload; + let temp_loc = locale.clone().into_locale(); Ok(Self { digital, options, unit: DurationUnitFormatter::try_new_unstable(provider, locale, options)?, - list: ListFormatter::try_new_unit_with_length_unstable( + list: ListFormatter::try_new_unit_unstable( provider, - locale, + temp_loc.into(), options.base.into(), )?, fdf: FixedDecimalFormatter::try_new_unstable(provider, locale, Default::default())?, diff --git a/components/list/README.md b/components/list/README.md index a0b4680f27c..f7645ee8299 100644 --- a/components/list/README.md +++ b/components/list/README.md @@ -12,9 +12,10 @@ and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latte ### Formatting *and* lists in Spanish ```rust -let list_formatter = ListFormatter::try_new_and_with_length( - &locale!("es").into(), - ListLength::Wide, +let list_formatter = ListFormatter::try_new_and( + locale!("es").into(), + ListFormatterOptions::default() + .with_length(ListLength::Wide) ) .expect("locale should be present"); @@ -33,9 +34,10 @@ assert_writeable_eq!( ### Formatting *or* lists in Thai ```rust -let list_formatter = ListFormatter::try_new_or_with_length( - &locale!("th").into(), - ListLength::Short, +let list_formatter = ListFormatter::try_new_or( + locale!("th").into(), + ListFormatterOptions::default() + .with_length(ListLength::Short) ) .expect("locale should be present"); @@ -46,9 +48,10 @@ assert_writeable_eq!(list_formatter.format(1..=3), "1, 2 หรือ 3",); ### Formatting unit lists in English ```rust -let list_formatter = ListFormatter::try_new_unit_with_length( - &locale!("en").into(), - ListLength::Wide, +let list_formatter = ListFormatter::try_new_unit( + locale!("en").into(), + ListFormatterOptions::default() + .with_length(ListLength::Wide) ) .expect("locale should be present"); diff --git a/components/list/examples/and_list.rs b/components/list/examples/and_list.rs index 2f9bbcfd0da..3722b0a065a 100644 --- a/components/list/examples/and_list.rs +++ 
b/components/list/examples/and_list.rs @@ -6,12 +6,15 @@ icu_benchmark_macros::instrument!(); use icu_benchmark_macros::println; -use icu::list::{ListFormatter, ListLength}; +use icu::list::{ListFormatter, ListFormatterOptions, ListLength}; use icu::locale::locale; fn main() { - let list_formatter = - ListFormatter::try_new_and_with_length(&locale!("es").into(), ListLength::Wide).unwrap(); + let list_formatter = ListFormatter::try_new_and( + locale!("es").into(), + ListFormatterOptions::default().with_length(ListLength::Wide), + ) + .unwrap(); println!( "{}", diff --git a/components/list/src/lib.rs b/components/list/src/lib.rs index b57e302a7d7..69969dc0a52 100644 --- a/components/list/src/lib.rs +++ b/components/list/src/lib.rs @@ -12,13 +12,14 @@ //! ## Formatting *and* lists in Spanish //! //! ``` -//! # use icu::list::{ListFormatter, ListLength}; +//! # use icu::list::{ListFormatter, ListFormatterOptions, ListLength}; //! # use icu::locale::locale; //! # use writeable::*; //! # -//! let list_formatter = ListFormatter::try_new_and_with_length( -//! &locale!("es").into(), -//! ListLength::Wide, +//! let list_formatter = ListFormatter::try_new_and( +//! locale!("es").into(), +//! ListFormatterOptions::default() +//! .with_length(ListLength::Wide) //! ) //! .expect("locale should be present"); //! @@ -37,13 +38,14 @@ //! ## Formatting *or* lists in Thai //! //! ``` -//! # use icu::list::{ListFormatter, ListLength}; +//! # use icu::list::{ListFormatter, ListFormatterOptions, ListLength}; //! # use icu::locale::locale; //! # use writeable::*; //! # -//! let list_formatter = ListFormatter::try_new_or_with_length( -//! &locale!("th").into(), -//! ListLength::Short, +//! let list_formatter = ListFormatter::try_new_or( +//! locale!("th").into(), +//! ListFormatterOptions::default() +//! .with_length(ListLength::Short) //! ) //! .expect("locale should be present"); //! @@ -54,13 +56,14 @@ //! ## Formatting unit lists in English //! //! ``` -//! 
# use icu::list::{ListFormatter, ListLength}; +//! # use icu::list::{ListFormatter, ListFormatterOptions, ListLength}; //! # use icu::locale::locale; //! # use writeable::*; //! # -//! let list_formatter = ListFormatter::try_new_unit_with_length( -//! &locale!("en").into(), -//! ListLength::Wide, +//! let list_formatter = ListFormatter::try_new_unit( +//! locale!("en").into(), +//! ListFormatterOptions::default() +//! .with_length(ListLength::Wide) //! ) //! .expect("locale should be present"); //! @@ -91,25 +94,10 @@ extern crate alloc; mod lazy_automaton; mod list_formatter; +mod options; mod patterns; pub mod provider; pub use list_formatter::*; - -/// Represents the style of a list. See the -/// [CLDR spec](https://unicode.org/reports/tr35/tr35-general.html#ListPatterns) -/// for an explanation of the different styles. -#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash, Default)] -#[non_exhaustive] -pub enum ListLength { - /// A typical list - #[default] - Wide, - /// A shorter list - Short, - /// The shortest type of list - Narrow, - // *Important*: When adding a variant here, make sure the code in - // ListFormatterPatterns::{start, middle, end, pair} stays panic-free! -} +pub use options::*; diff --git a/components/list/src/list_formatter.rs b/components/list/src/list_formatter.rs index b344b3ecb41..afb0dc0a98b 100644 --- a/components/list/src/list_formatter.rs +++ b/components/list/src/list_formatter.rs @@ -3,8 +3,9 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::provider::*; -use crate::ListLength; +use crate::{ListFormatterOptions, ListLength}; use core::fmt::{self, Write}; +use icu_locale_core::preferences::define_preferences; use icu_provider::marker::ErasedMarker; use icu_provider::prelude::*; use writeable::*; @@ -12,6 +13,12 @@ use writeable::*; #[cfg(doc)] extern crate writeable; +define_preferences!( + /// The preferences for list formatting. 
+ ListFormatterPreferences, + {} +); + /// A formatter that renders sequences of items in an i18n-friendly way. See the /// [crate-level documentation](crate) for more details. #[derive(Debug)] @@ -22,7 +29,7 @@ pub struct ListFormatter { macro_rules! constructor { ($name: ident, $name_any: ident, $name_buffer: ident, $name_unstable: ident, $marker: ty, $doc: literal) => { icu_provider::gen_any_buffer_data_constructors!( - (locale, style: ListLength) -> error: DataError, + (prefs: ListFormatterPreferences, options: ListFormatterOptions) -> error: DataError, #[doc = concat!("Creates a new [`ListFormatter`] that produces a ", $doc, "-type list using compiled data.")] /// /// See the [CLDR spec](https://unicode.org/reports/tr35/tr35-general.html#ListPatterns) for @@ -43,18 +50,20 @@ macro_rules! constructor { #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::$name)] pub fn $name_unstable( provider: &(impl DataProvider<$marker> + ?Sized), - locale: &DataLocale, - length: ListLength, + prefs: ListFormatterPreferences, + options: ListFormatterOptions, ) -> Result { + let length = match options.length.unwrap_or_default() { + ListLength::Narrow => ListFormatterPatternsV2::NARROW, + ListLength::Short => ListFormatterPatternsV2::SHORT, + ListLength::Wide => ListFormatterPatternsV2::WIDE, + }; + let locale = get_data_locale_from_prefs(prefs); let data = provider .load(DataRequest { id: DataIdentifierBorrowed::for_marker_attributes_and_locale( - match length { - ListLength::Narrow => ListFormatterPatternsV2::NARROW, - ListLength::Short => ListFormatterPatternsV2::SHORT, - ListLength::Wide => ListFormatterPatternsV2::WIDE, - }, - locale), + length, + &locale), ..Default::default() })? .payload @@ -64,28 +73,39 @@ macro_rules! constructor { }; } +fn get_data_locale_from_prefs(prefs: ListFormatterPreferences) -> DataLocale { + // TODO(#5764): This should utilize region source priority. 
+ DataLocale::from_subtags( + prefs.locale_prefs.language, + prefs.locale_prefs.script, + prefs.locale_prefs.region, + prefs.locale_prefs.variant, + prefs.locale_prefs.subdivision, + ) +} + impl ListFormatter { constructor!( - try_new_and_with_length, - try_new_and_with_length_with_any_provider, - try_new_and_with_length_with_buffer_provider, - try_new_and_with_length_unstable, + try_new_and, + try_new_and_with_any_provider, + try_new_and_with_buffer_provider, + try_new_and_unstable, AndListV2Marker, "and" ); constructor!( - try_new_or_with_length, - try_new_or_with_length_with_any_provider, - try_new_or_with_length_with_buffer_provider, - try_new_or_with_length_unstable, + try_new_or, + try_new_or_with_any_provider, + try_new_or_with_buffer_provider, + try_new_or_unstable, OrListV2Marker, "or" ); constructor!( - try_new_unit_with_length, - try_new_unit_with_length_with_any_provider, - try_new_unit_with_length_with_buffer_provider, - try_new_unit_with_length_unstable, + try_new_unit, + try_new_unit_with_any_provider, + try_new_unit_with_buffer_provider, + try_new_unit_unstable, UnitListV2Marker, "unit" ); @@ -102,9 +122,10 @@ impl ListFormatter { /// use icu::list::*; /// # use icu::locale::locale; /// # use writeable::*; - /// let formatteur = ListFormatter::try_new_and_with_length( - /// &locale!("fr").into(), - /// ListLength::Wide, + /// let formatteur = ListFormatter::try_new_and( + /// locale!("fr").into(), + /// ListFormatterOptions::default() + /// .with_length(ListLength::Wide) /// ) /// .unwrap(); /// let pays = ["Italie", "France", "Espagne", "Allemagne"]; @@ -356,8 +377,8 @@ mod tests { macro_rules! 
test { ($locale:literal, $type:ident, $(($input:expr, $output:literal),)+) => { let f = ListFormatter::$type( - &icu::locale::locale!($locale).into(), - ListLength::Wide + icu::locale::locale!($locale).into(), + Default::default(), ).unwrap(); $( assert_writeable_eq!(f.format($input.iter()), $output); @@ -367,14 +388,14 @@ mod tests { #[test] fn test_basic() { - test!("fr", try_new_or_with_length, (["A", "B"], "A ou B"),); + test!("fr", try_new_or, (["A", "B"], "A ou B"),); } #[test] fn test_spanish() { test!( "es", - try_new_and_with_length, + try_new_and, (["x", "Mallorca"], "x y Mallorca"), (["x", "Ibiza"], "x e Ibiza"), (["x", "Hidalgo"], "x e Hidalgo"), @@ -383,7 +404,7 @@ mod tests { test!( "es", - try_new_or_with_length, + try_new_or, (["x", "Ibiza"], "x o Ibiza"), (["x", "Okinawa"], "x u Okinawa"), (["x", "8 más"], "x u 8 más"), @@ -400,18 +421,14 @@ mod tests { (["x", "11.000,92"], "x u 11.000,92"), ); - test!( - "es-AR", - try_new_and_with_length, - (["x", "Ibiza"], "x e Ibiza"), - ); + test!("es-AR", try_new_and, (["x", "Ibiza"], "x e Ibiza"),); } #[test] fn test_hebrew() { test!( "he", - try_new_and_with_length, + try_new_and, (["x", "יפו"], "x ויפו"), (["x", "Ibiza"], "x ו‑Ibiza"), ); diff --git a/components/list/src/options.rs b/components/list/src/options.rs new file mode 100644 index 00000000000..03132457a84 --- /dev/null +++ b/components/list/src/options.rs @@ -0,0 +1,55 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +/// A list of options set by the developer to adjust the behavior of the ListFormatter. 
+/// +/// # Examples +/// ``` +/// use icu::list::{ListFormatterOptions, ListLength}; +/// +/// let options = ListFormatterOptions::default() +/// .with_length(ListLength::Wide); +/// ``` +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[non_exhaustive] +pub struct ListFormatterOptions { + /// The length variant should reflect available space for the list. + pub length: Option, +} + +impl Default for ListFormatterOptions { + fn default() -> Self { + Self::default() + } +} + +impl ListFormatterOptions { + /// Constructs a new [`ListFormatterOptions`] struct. + pub const fn default() -> Self { + Self { length: None } + } + + /// Augments the struct with the set [`ListLength`]. + pub const fn with_length(mut self, length: ListLength) -> Self { + self.length = Some(length); + self + } +} + +/// Represents the style of a list. See the +/// [CLDR spec](https://unicode.org/reports/tr35/tr35-general.html#ListPatterns) +/// for an explanation of the different styles. +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash, Default)] +#[non_exhaustive] +pub enum ListLength { + /// A typical list + #[default] + Wide, + /// A shorter list + Short, + /// The shortest type of list + Narrow, + // *Important*: When adding a variant here, make sure the code in + // ListFormatterPatterns::{start, middle, end, pair} stays panic-free! 
+} diff --git a/components/locale_core/Cargo.toml b/components/locale_core/Cargo.toml index 596eb74d440..a1f3b9af6da 100644 --- a/components/locale_core/Cargo.toml +++ b/components/locale_core/Cargo.toml @@ -25,15 +25,16 @@ litemap = { workspace = true, features = ["alloc"] } tinystr = { workspace = true, features = ["alloc"] } writeable = { workspace = true } -databake = { workspace = true, features = ["derive"], optional = true} +databake = { workspace = true, features = ["derive"], optional = true } serde = { workspace = true, features = ["alloc", "derive"], optional = true } zerovec = { workspace = true, optional = true } [dev-dependencies] iai = { workspace = true } icu = { path = "../../components/icu", default-features = false } +icu_provider = { workspace = true } icu_benchmark_macros = { path = "../../tools/benchmark/macros" } -litemap = { path = "../../utils/litemap", features = ["testing"]} +litemap = { path = "../../utils/litemap", features = ["testing"] } postcard = { workspace = true, features = ["use-std"] } potential_utf = { workspace = true } serde = { workspace = true, features = ["derive"] } @@ -51,7 +52,7 @@ zerovec = ["dep:zerovec", "tinystr/zerovec"] bench = ["serde"] [lib] -bench = false # This option is required for Benchmark CI +bench = false # This option is required for Benchmark CI [package.metadata.cargo-all-features] # Bench feature gets tested separately and is only relevant for CI diff --git a/components/locale_core/README.md b/components/locale_core/README.md index 919ad619264..f450134435a 100644 --- a/components/locale_core/README.md +++ b/components/locale_core/README.md @@ -8,7 +8,9 @@ This module is published as its own crate ([`icu_locale_core`](https://docs.rs/i and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latter for more details on the ICU4X project. The module provides algorithms for parsing a string into a well-formed language or locale identifier -as defined by [`UTS #35: Unicode LDML 3. 
Unicode Language and Locale Identifiers`]. +as defined by [`UTS #35: Unicode LDML 3. Unicode Language and Locale Identifiers`]. Additionally +the module provides [`preferences`] interface for operations on locale preferences and conversions +from and to locale unicode extensions. [`Locale`] is the most common structure to use for storing information about a language, script, region, variants and extensions. In almost all cases, this struct should be used as the diff --git a/components/locale_core/src/extensions/mod.rs b/components/locale_core/src/extensions/mod.rs index ef99e1365db..e94b8f6b378 100644 --- a/components/locale_core/src/extensions/mod.rs +++ b/components/locale_core/src/extensions/mod.rs @@ -37,6 +37,14 @@ //! assert_eq!(loc.extensions.unicode.keywords.get(&key), Some(&value)); //! ``` //! +//! # Syntactic vs Semantic Extension Handling +//! +//! This module is useful when you need to work with Locale extensions at a syntactic level, +//! perhaps for parsing or generating locale identifiers that include any syntactically valid +//! extensions. +//! For handling and validating known CLDR values with semantic meaning, see the +//! [`crate::preferences::extensions`] module. +//! //! [`LanguageIdentifier`]: super::LanguageIdentifier //! [`Locale`]: super::Locale //! [`subtags`]: super::subtags diff --git a/components/locale_core/src/extensions/unicode/keywords.rs b/components/locale_core/src/extensions/unicode/keywords.rs index a62e60c57da..7e6a354ab1e 100644 --- a/components/locale_core/src/extensions/unicode/keywords.rs +++ b/components/locale_core/src/extensions/unicode/keywords.rs @@ -237,7 +237,7 @@ impl Keywords { /// /// Returns the old Unicode extension keywords. 
/// - /// # Example + /// # Examples /// /// ``` /// use icu::locale::Locale; diff --git a/components/locale_core/src/extensions/unicode/subdivision.rs b/components/locale_core/src/extensions/unicode/subdivision.rs index 5fc32d8f140..bbd222f7f44 100644 --- a/components/locale_core/src/extensions/unicode/subdivision.rs +++ b/components/locale_core/src/extensions/unicode/subdivision.rs @@ -5,7 +5,7 @@ use core::str::FromStr; use crate::parser::ParseError; -use crate::subtags::Region; +use crate::subtags::{Region, Subtag}; impl_tinystr_subtag!( /// A subdivision suffix used in [`SubdivisionId`]. @@ -131,6 +131,12 @@ impl SubdivisionId { let suffix = SubdivisionSuffix::try_from_utf8(suffix_code_units)?; Ok(Self { region, suffix }) } + + /// Convert to [`Subtag`] + pub fn into_subtag(self) -> Subtag { + let result = self.region.to_tinystr().concat(self.suffix.to_tinystr()); + Subtag::from_tinystr_unvalidated(result) + } } impl writeable::Writeable for SubdivisionId { diff --git a/components/locale_core/src/lib.rs b/components/locale_core/src/lib.rs index 372a26e8f30..6344ec4a134 100644 --- a/components/locale_core/src/lib.rs +++ b/components/locale_core/src/lib.rs @@ -8,7 +8,9 @@ //! and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latter for more details on the ICU4X project. //! //! The module provides algorithms for parsing a string into a well-formed language or locale identifier -//! as defined by [`UTS #35: Unicode LDML 3. Unicode Language and Locale Identifiers`]. +//! as defined by [`UTS #35: Unicode LDML 3. Unicode Language and Locale Identifiers`]. Additionally +//! the module provides [`preferences`] interface for operations on locale preferences and conversions +//! from and to locale unicode extensions. //! //! [`Locale`] is the most common structure to use for storing information about a language, //! script, region, variants and extensions. 
In almost all cases, this struct should be used as the diff --git a/components/locale_core/src/parser/langid.rs b/components/locale_core/src/parser/langid.rs index 92ce6f9159b..68f77ca6274 100644 --- a/components/locale_core/src/parser/langid.rs +++ b/components/locale_core/src/parser/langid.rs @@ -231,7 +231,7 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f } else { break; } - iter = iter.next_const().0 + iter = iter.next_const().0; } if let Some(k) = key { keyword = Some((k, current_type)); diff --git a/components/locale_core/src/preferences/extensions/mod.rs b/components/locale_core/src/preferences/extensions/mod.rs index 1de18efadc3..0869820249b 100644 --- a/components/locale_core/src/preferences/extensions/mod.rs +++ b/components/locale_core/src/preferences/extensions/mod.rs @@ -2,6 +2,22 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -//! TODO +//! A set of extensions which correspond to preferences. +//! +//! The module provides structures that represent known values for each keyword +//! in Locale [`extensions`](crate::extensions) with semantic meaning. +//! +//! # Syntactic vs Semantic Extension Handling +//! +//! This module ensures that only valid, recognized values are used, providing semantic validation. +//! It would reject invalid values such as `-u-hc-BB` because `BB` is not a known hour cycle. This +//! is ideal for applications that require strict adherence to standardized values and need to +//! prevent invalid or unrecognized data. +//! +//! If you need to construct syntactically valid Locale extensions without semantic validation, +//! allowing any valid key-value pair regardless of recognition, consider using the +//! [`crate::extensions`] module. +//! +//! 
[`Locale`]: crate::Locale pub mod unicode; diff --git a/components/locale_core/src/preferences/extensions/unicode/errors.rs b/components/locale_core/src/preferences/extensions/unicode/errors.rs index a44da5f4c86..5454d50c56c 100644 --- a/components/locale_core/src/preferences/extensions/unicode/errors.rs +++ b/components/locale_core/src/preferences/extensions/unicode/errors.rs @@ -2,14 +2,12 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -//! TODO +//! Errors related to parsing of Preferences. +/// Error returned by parsers of unicode extensions as preferences. #[non_exhaustive] #[derive(Debug)] -/// TODO pub enum PreferencesParseError { - /// TODO - UnknownKeyword, - /// TODO + /// The given keyword value is not a valid preference variant. InvalidKeywordValue, } diff --git a/components/locale_core/src/preferences/extensions/unicode/keywords/calendar.rs b/components/locale_core/src/preferences/extensions/unicode/keywords/calendar.rs index 2273bfb43b7..dac4b30358f 100644 --- a/components/locale_core/src/preferences/extensions/unicode/keywords/calendar.rs +++ b/components/locale_core/src/preferences/extensions/unicode/keywords/calendar.rs @@ -6,36 +6,65 @@ use crate::preferences::extensions::unicode::enum_keyword; -// https://github.com/unicode-org/cldr/blob/main/common/bcp47/calendar.xml enum_keyword!( - /// TODO + /// Islamic Calendar sub-type + /// + /// The list is based on [`CLDR Calendars`](https://github.com/unicode-org/cldr/blob/main/common/bcp47/calendar.xml) IslamicCalendarAlgorithm { - "umalqura" => Umalqura, - "tbla" => Tbla, - "civil" => Civil, - "rgsa" => Rgsa + /// Islamic calendar, Umm al-Qura + ("umalqura" => Umalqura), + /// Hijri calendar, tabular (intercalary years \[2,5,7,10,13,16,18,21,24,26,29] - astronomical epoch) + ("tbla" => Tbla), + /// Islamic calendar, tabular (intercalary years \[2,5,7,10,13,16,18,21,24,26,29] - civil epoch) + ("civil" => Civil), + 
+ /// Hijri calendar, Saudi Arabia sighting + ("rgsa" => Rgsa) }); enum_keyword!( - /// TODO + /// A Unicode Calendar Identifier defines a type of calendar. + /// + /// This selects calendar-specific data within a locale used for formatting and parsing, + /// such as date/time symbols and patterns; it also selects supplemental calendarData used + /// for calendrical calculations. The value can affect the computation of the first day of the week. + /// + /// The valid values are listed in [LDML](https://unicode.org/reports/tr35/#UnicodeCalendarIdentifier). CalendarAlgorithm { - "buddhist" => Buddhist, - "chinese" => Chinese, - "coptic" => Coptic, - "dangi" => Dangi, - "ethioaa" => Ethioaa, - "ethiopic" => Ethiopic, - "gregory" => Gregory, - "hebrew" => Hebrew, - "indian" => Indian, - "islamic" => Islamic(IslamicCalendarAlgorithm) { - "umalqura" => Umalqura, - "tbla" => Tbla, - "civil" => Civil, - "rgsa" => Rgsa - }, - "iso8601" => Iso8601, - "japanese" => Japanese, - "persian" => Persian, - "roc" => Roc + /// Thai Buddhist calendar (same as Gregorian except for the year) + ("buddhist" => Buddhist), + /// Traditional Chinese calendar + ("chinese" => Chinese), + /// Coptic calendar + ("coptic" => Coptic), + /// Traditional Korean calendar + ("dangi" => Dangi), + /// Ethiopic calendar, Amete Alem (epoch approx. 5493 B.C.E) + ("ethioaa" => Ethioaa), + /// Ethiopic calendar, Amete Mihret (epoch approx. 8 C.E.) 
+ ("ethiopic" => Ethiopic), + /// Gregorian calendar + ("gregory" => Gregory), + /// Traditional Hebrew calendar + ("hebrew" => Hebrew), + /// Indian calendar + ("indian" => Indian), + /// Islamic calendar + ("islamic" => Islamic(IslamicCalendarAlgorithm) { + /// Islamic calendar, Umm al-Qura + ("umalqura" => Umalqura), + /// Hijri calendar, tabular (intercalary years \[2,5,7,10,13,16,18,21,24,26,29] - astronomical epoch) + ("tbla" => Tbla), + /// Islamic calendar, tabular (intercalary years \[2,5,7,10,13,16,18,21,24,26,29] - civil epoch) + ("civil" => Civil), + /// Hijri calendar, Saudi Arabia sighting + ("rgsa" => Rgsa) + }), + /// ISO calendar (Gregorian calendar using the ISO 8601 calendar week rules) + ("iso8601" => Iso8601), + /// Japanese Imperial calendar + ("japanese" => Japanese), + /// Persian calendar + ("persian" => Persian), + /// Republic of China calendar + ("roc" => Roc) }, "ca"); diff --git a/components/locale_core/src/preferences/extensions/unicode/keywords/collation.rs b/components/locale_core/src/preferences/extensions/unicode/keywords/collation.rs index a1942009b2a..4a23c2f24c9 100644 --- a/components/locale_core/src/preferences/extensions/unicode/keywords/collation.rs +++ b/components/locale_core/src/preferences/extensions/unicode/keywords/collation.rs @@ -5,25 +5,38 @@ use crate::preferences::extensions::unicode::enum_keyword; enum_keyword!( - /// TODO + /// A Unicode Collation Identifier defines a type of collation (sort order). + /// + /// The valid values are listed in [LDML](https://unicode.org/reports/tr35/#UnicodeCollationIdentifier). 
CollationType { - "big5han" => Big5han, - "compat" => Compat, - "dict" => Dict, - "direct" => Direct, - "ducet" => Ducet, - "emoji" => Emoji, - "eor" => Eor, - "gb2312" => Gb2312, - "phonebk" => Phonebk, - "phonetic" => Phonetic, - "pinyin" => Pinyin, - "reformed" => Reformed, - "search" => Search, - "searchjl" => Searchjl, - "standard" => Standard, - "stroke" => Stroke, - "trad" => Trad, - "unihan" => Unihan, - "zhuyin" => Zhuyin, + /// A previous version of the ordering, for compatibility + ("compat" => Compat), + /// Dictionary style ordering (such as in Sinhala) + ("dict" => Dict), + /// The default Unicode collation element table order + ("ducet" => Ducet), + /// Recommended ordering for emoji characters + ("emoji" => Emoji), + /// European ordering rules + ("eor" => Eor), + /// Phonebook style ordering (such as in German) + ("phonebk" => Phonebk), + /// Phonetic ordering (sorting based on pronunciation) + ("phonetic" => Phonetic), + /// Pinyin ordering for Latin and for CJK characters (used in Chinese) + ("pinyin" => Pinyin), + /// Special collation type for string search + ("search" => Search), + /// Special collation type for Korean initial consonant search + ("searchjl" => Searchjl), + /// Default ordering for each language + ("standard" => Standard), + /// Pinyin ordering for Latin, stroke order for CJK characters (used in Chinese) + ("stroke" => Stroke), + /// Traditional style ordering (such as in Spanish) + ("trad" => Trad), + /// Pinyin ordering for Latin, Unihan radical-stroke ordering for CJK characters (used in Chinese) + ("unihan" => Unihan), + /// Pinyin ordering for Latin, zhuyin order for Bopomofo and CJK characters (used in Chinese) + ("zhuyin" => Zhuyin), }, "co"); diff --git a/components/locale_core/src/preferences/extensions/unicode/keywords/currency.rs b/components/locale_core/src/preferences/extensions/unicode/keywords/currency.rs index 917366f335c..38f210a300e 100644 --- 
a/components/locale_core/src/preferences/extensions/unicode/keywords/currency.rs +++ b/components/locale_core/src/preferences/extensions/unicode/keywords/currency.rs @@ -8,7 +8,9 @@ use crate::{extensions::unicode::Value, subtags::Subtag}; use tinystr::TinyAsciiStr; struct_keyword!( - /// TODO + /// A Unicode Currency Identifier defines a type of currency. + /// + /// The valid values are listed in [LDML](https://unicode.org/reports/tr35/#UnicodeCurrencyIdentifier). CurrencyType, "cu", TinyAsciiStr<3>, diff --git a/components/locale_core/src/preferences/extensions/unicode/keywords/currency_format.rs b/components/locale_core/src/preferences/extensions/unicode/keywords/currency_format.rs index 489366d7cc5..259941c9c3e 100644 --- a/components/locale_core/src/preferences/extensions/unicode/keywords/currency_format.rs +++ b/components/locale_core/src/preferences/extensions/unicode/keywords/currency_format.rs @@ -5,8 +5,12 @@ use crate::preferences::extensions::unicode::enum_keyword; enum_keyword!( - /// TODO + /// A Unicode Currency Format Identifier defines a style for currency formatting. + /// + /// The valid values are listed in [LDML](https://unicode.org/reports/tr35/#UnicodeCurrencyFormatIdentifier). 
CurrencyFormatStyle { - "standard" => Standard, - "account" => Account + /// Negative numbers use the minusSign symbol (the default) + ("standard" => Standard), + /// Negative numbers use parentheses or equivalent + ("account" => Account) }, "cf"); diff --git a/components/locale_core/src/preferences/extensions/unicode/keywords/dictionary_break.rs b/components/locale_core/src/preferences/extensions/unicode/keywords/dictionary_break.rs index 2c266507bc1..c280808f939 100644 --- a/components/locale_core/src/preferences/extensions/unicode/keywords/dictionary_break.rs +++ b/components/locale_core/src/preferences/extensions/unicode/keywords/dictionary_break.rs @@ -10,7 +10,10 @@ use alloc::vec::Vec; use core::str::FromStr; struct_keyword!( - /// TODO + /// A Unicode Dictionary Break Exclusion Identifier specifies + /// scripts to be excluded from dictionary-based text break (for words and lines). + /// + /// The valid values are of one or more items of type [`Script`](crate::subtags::Script). DictionaryBreakScriptExclusions, "dx", Vec