diff --git a/components/experimental/src/personnames/specifications/derive_locale.rs b/components/experimental/src/personnames/specifications/derive_locale.rs index 0499eed65f4..d4d9183a1bb 100644 --- a/components/experimental/src/personnames/specifications/derive_locale.rs +++ b/components/experimental/src/personnames/specifications/derive_locale.rs @@ -112,12 +112,12 @@ mod tests { let mut locale = locale!("fr"); lc.maximize(&mut locale.id); assert_eq!( - effective_locale(&locale!("de_Latn_ch"), &locale), - Ok(&locale!("de_Latn_ch")) + effective_locale(&locale!("de-Latn-ch"), &locale), + Ok(&locale!("de-Latn-ch")) ); assert_eq!( - effective_locale(&locale, &locale!("de_Latn_ch")), - Ok(&locale!("fr_Latn_FR")) + effective_locale(&locale, &locale!("de-Latn-ch")), + Ok(&locale!("fr-Latn-FR")) ); } @@ -127,11 +127,11 @@ mod tests { let mut locale = locale!("ja"); lc.maximize(&mut locale.id); assert_eq!( - effective_locale(&locale!("de_Latn_ch"), &locale), + effective_locale(&locale!("de-Latn-ch"), &locale), Ok(&locale!("ja-Jpan-JP")) ); assert_eq!( - effective_locale(&locale, &locale!("de_Latn_ch")), + effective_locale(&locale, &locale!("de-Latn-ch")), Ok(&locale!("de-Latn-CH")) ); } @@ -142,27 +142,27 @@ mod tests { let mut locale = locale!("ja"); lc.maximize(&mut locale.id); assert_eq!( - effective_locale(&locale!("ja_Hani_JP"), &locale), - Ok(&locale!("ja_Hani_JP")) + effective_locale(&locale!("ja-Hani-JP"), &locale), + Ok(&locale!("ja-Hani-JP")) ); assert_eq!( - effective_locale(&locale!("ja_Kana_JP"), &locale), + effective_locale(&locale!("ja-Kana-JP"), &locale), Ok(&locale!("ja-Kana-JP")) ); assert_eq!( - effective_locale(&locale!("ja_Hira_JP"), &locale), + effective_locale(&locale!("ja-Hira-JP"), &locale), Ok(&locale!("ja-Hira-JP")) ); assert_eq!( - effective_locale(&locale, &locale!("ja_Hani_JP")), + effective_locale(&locale, &locale!("ja-Hani-JP")), Ok(&locale!("ja-Jpan-JP")) ); assert_eq!( - effective_locale(&locale, &locale!("ja_Kana_JP")), + 
effective_locale(&locale, &locale!("ja-Kana-JP")), Ok(&locale!("ja-Jpan-JP")) ); assert_eq!( - effective_locale(&locale, &locale!("ja_Hira_JP")), + effective_locale(&locale, &locale!("ja-Hira-JP")), Ok(&locale!("ja-Jpan-JP")) ); } @@ -173,15 +173,15 @@ mod tests { let scripts = icu_properties::PropertyNamesShort::::new(); assert_eq!( likely_person_name_locale(&person_name("Miyazaki", "Hayao").unwrap(), swe, scripts), - Ok(locale!("und_Latn")) + Ok(locale!("und-Latn")) ); assert_eq!( likely_person_name_locale(&person_name("駿", "宮崎").unwrap(), swe, scripts), - Ok(locale!("und_Hani")) + Ok(locale!("und-Hani")) ); assert_eq!( likely_person_name_locale(&person_name("하야오", "미야자키").unwrap(), swe, scripts), - Ok(locale!("und_Hang")) + Ok(locale!("und-Hang")) ); assert_eq!( likely_person_name_locale( @@ -189,7 +189,7 @@ mod tests { swe, scripts ), - Ok(locale!("und_Kana")) + Ok(locale!("und-Kana")) ); } diff --git a/components/experimental/src/personnames/specifications/derive_name_order.rs b/components/experimental/src/personnames/specifications/derive_name_order.rs index 236653f7bfc..e79fa9b759d 100644 --- a/components/experimental/src/personnames/specifications/derive_name_order.rs +++ b/components/experimental/src/personnames/specifications/derive_name_order.rs @@ -69,7 +69,7 @@ mod tests { // Match "und" assert_eq!( name_order_derive( - &locale!("de_Latn_ch"), + &locale!("de-Latn-ch"), &surname_first, &given_first, fallbacker @@ -84,7 +84,7 @@ mod tests { // since "und" is a catch all set in given first, it is a perfect match. 
assert_eq!( name_order_derive( - &locale!("ja_Jpan_jp"), + &locale!("ja-Jpan-jp"), &surname_first, &given_first, fallbacker @@ -104,7 +104,7 @@ mod tests { assert_eq!( name_order_derive( - &locale!("en_Latn_SG"), + &locale!("en-Latn-SG"), &surname_first, &given_first, fallbacker @@ -116,7 +116,7 @@ mod tests { // This is not matching because of zh, but because of und-CN assert_eq!( name_order_derive( - &locale!("zh_Hans_CN"), + &locale!("zh-Hans-CN"), &surname_first, &given_first, fallbacker @@ -128,7 +128,7 @@ mod tests { // This is not matching because of zh, but because of und-CN assert_eq!( name_order_derive( - &locale!("zh_Hans"), + &locale!("zh-Hans"), &surname_first, &given_first, fallbacker diff --git a/components/experimental/tests/displaynames/tests.rs b/components/experimental/tests/displaynames/tests.rs index 53e558db6ea..2a92386b4f6 100644 --- a/components/experimental/tests/displaynames/tests.rs +++ b/components/experimental/tests/displaynames/tests.rs @@ -22,7 +22,7 @@ fn test_concatenate() { should_borrow: true, }, TestCase { - input_1: &locale!("zh_Hans"), + input_1: &locale!("zh-Hans"), expected: "Simplified Chinese", should_borrow: true, }, diff --git a/components/experimental/tests/personnames/tests.rs b/components/experimental/tests/personnames/tests.rs index c7aec4c9242..0d3a772ca22 100644 --- a/components/experimental/tests/personnames/tests.rs +++ b/components/experimental/tests/personnames/tests.rs @@ -233,14 +233,14 @@ fn test_space_replacement_spec_formatting_locale_ja() -> Result<(), PersonNamesF let person_name = DefaultPersonName::new( person_data, - Some(locale!("de_Latn_CH")), + Some(locale!("de-Latn-CH")), Some(PreferredOrder::GivenFirst), )?; let formatter = PersonNamesFormatter::try_new_unstable( &TestingProvider, PersonNamesFormatterOptions::new( - locale!("ja_JP"), + locale!("ja-JP"), FormattingOrder::GivenFirst, FormattingLength::Medium, FormattingUsage::Referring, @@ -276,14 +276,14 @@ fn 
test_space_replacement_spec_formatting_locale_ja_jpan_script( let person_name = DefaultPersonName::new( person_data, - Some(locale!("de_Jpan_CH")), + Some(locale!("de-Jpan-CH")), Some(PreferredOrder::GivenFirst), )?; let formatter = PersonNamesFormatter::try_new_unstable( &TestingProvider, PersonNamesFormatterOptions::new( - locale!("ja_JP"), + locale!("ja-JP"), FormattingOrder::GivenFirst, FormattingLength::Medium, FormattingUsage::Referring, @@ -319,14 +319,14 @@ fn test_space_replacement_spec_formatting_locale_ja_compatible( let person_name = DefaultPersonName::new( person_data, - Some(locale!("ja_Jpan_JP")), + Some(locale!("ja-Jpan-JP")), Some(PreferredOrder::SurnameFirst), )?; let formatter = PersonNamesFormatter::try_new_unstable( &TestingProvider, PersonNamesFormatterOptions::new( - locale!("ja_JP"), + locale!("ja-JP"), FormattingOrder::GivenFirst, FormattingLength::Medium, FormattingUsage::Referring, @@ -362,14 +362,14 @@ fn test_space_replacement_spec_formatting_locale_de_compatible( let person_name = DefaultPersonName::new( person_data, - Some(locale!("de_Latn_CH")), + Some(locale!("de-Latn-CH")), Some(PreferredOrder::GivenFirst), )?; let formatter = PersonNamesFormatter::try_new_unstable( &TestingProvider, PersonNamesFormatterOptions::new( - locale!("de_CH"), + locale!("de-CH"), FormattingOrder::GivenFirst, FormattingLength::Medium, FormattingUsage::Referring, @@ -405,14 +405,14 @@ fn test_space_replacement_spec_formatting_locale_de_jpan_script( let person_name = DefaultPersonName::new( person_data, - Some(locale!("de_Jpan_CH")), + Some(locale!("de-Jpan-CH")), Some(PreferredOrder::GivenFirst), )?; let formatter = PersonNamesFormatter::try_new_unstable( &TestingProvider, PersonNamesFormatterOptions::new( - locale!("de_CH"), + locale!("de-CH"), FormattingOrder::GivenFirst, FormattingLength::Medium, FormattingUsage::Referring, @@ -450,14 +450,14 @@ fn test_space_replacement_spec_formatting_locale_und_latn_jp( let person_name = DefaultPersonName::new( 
person_data, - Some(locale!("und_Latn_JP")), + Some(locale!("und-Latn-JP")), Some(PreferredOrder::GivenFirst), )?; let formatter = PersonNamesFormatter::try_new_unstable( &TestingProvider, PersonNamesFormatterOptions::new( - locale!("de_CH"), + locale!("de-CH"), FormattingOrder::GivenFirst, FormattingLength::Medium, FormattingUsage::Referring, diff --git a/components/locale/src/expander.rs b/components/locale/src/expander.rs index b388a44f45e..a33a410869c 100644 --- a/components/locale/src/expander.rs +++ b/components/locale/src/expander.rs @@ -478,12 +478,12 @@ impl LocaleExpander { /// /// let lc = LocaleExpander::new(); /// - /// let mut locale = locale!("zh_TW"); + /// let mut locale = locale!("zh-TW"); /// assert_eq!( /// lc.minimize_favor_script(&mut locale.id), /// TransformResult::Modified /// ); - /// assert_eq!(locale, locale!("zh_Hant")); + /// assert_eq!(locale, locale!("zh-Hant")); /// ``` pub fn minimize_favor_script(&self, langid: &mut LanguageIdentifier) -> TransformResult { self.minimize_impl(langid, false) diff --git a/components/locale_core/benches/iai_langid.rs b/components/locale_core/benches/iai_langid.rs index 00408ddb095..22f75accf14 100644 --- a/components/locale_core/benches/iai_langid.rs +++ b/components/locale_core/benches/iai_langid.rs @@ -63,7 +63,7 @@ fn bench_langid_strict_cmp() { // Tests the cost of comparing a langid against byte strings. use core::cmp::Ordering; - let lid = langid!("en_us"); + let lid = langid!("en-us"); let result = LIDS_STR .iter() @@ -76,7 +76,7 @@ fn bench_langid_strict_cmp() { fn bench_langid_matching() { // Tests matching a LID against other LIDs. - let lid = langid!("en_us"); + let lid = langid!("en-us"); let count = LIDS.iter().filter(|l| lid == **l).count(); assert_eq!(count, 1); @@ -85,7 +85,7 @@ fn bench_langid_matching() { fn bench_langid_matching_str() { // Tests matching a LID against list of str. 
- let lid = langid!("en_us"); + let lid = langid!("en-us"); let count = LIDS_STR.iter().filter(|&l| lid.normalizing_eq(l)).count(); assert_eq!(count, 1); diff --git a/components/locale_core/src/langid.rs b/components/locale_core/src/langid.rs index a87bc73e329..50142fd8b1e 100644 --- a/components/locale_core/src/langid.rs +++ b/components/locale_core/src/langid.rs @@ -66,7 +66,7 @@ use alloc::borrow::Cow; /// subtags::{language, region, script, variant}, /// }; /// -/// let li = langid!("eN_latn_Us-Valencia"); +/// let li = langid!("eN-latn-Us-Valencia"); /// /// assert_eq!(li.language, language!("en")); /// assert_eq!(li.script, Some(script!("Latn"))); @@ -175,7 +175,7 @@ impl LanguageIdentifier { /// use icu::locale::LanguageIdentifier; /// /// assert_eq!( - /// LanguageIdentifier::normalize("pL_latn_pl").as_deref(), + /// LanguageIdentifier::normalize("pL-latn-pl").as_deref(), /// Ok("pl-Latn-PL") /// ); /// ``` @@ -194,7 +194,7 @@ impl LanguageIdentifier { /// use icu::locale::LanguageIdentifier; /// /// assert_eq!( - /// LanguageIdentifier::normalize("pL_latn_pl").as_deref(), + /// LanguageIdentifier::normalize("pL-latn-pl").as_deref(), /// Ok("pl-Latn-PL") /// ); /// ``` diff --git a/components/locale_core/src/locale.rs b/components/locale_core/src/locale.rs index 5b433f1ecac..303f0166e71 100644 --- a/components/locale_core/src/locale.rs +++ b/components/locale_core/src/locale.rs @@ -39,14 +39,14 @@ use core::str::FromStr; /// * *valid* - well-formed and only uses registered language subtags, extensions, keywords, types... /// * *canonical* - valid and no deprecated codes or structure. /// -/// At the moment parsing normalizes a well-formed locale identifier converting -/// `_` separators to `-` and adjusting casing to conform to the Unicode standard. -/// /// Any syntactically invalid subtags will cause the parsing to fail with an error. /// /// This operation normalizes syntax to be well-formed. No legacy subtag replacements is performed. 
/// For validation and canonicalization, see `LocaleCanonicalizer`. /// +/// ICU4X's Locale parsing does not allow for non-BCP-47-compatible locales [allowed by UTS 35 for backwards compatibility][tr35-bcp]. +/// Furthermore, it currently does not allow for language subtags to have more than three characters. +/// /// # Examples /// /// Simple example: @@ -75,7 +75,7 @@ use core::str::FromStr; /// ``` /// use icu::locale::{subtags::*, Locale}; /// -/// let loc: Locale = "eN_latn_Us-Valencia_u-hC-H12" +/// let loc: Locale = "eN-latn-Us-Valencia-u-hC-H12" /// .parse() /// .expect("Failed to parse."); /// @@ -89,6 +89,7 @@ use core::str::FromStr; /// ``` /// /// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/tr35.html#Unicode_locale_identifier +/// [tr35-bcp]: https://unicode.org/reports/tr35/#BCP_47_Conformance #[derive(Default, PartialEq, Eq, Clone, Hash)] // no Ord or PartialOrd: see docs #[allow(clippy::exhaustive_structs)] // This struct is stable (and invoked by a macro) pub struct Locale { @@ -159,7 +160,7 @@ impl Locale { /// use icu::locale::Locale; /// /// assert_eq!( - /// Locale::normalize_utf8(b"pL_latn_pl-U-HC-H12").as_deref(), + /// Locale::normalize_utf8(b"pL-latn-pl-U-HC-H12").as_deref(), /// Ok("pl-Latn-PL-u-hc-h12") /// ); /// ``` @@ -178,7 +179,7 @@ impl Locale { /// use icu::locale::Locale; /// /// assert_eq!( - /// Locale::normalize("pL_latn_pl-U-HC-H12").as_deref(), + /// Locale::normalize("pL-latn-pl-U-HC-H12").as_deref(), /// Ok("pl-Latn-PL-u-hc-h12") /// ); /// ``` diff --git a/components/locale_core/src/macros.rs b/components/locale_core/src/macros.rs index 25be0450638..3bd8d863fcb 100644 --- a/components/locale_core/src/macros.rs +++ b/components/locale_core/src/macros.rs @@ -11,9 +11,9 @@ /// ``` /// use icu::locale::{langid, LanguageIdentifier}; /// -/// const DE_AT: LanguageIdentifier = langid!("de_at"); +/// const DE_AT: LanguageIdentifier = langid!("de-at"); /// -/// let de_at: LanguageIdentifier = 
"de_at".parse().unwrap(); +/// let de_at: LanguageIdentifier = "de-at".parse().unwrap(); /// /// assert_eq!(DE_AT, de_at); /// ``` @@ -64,9 +64,9 @@ macro_rules! langid { /// ``` /// use icu::locale::{locale, Locale}; /// -/// const DE_AT: Locale = locale!("de_at"); +/// const DE_AT: Locale = locale!("de-at"); /// -/// let de_at: Locale = "de_at".parse().unwrap(); +/// let de_at: Locale = "de-at".parse().unwrap(); /// /// assert_eq!(DE_AT, de_at); /// ``` @@ -170,22 +170,22 @@ mod test { #[test] fn test_langid_macro_can_parse_langid_with_single_variant() { - const DE_AT_FOOBAR: LanguageIdentifier = langid!("de_at-foobar"); - let de_at_foobar: LanguageIdentifier = "de_at-foobar".parse().unwrap(); + const DE_AT_FOOBAR: LanguageIdentifier = langid!("de-at-foobar"); + let de_at_foobar: LanguageIdentifier = "de-at-foobar".parse().unwrap(); assert_eq!(DE_AT_FOOBAR, de_at_foobar); } #[test] fn test_locale_macro_can_parse_locale_with_single_variant() { - const DE_AT_FOOBAR: Locale = locale!("de_at-foobar"); - let de_at_foobar: Locale = "de_at-foobar".parse().unwrap(); + const DE_AT_FOOBAR: Locale = locale!("de-at-foobar"); + let de_at_foobar: Locale = "de-at-foobar".parse().unwrap(); assert_eq!(DE_AT_FOOBAR, de_at_foobar); } #[test] fn test_locale_macro_can_parse_locale_with_single_keyword_unicode_extension() { - const DE_AT_U_CA_FOOBAR: Locale = locale!("de_at-u-ca-foobar"); - let de_at_u_ca_foobar: Locale = "de_at-u-ca-foobar".parse().unwrap(); + const DE_AT_U_CA_FOOBAR: Locale = locale!("de-at-u-ca-foobar"); + let de_at_u_ca_foobar: Locale = "de-at-u-ca-foobar".parse().unwrap(); assert_eq!(DE_AT_U_CA_FOOBAR, de_at_u_ca_foobar); } } diff --git a/components/locale_core/src/parser/mod.rs b/components/locale_core/src/parser/mod.rs index f57f8a2cdff..674e26246ee 100644 --- a/components/locale_core/src/parser/mod.rs +++ b/components/locale_core/src/parser/mod.rs @@ -21,7 +21,7 @@ const fn skip_before_separator(slice: &[u8]) -> &[u8] { let mut end = 0; 
#[allow(clippy::indexing_slicing)] // very protected, should optimize out - while end < slice.len() && !matches!(slice[end], b'-' | b'_') { + while end < slice.len() && !matches!(slice[end], b'-') { // Advance until we reach end of slice or a separator. end += 1; } @@ -106,7 +106,7 @@ mod test { #[test] fn subtag_iterator_peek_test() { - let slice = "de_at-u-ca-foobar"; + let slice = "de-at-u-ca-foobar"; let mut si = SubtagIterator::new(slice.as_bytes()); assert_eq!(si.peek().map(slice_to_str), Some("de")); @@ -156,7 +156,7 @@ mod test { assert_eq!(si.next().map(slice_to_str), Some("")); assert_eq!(si.next(), None); - let slice = "de_at-u-ca-foobar"; + let slice = "de-at-u-ca-foobar"; let si = SubtagIterator::new(slice.as_bytes()); assert_eq!( si.map(slice_to_str).collect::>(), diff --git a/components/locale_core/tests/fixtures/canonicalize.json b/components/locale_core/tests/fixtures/canonicalize.json index 79a50571465..46db8b73861 100644 --- a/components/locale_core/tests/fixtures/canonicalize.json +++ b/components/locale_core/tests/fixtures/canonicalize.json @@ -8,7 +8,7 @@ "output": "en-US" }, { - "input": "ZH_hans_hK", + "input": "ZH-hans-hK", "output": "zh-Hans-HK" }, { diff --git a/components/locale_core/tests/fixtures/invalid-extensions.json b/components/locale_core/tests/fixtures/invalid-extensions.json index 3aff2636b2d..0c84e34dadd 100644 --- a/components/locale_core/tests/fixtures/invalid-extensions.json +++ b/components/locale_core/tests/fixtures/invalid-extensions.json @@ -12,7 +12,7 @@ { "input": { "type": "Locale", - "identifier": "pl-US-x-@A_3" + "identifier": "pl-US-x-@A-3" }, "output": { "error": "InvalidExtension", diff --git a/components/locale_core/tests/fixtures/invalid.json b/components/locale_core/tests/fixtures/invalid.json index c22459e65d4..a9e683dda26 100644 --- a/components/locale_core/tests/fixtures/invalid.json +++ b/components/locale_core/tests/fixtures/invalid.json @@ -6,6 +6,7 @@ "text": "The given language subtag is invalid" } }, 
+ { "input": "--", "output": { @@ -34,6 +35,13 @@ "text": "The given subtag is invalid" } }, + { + "input": "en_us", + "output": { + "error": "InvalidLanguage", + "text": "The given language subtag is invalid" + } + }, { "input": "en--US", "output": { @@ -105,7 +113,7 @@ } }, { - "input": "pl-Latn-US-3_dd", + "input": "pl-Latn-US-3-dd", "output": { "error": "InvalidSubtag", "text": "Invalid subtag"