diff --git a/components/locid/src/extensions/unicode/value.rs b/components/locid/src/extensions/unicode/value.rs index 31bb478346b..94f57ac8532 100644 --- a/components/locid/src/extensions/unicode/value.rs +++ b/components/locid/src/extensions/unicode/value.rs @@ -2,8 +2,8 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). +use crate::helpers::ShortVec; use crate::parser::{get_subtag_iterator, ParserError}; -use alloc::vec; use alloc::vec::Vec; use core::ops::RangeInclusive; use core::str::FromStr; @@ -31,7 +31,7 @@ use tinystr::TinyAsciiStr; /// assert_eq!(&value2.to_string(), "islamic-civil"); /// ``` #[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)] -pub struct Value(Vec>); +pub struct Value(ShortVec>); const VALUE_LENGTH: RangeInclusive = 3..=8; const TRUE_VALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true"); @@ -51,16 +51,12 @@ impl Value { /// assert_eq!(&value.to_string(), "buddhist"); /// ``` pub fn from_bytes(input: &[u8]) -> Result { - let mut v = vec![]; + let mut v = ShortVec::new(); if !input.is_empty() { for subtag in get_subtag_iterator(input) { - if !Self::is_type_subtag(subtag) { - return Err(ParserError::InvalidExtension); - } - let val = - TinyAsciiStr::from_bytes(subtag).map_err(|_| ParserError::InvalidExtension)?; - if val != TRUE_VALUE { + let val = Self::subtag_from_bytes(subtag)?; + if let Some(val) = val { v.push(val); } } @@ -68,12 +64,50 @@ impl Value { Ok(Self(v)) } + /// Const constructor for when the value contains only a single subtag. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Value; + /// + /// Value::try_from_single_subtag(b"buddhist").expect("valid subtag"); + /// Value::try_from_single_subtag(b"#####").expect_err("invalid subtag"); + /// Value::try_from_single_subtag(b"foo-bar").expect_err("not a single subtag"); + /// ``` + pub const fn try_from_single_subtag(subtag: &[u8]) -> Result { + match Self::subtag_from_bytes(subtag) { + Err(_) => Err(ParserError::InvalidExtension), + Ok(option) => Ok(Self::from_tinystr(option)), + } + } + + #[doc(hidden)] + pub const fn from_tinystr(subtag: Option>) -> Self { + match subtag { + None => Self(ShortVec::new()), + Some(val) => { + debug_assert!(val.is_ascii_alphanumeric()); + debug_assert!(!matches!(val, TRUE_VALUE)); + Self(ShortVec::new_single(val)) + } + } + } + pub(crate) fn from_vec_unchecked(input: Vec>) -> Self { - Self(input) + Self(input.into()) } - pub(crate) fn is_type_subtag(t: &[u8]) -> bool { - VALUE_LENGTH.contains(&t.len()) && !t.iter().any(|c: &u8| !c.is_ascii_alphanumeric()) + #[doc(hidden)] + pub const fn subtag_from_bytes(bytes: &[u8]) -> Result>, ParserError> { + if *VALUE_LENGTH.start() > bytes.len() || *VALUE_LENGTH.end() < bytes.len() { + return Err(ParserError::InvalidExtension); + }; + match TinyAsciiStr::from_bytes(bytes) { + Ok(TRUE_VALUE) => Ok(None), + Ok(val) if val.is_ascii_alphanumeric() => Ok(Some(val)), + _ => Err(ParserError::InvalidExtension), + } } pub(crate) fn parse_subtag(t: &[u8]) -> Result>, ParserError> { @@ -95,7 +129,7 @@ impl Value { where F: FnMut(&str) -> Result<(), E>, { - self.0.iter().map(|t| t.as_str()).try_for_each(f) + self.0.as_slice().iter().map(|t| t.as_str()).try_for_each(f) } } diff --git a/components/locid/src/helpers.rs b/components/locid/src/helpers.rs index 61db97fea87..2c50f710828 100644 --- a/components/locid/src/helpers.rs +++ b/components/locid/src/helpers.rs @@ -2,6 +2,60 @@ // called LICENSE at the top level of the ICU4X source tree // (online 
// at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use alloc::vec;
use alloc::vec::Vec;

/// Internal: A vector that supports no-allocation, constant values if length 0 or 1.
///
/// `push` and `From<Vec<T>>` only ever produce `Multi` for two or more items, so
/// values built through them have exactly one representation per content.
///
/// NOTE(review): the derived `PartialOrd`/`Ord` compare the variant first
/// (`Empty` < `Single` < `Multi`) and only then the payload. That is not the same
/// as comparing `as_slice()` lexicographically.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) enum ShortVec<T> {
    Empty,
    Single(T),
    Multi(Vec<T>),
}

impl<T> ShortVec<T> {
    /// Creates an empty vector; usable in `const` contexts.
    #[inline]
    pub const fn new() -> Self {
        Self::Empty
    }

    /// Creates a one-element vector; usable in `const` contexts.
    #[inline]
    pub const fn new_single(item: T) -> Self {
        Self::Single(item)
    }

    /// Appends `item`, switching to the heap-backed `Multi` variant when the
    /// second item is pushed.
    pub fn push(&mut self, item: T) {
        // Move the current contents out (leaving `Empty` behind) so the old item
        // can be reused without requiring `T: Clone`.
        *self = match core::mem::replace(self, Self::Empty) {
            ShortVec::Empty => ShortVec::Single(item),
            ShortVec::Single(prev_item) => ShortVec::Multi(vec![prev_item, item]),
            ShortVec::Multi(mut items) => {
                items.push(item);
                ShortVec::Multi(items)
            }
        };
    }

    /// Returns the contents as a slice, whichever variant holds them.
    #[inline]
    pub fn as_slice(&self) -> &[T] {
        match self {
            ShortVec::Empty => &[],
            ShortVec::Single(v) => core::slice::from_ref(v),
            ShortVec::Multi(v) => v.as_slice(),
        }
    }
}

impl<T> From<Vec<T>> for ShortVec<T> {
    /// Converts a `Vec`, keeping the allocation only for two or more items.
    fn from(mut v: Vec<T>) -> Self {
        if v.len() > 1 {
            return ShortVec::Multi(v);
        }
        // At most one item is left, so `pop` yields it (or `None` when empty)
        // without needing an `unwrap`.
        match v.pop() {
            Some(item) => ShortVec::Single(item),
            None => ShortVec::Empty,
        }
    }
}

// … `macro_rules! impl_writeable_for_single_subtag` follows in helpers.rs; it is
//   unchanged diff context, cut off by the hunk, and not reproduced here.

// components/locid/src/macros.rs
// (diff index 3b72890c9ff..830f24686ef) — post-patch view of the region added by
// this change; it is inserted after the `unicode_ext_key!` macro.

/// A macro allowing for compile-time construction of a valid Unicode [`Value`] subtag.
///
/// The macro only supports single-subtag values. An invalid literal makes the
/// constant evaluation panic, i.e. it is rejected at compile time.
///
/// # Examples
///
/// ```
/// use icu::locid::{unicode_ext_key, unicode_ext_value};
/// use icu::locid::extensions::unicode::{Key, Value};
/// use icu::locid::Locale;
/// use writeable::Writeable;
///
/// const CALENDAR_KEY: Key = unicode_ext_key!("ca");
/// const CALENDAR_VALUE: Value = unicode_ext_value!("buddhist");
///
/// let loc: Locale = "de-u-ca-buddhist".parse().unwrap();
///
/// assert_eq!(
///     loc.extensions.unicode.keywords.get(&CALENDAR_KEY),
///     Some(&CALENDAR_VALUE)
/// );
/// ```
///
/// [`Value`]: crate::extensions::unicode::Value
#[macro_export]
macro_rules! unicode_ext_value {
    ($value:literal) => {{
        // What we want:
        // const R: $crate::extensions::unicode::Value =
        //     match $crate::extensions::unicode::Value::try_from_single_subtag($value.as_bytes()) {
        //         Ok(r) => r,
        //         #[allow(clippy::panic)] // const context
        //         _ => panic!(concat!("Invalid Unicode extension value: ", $value)),
        //     };
        // Workaround until https://github.com/rust-lang/rust/issues/73255 lands:
        const R: $crate::extensions::unicode::Value =
            $crate::extensions::unicode::Value::from_tinystr(
                match $crate::extensions::unicode::Value::subtag_from_bytes($value.as_bytes()) {
                    Ok(r) => r,
                    _ => panic!(concat!("Invalid Unicode extension value: ", $value)),
                },
            );
        R
    }};
}

// … next item in macros.rs (unchanged diff context, cut off by the hunk):
// /// A macro allowing for compile-time construction of valid Transform [`Key`] subtag.
// ///
// /// The macro will perform syntax canonicalization of the tag.