From 2f8e4aab0f430febc7f37d729c82433c97c18cb6 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Fri, 1 Apr 2022 21:46:21 -0700 Subject: [PATCH 1/2] Add ShortVec and use it in Value --- .../locid/src/extensions/unicode/value.rs | 45 +++++++++++----- components/locid/src/helpers.rs | 54 +++++++++++++++++++ 2 files changed, 87 insertions(+), 12 deletions(-) diff --git a/components/locid/src/extensions/unicode/value.rs b/components/locid/src/extensions/unicode/value.rs index 31bb478346b..10e535a9a3a 100644 --- a/components/locid/src/extensions/unicode/value.rs +++ b/components/locid/src/extensions/unicode/value.rs @@ -2,8 +2,8 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). +use crate::helpers::ShortVec; use crate::parser::{get_subtag_iterator, ParserError}; -use alloc::vec; use alloc::vec::Vec; use core::ops::RangeInclusive; use core::str::FromStr; @@ -31,7 +31,7 @@ use tinystr::TinyAsciiStr; /// assert_eq!(&value2.to_string(), "islamic-civil"); /// ``` #[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)] -pub struct Value(Vec<TinyAsciiStr<8>>); +pub struct Value(ShortVec<TinyAsciiStr<8>>); const VALUE_LENGTH: RangeInclusive<usize> = 3..=8; const TRUE_VALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true"); @@ -51,15 +51,11 @@ impl Value { /// assert_eq!(&value.to_string(), "buddhist"); /// ``` pub fn from_bytes(input: &[u8]) -> Result<Self, ParserError> { - let mut v = vec![]; + let mut v = ShortVec::new(); if !input.is_empty() { for subtag in get_subtag_iterator(input) { - if !Self::is_type_subtag(subtag) { - return Err(ParserError::InvalidExtension); - } - let val = - TinyAsciiStr::from_bytes(subtag).map_err(|_| ParserError::InvalidExtension)?; + let val = Self::subtag_from_bytes(subtag)?; if val != TRUE_VALUE { v.push(val); } @@ -68,12 +64,37 @@ impl Value { Ok(Self(v)) } + /// Const constructor for when the value contains only a single subtag. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::locid::extensions::unicode::Value; + /// + /// Value::try_from_single_subtag(b"buddhist").expect("valid subtag"); + /// Value::try_from_single_subtag(b"#####").expect_err("invalid subtag"); + /// Value::try_from_single_subtag(b"foo-bar").expect_err("not a single subtag"); + /// ``` + pub const fn try_from_single_subtag(subtag: &[u8]) -> Result<Self, ParserError> { + match Self::subtag_from_bytes(subtag) { + Err(_) => Err(ParserError::InvalidExtension), + Ok(TRUE_VALUE) => Ok(Self(ShortVec::new())), + Ok(val) => Ok(Self(ShortVec::new_single(val))), + } + } + pub(crate) fn from_vec_unchecked(input: Vec<TinyAsciiStr<8>>) -> Self { - Self(input) + Self(input.into()) } - pub(crate) fn is_type_subtag(t: &[u8]) -> bool { - VALUE_LENGTH.contains(&t.len()) && !t.iter().any(|c: &u8| !c.is_ascii_alphanumeric()) + const fn subtag_from_bytes(bytes: &[u8]) -> Result<TinyAsciiStr<8>, ParserError> { + if *VALUE_LENGTH.start() > bytes.len() || *VALUE_LENGTH.end() < bytes.len() { + return Err(ParserError::InvalidExtension); + }; + match TinyAsciiStr::from_bytes(bytes) { + Ok(val) if val.is_ascii_alphanumeric() => Ok(val), + _ => Err(ParserError::InvalidExtension), + } } pub(crate) fn parse_subtag(t: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> { @@ -95,7 +116,7 @@ impl Value { where F: FnMut(&str) -> Result<(), E>, { - self.0.iter().map(|t| t.as_str()).try_for_each(f) + self.0.as_slice().iter().map(|t| t.as_str()).try_for_each(f) } } diff --git a/components/locid/src/helpers.rs b/components/locid/src/helpers.rs index 61db97fea87..2c50f710828 100644 --- a/components/locid/src/helpers.rs +++ b/components/locid/src/helpers.rs @@ -2,6 +2,60 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). +use alloc::vec; +use alloc::vec::Vec; + +/// Internal: A vector that supports no-allocation, constant values if length 0 or 1. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) enum ShortVec<T> { + Empty, + Single(T), + Multi(Vec<T>), +} + +impl<T> ShortVec<T> { + #[inline] + pub const fn new() -> Self { + Self::Empty + } + + #[inline] + pub const fn new_single(item: T) -> Self { + Self::Single(item) + } + + pub fn push(&mut self, item: T) { + *self = match core::mem::replace(self, Self::Empty) { + ShortVec::Empty => ShortVec::Single(item), + ShortVec::Single(prev_item) => ShortVec::Multi(vec![prev_item, item]), + ShortVec::Multi(mut items) => { + items.push(item); + ShortVec::Multi(items) + } + }; + } + + #[inline] + pub fn as_slice(&self) -> &[T] { + match self { + ShortVec::Empty => &[], + ShortVec::Single(v) => core::slice::from_ref(v), + ShortVec::Multi(v) => v.as_slice(), + } + } +} + +impl<T> From<Vec<T>> for ShortVec<T> { + fn from(v: Vec<T>) -> Self { + match v.len() { + 0 => ShortVec::Empty, + #[allow(clippy::unwrap_used)] // we know that the vec is not empty + 1 => ShortVec::Single(v.into_iter().next().unwrap()), + _ => ShortVec::Multi(v), + } + } +} + macro_rules! impl_writeable_for_single_subtag { ($type:tt, $sample:literal) => { impl core::fmt::Display for $type { From 29b2010bd97a9571701988bb15619cc4b34d3d37 Mon Sep 17 00:00:00 2001 From: "Shane F. Carr" Date: Fri, 1 Apr 2022 22:15:19 -0700 Subject: [PATCH 2/2] Add unicode_ext_value! 
--- .../locid/src/extensions/unicode/value.rs | 23 ++++++++-- components/locid/src/macros.rs | 46 +++++++++++++++++++ 2 files changed, 64 insertions(+), 5 deletions(-) diff --git a/components/locid/src/extensions/unicode/value.rs b/components/locid/src/extensions/unicode/value.rs index 10e535a9a3a..94f57ac8532 100644 --- a/components/locid/src/extensions/unicode/value.rs +++ b/components/locid/src/extensions/unicode/value.rs @@ -56,7 +56,7 @@ impl Value { if !input.is_empty() { for subtag in get_subtag_iterator(input) { let val = Self::subtag_from_bytes(subtag)?; - if val != TRUE_VALUE { + if let Some(val) = val { v.push(val); } } @@ -78,8 +78,19 @@ impl Value { pub const fn try_from_single_subtag(subtag: &[u8]) -> Result<Self, ParserError> { match Self::subtag_from_bytes(subtag) { Err(_) => Err(ParserError::InvalidExtension), - Ok(TRUE_VALUE) => Ok(Self(ShortVec::new())), - Ok(val) => Ok(Self(ShortVec::new_single(val))), + Ok(option) => Ok(Self::from_tinystr(option)), + } + } + + #[doc(hidden)] + pub const fn from_tinystr(subtag: Option<TinyAsciiStr<8>>) -> Self { + match subtag { + None => Self(ShortVec::new()), + Some(val) => { + debug_assert!(val.is_ascii_alphanumeric()); + debug_assert!(!matches!(val, TRUE_VALUE)); + Self(ShortVec::new_single(val)) + } } } @@ -87,12 +98,14 @@ impl Value { Self(input.into()) } - const fn subtag_from_bytes(bytes: &[u8]) -> Result<TinyAsciiStr<8>, ParserError> { + #[doc(hidden)] + pub const fn subtag_from_bytes(bytes: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> { if *VALUE_LENGTH.start() > bytes.len() || *VALUE_LENGTH.end() < bytes.len() { return Err(ParserError::InvalidExtension); }; match TinyAsciiStr::from_bytes(bytes) { - Ok(val) if val.is_ascii_alphanumeric() => Ok(val), + Ok(TRUE_VALUE) => Ok(None), + Ok(val) if val.is_ascii_alphanumeric() => Ok(Some(val)), _ => Err(ParserError::InvalidExtension), } } diff --git a/components/locid/src/macros.rs b/components/locid/src/macros.rs index 3b72890c9ff..830f24686ef 100644 --- a/components/locid/src/macros.rs +++ 
b/components/locid/src/macros.rs @@ -265,6 +265,52 @@ macro_rules! unicode_ext_key { }}; } +/// A macro allowing for compile-time construction of valid Unicode [`Value`] subtag. +/// +/// The macro only supports single-subtag values. +/// +/// # Examples +/// +/// ``` +/// use icu::locid::{unicode_ext_key, unicode_ext_value}; +/// use icu::locid::extensions::unicode::{Key, Value}; +/// use icu::locid::Locale; +/// use writeable::Writeable; +/// +/// const CALENDAR_KEY: Key = unicode_ext_key!("ca"); +/// const CALENDAR_VALUE: Value = unicode_ext_value!("buddhist"); +/// +/// let loc: Locale = "de-u-ca-buddhist".parse().unwrap(); +/// +/// assert_eq!( +/// loc.extensions.unicode.keywords.get(&CALENDAR_KEY), +/// Some(&CALENDAR_VALUE) +/// ); +/// ``` +/// +/// [`Value`]: crate::extensions::unicode::Value +#[macro_export] +macro_rules! unicode_ext_value { + ($value:literal) => {{ + // What we want: + // const R: $crate::extensions::unicode::Value = + // match $crate::extensions::unicode::Value::try_from_single_subtag($value.as_bytes()) { + // Ok(r) => r, + // #[allow(clippy::panic)] // const context + // _ => panic!(concat!("Invalid Unicode extension value: ", $value)), + // }; + // Workaround until https://github.com/rust-lang/rust/issues/73255 lands: + const R: $crate::extensions::unicode::Value = + $crate::extensions::unicode::Value::from_tinystr( + match $crate::extensions::unicode::Value::subtag_from_bytes($value.as_bytes()) { + Ok(r) => r, + _ => panic!(concat!("Invalid Unicode extension value: ", $value)), + }, + ); + R + }}; +} + /// A macro allowing for compile-time construction of valid Transform [`Key`] subtag. /// /// The macro will perform syntax canonicalization of the tag.