Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add unicode_ext_value! macro, enabled by new helper ShortVec #1767

Merged
merged 2 commits into from
Apr 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 47 additions & 13 deletions components/locid/src/extensions/unicode/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::helpers::ShortVec;
use crate::parser::{get_subtag_iterator, ParserError};
use alloc::vec;
use alloc::vec::Vec;
use core::ops::RangeInclusive;
use core::str::FromStr;
Expand Down Expand Up @@ -31,7 +31,7 @@ use tinystr::TinyAsciiStr;
/// assert_eq!(&value2.to_string(), "islamic-civil");
/// ```
#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
pub struct Value(Vec<TinyAsciiStr<{ *VALUE_LENGTH.end() }>>);
pub struct Value(ShortVec<TinyAsciiStr<{ *VALUE_LENGTH.end() }>>);

// Inclusive range of byte lengths accepted for a single value subtag.
const VALUE_LENGTH: RangeInclusive<usize> = 3..=8;
// The `true` subtag. `subtag_from_bytes` maps it to `None` rather than to a stored subtag.
const TRUE_VALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true");
Expand All @@ -51,29 +51,63 @@ impl Value {
/// assert_eq!(&value.to_string(), "buddhist");
/// ```
pub fn from_bytes(input: &[u8]) -> Result<Self, ParserError> {
let mut v = vec![];
let mut v = ShortVec::new();

// An empty input is accepted and produces a value without any subtags.
if !input.is_empty() {
for subtag in get_subtag_iterator(input) {
if !Self::is_type_subtag(subtag) {
return Err(ParserError::InvalidExtension);
}
let val =
TinyAsciiStr::from_bytes(subtag).map_err(|_| ParserError::InvalidExtension)?;
if val != TRUE_VALUE {
// `subtag_from_bytes` validates the subtag and returns `Ok(None)` for `true`,
// so only subtags other than `true` are pushed below.
let val = Self::subtag_from_bytes(subtag)?;
if let Some(val) = val {
v.push(val);
}
}
}
Ok(Self(v))
}

/// Builds a `Value` that holds at most one subtag; callable in `const` contexts.
///
/// # Errors
///
/// Returns `ParserError::InvalidExtension` when `subtag_from_bytes` rejects `subtag`.
///
/// # Examples
///
/// ```
/// use icu::locid::extensions::unicode::Value;
///
/// Value::try_from_single_subtag(b"buddhist").expect("valid subtag");
/// Value::try_from_single_subtag(b"#####").expect_err("invalid subtag");
/// Value::try_from_single_subtag(b"foo-bar").expect_err("not a single subtag");
/// ```
pub const fn try_from_single_subtag(subtag: &[u8]) -> Result<Self, ParserError> {
    // The error case is unwrapped by hand because `?` cannot be used in a `const fn`.
    let parsed = match Self::subtag_from_bytes(subtag) {
        Ok(parsed) => parsed,
        Err(_) => return Err(ParserError::InvalidExtension),
    };
    Ok(Self::from_tinystr(parsed))
}

/// Wraps an already-parsed subtag in a `Value`.
///
/// `None` produces a value without subtags; `Some` produces a single-subtag value.
/// In debug builds the subtag is asserted to be ASCII alphanumeric and different
/// from `true`.
#[doc(hidden)]
pub const fn from_tinystr(subtag: Option<TinyAsciiStr<8>>) -> Self {
    if let Some(val) = subtag {
        debug_assert!(val.is_ascii_alphanumeric());
        debug_assert!(!matches!(val, TRUE_VALUE));
        return Self(ShortVec::new_single(val));
    }
    Self(ShortVec::new())
}

/// Wraps `input` as a `Value` without performing any validation of the subtags here;
/// the caller is responsible for passing subtags that are already valid.
pub(crate) fn from_vec_unchecked(input: Vec<TinyAsciiStr<8>>) -> Self {
Self(input)
Self(input.into())
}

pub(crate) fn is_type_subtag(t: &[u8]) -> bool {
VALUE_LENGTH.contains(&t.len()) && !t.iter().any(|c: &u8| !c.is_ascii_alphanumeric())
/// Parses a single value subtag from raw bytes; callable in `const` contexts.
///
/// Returns `Ok(None)` for the `true` subtag, `Ok(Some(_))` for any other ASCII
/// alphanumeric subtag whose length lies within `VALUE_LENGTH`, and
/// `ParserError::InvalidExtension` for everything else.
#[doc(hidden)]
pub const fn subtag_from_bytes(bytes: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
    let len = bytes.len();
    if len < *VALUE_LENGTH.start() || len > *VALUE_LENGTH.end() {
        return Err(ParserError::InvalidExtension);
    }
    match TinyAsciiStr::from_bytes(bytes) {
        Ok(TRUE_VALUE) => Ok(None),
        Ok(val) => {
            if val.is_ascii_alphanumeric() {
                Ok(Some(val))
            } else {
                Err(ParserError::InvalidExtension)
            }
        }
        _ => Err(ParserError::InvalidExtension),
    }
}

pub(crate) fn parse_subtag(t: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
Expand All @@ -95,7 +129,7 @@ impl Value {
where
F: FnMut(&str) -> Result<(), E>,
{
self.0.iter().map(|t| t.as_str()).try_for_each(f)
self.0.as_slice().iter().map(|t| t.as_str()).try_for_each(f)
}
}

Expand Down
54 changes: 54 additions & 0 deletions components/locid/src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,60 @@
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use alloc::vec;
use alloc::vec::Vec;

/// Internal: a vector-like container that needs no heap allocation, and can be built in
/// `const` contexts, while it holds zero or one element.
///
/// Equality, ordering and hashing are defined on the element sequence returned by
/// [`ShortVec::as_slice`], not on the storage variant. A `ShortVec` therefore orders
/// lexicographically, exactly like the `Vec` it stands in for, and `Single(x)` cannot be
/// told apart from `Multi(vec![x])`.
#[derive(Debug, Clone)]
pub(crate) enum ShortVec<T> {
    /// No elements.
    Empty,
    /// Exactly one element, stored inline.
    Single(T),
    /// Heap-backed storage. `push` and `From<Vec<T>>` only produce this variant for two or
    /// more elements.
    Multi(Vec<T>),
}

impl<T> ShortVec<T> {
    /// Creates an empty `ShortVec` without allocating.
    #[inline]
    pub const fn new() -> Self {
        Self::Empty
    }

    /// Creates a `ShortVec` holding exactly `item`, without allocating.
    #[inline]
    pub const fn new_single(item: T) -> Self {
        Self::Single(item)
    }

    /// Appends `item`, switching to heap storage once a second element is added.
    pub fn push(&mut self, item: T) {
        // Temporarily leave `Empty` behind so the current contents can be moved out by value.
        *self = match core::mem::replace(self, Self::Empty) {
            ShortVec::Empty => ShortVec::Single(item),
            ShortVec::Single(prev_item) => ShortVec::Multi(vec![prev_item, item]),
            ShortVec::Multi(mut items) => {
                items.push(item);
                ShortVec::Multi(items)
            }
        };
    }

    /// Returns the elements as a slice, regardless of how they are stored.
    #[inline]
    pub fn as_slice(&self) -> &[T] {
        match self {
            ShortVec::Empty => &[],
            ShortVec::Single(v) => core::slice::from_ref(v),
            ShortVec::Multi(v) => v.as_slice(),
        }
    }
}

impl<T: PartialEq> PartialEq for ShortVec<T> {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        self.as_slice() == other.as_slice()
    }
}

impl<T: Eq> Eq for ShortVec<T> {}

impl<T: PartialOrd> PartialOrd for ShortVec<T> {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        self.as_slice().partial_cmp(other.as_slice())
    }
}

impl<T: Ord> Ord for ShortVec<T> {
    #[inline]
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        self.as_slice().cmp(other.as_slice())
    }
}

impl<T: core::hash::Hash> core::hash::Hash for ShortVec<T> {
    #[inline]
    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
        // Hash the element sequence so that values which compare equal also hash equal.
        core::hash::Hash::hash(self.as_slice(), state);
    }
}

impl<T> From<Vec<T>> for ShortVec<T> {
    /// Converts a `Vec`, reusing its allocation only when it holds two or more elements.
    fn from(mut v: Vec<T>) -> Self {
        if v.len() > 1 {
            return ShortVec::Multi(v);
        }
        // At most one element remains; `pop` yields it without needing `unwrap`.
        match v.pop() {
            Some(item) => ShortVec::Single(item),
            None => ShortVec::Empty,
        }
    }
}

macro_rules! impl_writeable_for_single_subtag {
($type:tt, $sample:literal) => {
impl core::fmt::Display for $type {
Expand Down
46 changes: 46 additions & 0 deletions components/locid/src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,52 @@ macro_rules! unicode_ext_key {
}};
}

/// A macro allowing for compile-time construction of a valid Unicode extension [`Value`].
///
/// The macro only supports single-subtag values. The literal is validated while the
/// resulting constant is evaluated, so an invalid value stops compilation with the message
/// `Invalid Unicode extension value: <literal>` instead of failing at run time.
///
/// # Examples
///
/// ```
/// use icu::locid::{unicode_ext_key, unicode_ext_value};
/// use icu::locid::extensions::unicode::{Key, Value};
/// use icu::locid::Locale;
/// use writeable::Writeable;
///
/// const CALENDAR_KEY: Key = unicode_ext_key!("ca");
/// const CALENDAR_VALUE: Value = unicode_ext_value!("buddhist");
///
/// let loc: Locale = "de-u-ca-buddhist".parse().unwrap();
///
/// assert_eq!(
/// loc.extensions.unicode.keywords.get(&CALENDAR_KEY),
/// Some(&CALENDAR_VALUE)
/// );
/// ```
///
/// [`Value`]: crate::extensions::unicode::Value
#[macro_export]
macro_rules! unicode_ext_value {
($value:literal) => {{
// What we want:
// const R: $crate::extensions::unicode::Value =
// match $crate::extensions::unicode::Value::try_from_single_subtag($value.as_bytes()) {
// Ok(r) => r,
// #[allow(clippy::panic)] // const context
// _ => panic!(concat!("Invalid Unicode extension value: ", $value)),
// };
// Workaround until https://github.com/rust-lang/rust/issues/73255 lands:
// parse down to the optional subtag first, then build the `Value` from it with the
// infallible `from_tinystr` constructor.
const R: $crate::extensions::unicode::Value =
$crate::extensions::unicode::Value::from_tinystr(
match $crate::extensions::unicode::Value::subtag_from_bytes($value.as_bytes()) {
Ok(r) => r,
_ => panic!(concat!("Invalid Unicode extension value: ", $value)),
},
);
R
}};
}

/// A macro allowing for compile-time construction of valid Transform [`Key`] subtag.
///
/// The macro will perform syntax canonicalization of the tag.
Expand Down