Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable locale macro to support single unicode key value pair extension #2382

Merged
merged 8 commits into from
Aug 18, 2022
26 changes: 25 additions & 1 deletion components/locid/src/extensions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ pub enum ExtensionType {
}
pdogr marked this conversation as resolved.
Show resolved Hide resolved

impl ExtensionType {
pub(crate) fn from_byte(key: u8) -> Result<Self, ParserError> {
pub(crate) const fn from_byte(key: u8) -> Result<Self, ParserError> {
let key = key.to_ascii_lowercase();
match key {
b'u' => Ok(Self::Unicode),
Expand All @@ -83,6 +83,18 @@ impl ExtensionType {
_ => Err(ParserError::InvalidExtension),
}
}

/// Parses the extension type from the single byte at `bytes[start..end]`, callable in `const`
/// context.
///
/// # Errors
///
/// Returns [`ParserError::InvalidExtension`] when the range is not exactly one byte long, when
/// it does not lie within `bytes`, or when `from_byte` rejects the byte at `start`.
pub(crate) const fn from_bytes_manual_slice(
    bytes: &[u8],
    start: usize,
    end: usize,
) -> Result<Self, ParserError> {
    // `start < bytes.len()` is tested first so that `start + 1` cannot overflow. Comparing
    // against `start + 1` instead of computing `end - start` means an inverted range
    // (`end < start`) is rejected rather than underflowing.
    if start >= bytes.len() || end != start + 1 {
        return Err(ParserError::InvalidExtension);
    }
    #[allow(clippy::indexing_slicing)] // `start < bytes.len()` was checked above.
    Self::from_byte(bytes[start])
}
}

/// A map of extensions associated with a given [`Locale`](crate::Locale).
Expand Down Expand Up @@ -121,6 +133,18 @@ impl Extensions {
}
}

/// Creates a set of extensions whose only populated part is the given `unicode` extension.
///
/// The transform and private-use extensions are initialised with their `new()` constructors
/// and the list of other extensions is an empty `Vec`. Callable in `const` context.
#[inline]
pub const fn from_unicode(unicode: Unicode) -> Self {
    Self {
        other: Vec::new(),
        private: Private::new(),
        transform: Transform::new(),
        unicode,
    }
}

/// Returns whether there are no extensions present.
///
/// # Examples
Expand Down
22 changes: 15 additions & 7 deletions components/locid/src/extensions/unicode/attribute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,25 @@ impl Attribute {
///
/// Notice: No attribute subtags are defined by the current CLDR specification.
pub fn from_bytes(v: &[u8]) -> Result<Self, ParserError> {
if !ATTR_LENGTH.contains(&v.len()) {
return Err(ParserError::InvalidExtension);
}

let s = TinyAsciiStr::from_bytes(v).map_err(|_| ParserError::InvalidExtension)?;
Self::from_bytes_manual_slice(v, 0, v.len())
}

if !s.is_ascii_alphanumeric() {
/// Equivalent to [`from_bytes(bytes[start..end])`](Self::from_bytes) but callable in `const`
/// context.
pub const fn from_bytes_manual_slice(
bytes: &[u8],
start: usize,
end: usize,
) -> Result<Self, ParserError> {
let slice_len = end - start;
if slice_len < *ATTR_LENGTH.start() || slice_len > *ATTR_LENGTH.end() {
return Err(ParserError::InvalidExtension);
}

Ok(Self(s.to_ascii_lowercase()))
match TinyAsciiStr::from_bytes_manual_slice(bytes, start, end) {
Ok(s) if s.is_ascii_alphanumeric() => Ok(Self(s.to_ascii_lowercase())),
_ => Err(ParserError::InvalidExtension),
}
}

/// A helper function for displaying
Expand Down
18 changes: 14 additions & 4 deletions components/locid/src/extensions/unicode/key.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,25 @@ impl Key {
/// assert_eq!(key.as_str(), "ca");
/// ```
pub const fn from_bytes(key: &[u8]) -> Result<Self, ParserError> {
Self::from_bytes_manual_slice(key, 0, key.len())
}

/// Equivalent to [`from_bytes(bytes[start..end])`](Self::from_bytes) but callable in `const`
/// context.
pub const fn from_bytes_manual_slice(
bytes: &[u8],
start: usize,
end: usize,
) -> Result<Self, ParserError> {
#[allow(clippy::indexing_slicing)] // TODO(#1668) Clippy exceptions need docs or fixing.
if key.len() != KEY_LENGTH
|| !key[0].is_ascii_alphanumeric()
|| !key[1].is_ascii_alphabetic()
if end - start != KEY_LENGTH
|| !bytes[start].is_ascii_alphanumeric()
|| !bytes[start + 1].is_ascii_alphabetic()
{
return Err(ParserError::InvalidExtension);
}

let key = match TinyAsciiStr::from_bytes(key) {
let key = match TinyAsciiStr::from_bytes_manual_slice(bytes, start, end) {
Ok(k) => k,
Err(_) => return Err(ParserError::InvalidSubtag),
};
Expand Down
8 changes: 8 additions & 0 deletions components/locid/src/extensions/unicode/keywords.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,14 @@ impl Keywords {
Self(LiteMap::new())
}

/// Creates a list of keywords holding exactly one `key`/`value` pair, callable in a `const`
/// context.
#[inline]
pub const fn new_single(key: Key, value: Value) -> Self {
    // A store with a single entry is handed to the `_unchecked` constructor as already sorted.
    let single_entry_store = ShortVec::new_single((key, value));
    Self(LiteMap::from_sorted_store_unchecked(single_entry_store))
}

/// Returns `true` if there are no keywords.
///
/// # Examples
Expand Down
32 changes: 16 additions & 16 deletions components/locid/src/extensions/unicode/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,28 +111,28 @@ impl Value {

#[doc(hidden)]
pub const fn subtag_from_bytes(bytes: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
if *VALUE_LENGTH.start() > bytes.len() || *VALUE_LENGTH.end() < bytes.len() {
return Err(ParserError::InvalidExtension);
};
match TinyAsciiStr::from_bytes(bytes) {
Ok(TRUE_VALUE) => Ok(None),
Ok(val) if val.is_ascii_alphanumeric() => Ok(Some(val)),
_ => Err(ParserError::InvalidExtension),
}
Self::parse_subtag_from_bytes_manual_slice(bytes, 0, bytes.len())
}

pub(crate) fn parse_subtag(t: &[u8]) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
let s = TinyAsciiStr::from_bytes(t).map_err(|_| ParserError::InvalidSubtag)?;
if !VALUE_LENGTH.contains(&t.len()) || !s.is_ascii_alphanumeric() {
Self::parse_subtag_from_bytes_manual_slice(t, 0, t.len())
}

pub(crate) const fn parse_subtag_from_bytes_manual_slice(
robertbastian marked this conversation as resolved.
Show resolved Hide resolved
bytes: &[u8],
start: usize,
end: usize,
) -> Result<Option<TinyAsciiStr<8>>, ParserError> {
let slice_len = end - start;
if slice_len > *VALUE_LENGTH.end() || slice_len < *VALUE_LENGTH.start() {
return Err(ParserError::InvalidExtension);
}

let s = s.to_ascii_lowercase();

if s == TRUE_VALUE {
Ok(None)
} else {
Ok(Some(s))
match TinyAsciiStr::from_bytes_manual_slice(bytes, start, end) {
Ok(TRUE_VALUE) => Ok(None),
Ok(s) if s.is_ascii_alphanumeric() => Ok(Some(s.to_ascii_lowercase())),
Ok(_) => Err(ParserError::InvalidExtension),
Err(_) => Err(ParserError::InvalidSubtag),
}
}

Expand Down
27 changes: 26 additions & 1 deletion components/locid/src/locale.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::ordering::SubtagOrderingResult;
use crate::parser::{get_subtag_iterator, parse_locale, ParserError};
use crate::parser::{
get_subtag_iterator, parse_locale,
parse_locale_with_single_variant_single_keyword_unicode_keyword_extension, ParserError,
ParserMode,
};
use crate::{extensions, subtags, LanguageIdentifier};
use alloc::string::String;
use alloc::string::ToString;
use core::cmp::Ordering;
use core::str::FromStr;
use tinystr::TinyAsciiStr;

/// A core struct representing a [`Unicode Locale Identifier`].
///
Expand Down Expand Up @@ -310,6 +315,26 @@ impl Locale {
iter.next() == None
}

/// `const` parsing entry point used by the `locale!` macro.
///
/// Forwards `v` to
/// `parse_locale_with_single_variant_single_keyword_unicode_keyword_extension` in
/// [`ParserMode::Locale`]. On success the tuple holds, in order: the language, the optional
/// script, the optional region, the optional variant, and the optional unicode-extension
/// keyword as a `(key, value subtag)` pair.
#[doc(hidden)]
#[allow(clippy::type_complexity)]
pub const fn from_bytes_with_single_variant_single_keyword_unicode_extension(
    v: &[u8],
) -> Result<
    (
        subtags::Language,
        Option<subtags::Script>,
        Option<subtags::Region>,
        Option<subtags::Variant>,
        Option<(extensions::unicode::Key, Option<TinyAsciiStr<8>>)>,
    ),
    ParserError,
> {
    let mode = ParserMode::Locale;
    parse_locale_with_single_variant_single_keyword_unicode_keyword_extension(v, mode)
}

pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
where
F: FnMut(&str) -> Result<(), E>,
Expand Down
60 changes: 54 additions & 6 deletions components/locid/src/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,25 +191,54 @@ macro_rules! langid {
/// assert_eq!(DE_AT, de_at);
/// ```
///
/// *Note*: The macro cannot produce locales with more than one variant or extensions due to const
/// *Note*: The macro cannot produce locales with more than one variant or with multiple extensions
/// (only a single-keyword Unicode extension is supported) due to const
/// limitations (see [`Heap Allocations in Constants`]):
///
/// ```compile_fail
robertbastian marked this conversation as resolved.
Show resolved Hide resolved
/// icu::locid::locale!("en-US-u-ca-ja");
/// icu::locid::locale!("sl-IT-rozaj-biske-1994")
/// ```
/// Use runtime parsing instead:
/// ```
/// "en-US-u-ca-ja".parse::<icu::locid::Locale>().unwrap();
/// "sl-IT-rozaj-biske-1994".parse::<icu::locid::Locale>().unwrap();
/// ```
///
/// Locales with multiple keys are not supported:
/// ```compile_fail
/// icu::locid::locale!("th-TH-u-ca-buddhist-nu-thai");
/// ```
/// Use runtime parsing instead:
/// ```
/// "th-TH-u-ca-buddhist-nu-thai".parse::<icu::locid::Locale>().unwrap();
/// ```
///
/// Locales with attributes are not supported:
/// ```compile_fail
/// icu::locid::locale!("en-US-u-foobar-ca-buddhist");
/// ```
/// Use runtime parsing instead:
/// ```
/// "en-US-u-foobar-ca-buddhist".parse::<icu::locid::Locale>().unwrap();
/// ```
///
/// Locales with a single key but multiple types are not supported:
/// ```compile_fail
/// icu::locid::locale!("en-US-u-ca-islamic-umalqura");
/// ```
/// Use runtime parsing instead:
/// ```
/// "en-US-u-ca-islamic-umalqura".parse::<icu::locid::Locale>().unwrap();
/// ```
/// [`Locale`]: crate::Locale
/// [`Heap Allocations in Constants`]: https://github.com/rust-lang/const-eval/issues/20
#[macro_export]
macro_rules! locale {
($locale:literal) => {{
const R: $crate::Locale =
match $crate::LanguageIdentifier::from_bytes_with_single_variant($locale.as_bytes()) {
Ok((language, script, region, variant)) => $crate::Locale {
match $crate::Locale::from_bytes_with_single_variant_single_keyword_unicode_extension(
$locale.as_bytes(),
) {
Ok((language, script, region, variant, keyword)) => $crate::Locale {
id: $crate::LanguageIdentifier {
language,
script,
Expand All @@ -219,7 +248,19 @@ macro_rules! locale {
None => $crate::subtags::Variants::new(),
},
},
extensions: $crate::extensions::Extensions::new(),
extensions: match keyword {
Some(k) => $crate::extensions::Extensions::from_unicode(
$crate::extensions::Unicode {
keywords: $crate::extensions::unicode::Keywords::new_single(
k.0,
$crate::extensions::unicode::Value::from_tinystr(k.1),
),

attributes: $crate::extensions::unicode::Attributes::new(),
},
),
None => $crate::extensions::Extensions::new(),
},
},
#[allow(clippy::panic)] // const context
_ => panic!(concat!(
Expand Down Expand Up @@ -368,4 +409,11 @@ mod test {
let de_at_foobar: Locale = "de_at-foobar".parse().unwrap();
assert_eq!(DE_AT_FOOBAR, de_at_foobar);
}

/// The `locale!` macro must accept a locale with one unicode-extension keyword in `const`
/// context and produce the same value as runtime parsing of the same string.
#[test]
fn test_locale_macro_can_parse_locale_with_single_keyword_unicode_extension() {
    // Evaluated at compile time: a `const` item forces the macro through const evaluation.
    const BUILT_BY_MACRO: Locale = locale!("de_at-u-ca-foobar");
    let parsed_at_runtime: Locale = "de_at-u-ca-foobar".parse().unwrap();
    assert_eq!(BUILT_BY_MACRO, parsed_at_runtime);
}
}
Loading