Skip to content

Commit

Permalink
Introduce icu_preferences
Browse files Browse the repository at this point in the history
  • Loading branch information
zbraniecki committed May 22, 2024
1 parent 31e085a commit 9063db8
Show file tree
Hide file tree
Showing 39 changed files with 1,692 additions and 37 deletions.
9 changes: 9 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ members = [
"utils/ixdtf",
"utils/litemap",
"utils/pattern",
"utils/preferences",
"utils/resb",
"utils/tinystr",
"utils/tzif",
Expand Down
24 changes: 24 additions & 0 deletions components/locid/src/extensions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ pub mod transform;
pub mod unicode;

use core::cmp::Ordering;
use core::str::FromStr;

use other::Other;
use private::Private;
Expand Down Expand Up @@ -137,6 +138,21 @@ impl Extensions {
}
}

/// Parses a UTF-8 byte slice into a well-formed [`Extensions`].
///
/// The input is split into subtags with a `SubtagIterator`, which is then
/// handed to `try_from_iter`.
///
/// # Errors
///
/// Returns whatever [`ParserError`] `try_from_iter` reports for the input.
///
/// # Examples
///
/// ```
/// use icu::locid::extensions::Extensions;
///
/// Extensions::try_from_bytes(b"u-hc-h12").unwrap();
/// ```
pub fn try_from_bytes(t: &[u8]) -> Result<Self, ParserError> {
    Self::try_from_iter(&mut SubtagIterator::new(t))
}

/// Function to create a new map of extensions containing exactly one unicode extension, callable in `const`
/// context.
#[inline]
Expand Down Expand Up @@ -326,6 +342,14 @@ impl Extensions {
}
}

/// Enables `str::parse::<Extensions>()` by forwarding the string's bytes to
/// [`Extensions::try_from_bytes`].
impl FromStr for Extensions {
    type Err = ParserError;

    fn from_str(source: &str) -> Result<Self, Self::Err> {
        let bytes = source.as_bytes();
        Self::try_from_bytes(bytes)
    }
}

impl_writeable_for_each_subtag_str_no_test!(Extensions);

#[test]
Expand Down
6 changes: 2 additions & 4 deletions components/locid/src/extensions/other/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,12 @@
//! let mut loc: Locale = "en-US-a-foo-faa".parse().expect("Parsing failed.");
//! ```
mod subtag;

use crate::parser::ParserError;
use crate::parser::SubtagIterator;
use crate::shortvec::ShortBoxSlice;
use alloc::vec::Vec;
#[doc(inline)]
pub use subtag::{subtag, Subtag};
pub use crate::subtags::{subtag, Subtag};
use alloc::vec::Vec;

/// A list of [`Other Use Extensions`] as defined in [`Unicode Locale
/// Identifier`] specification.
Expand Down
54 changes: 54 additions & 0 deletions components/locid/src/extensions/unicode/keywords.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@
use core::borrow::Borrow;
use core::cmp::Ordering;
use core::iter::FromIterator;
use core::str::FromStr;
use litemap::LiteMap;
use writeable::Writeable;

use super::Key;
use super::Value;
#[allow(deprecated)]
use crate::ordering::SubtagOrderingResult;
use crate::parser::ParserError;
use crate::parser::SubtagIterator;
use crate::shortvec::ShortBoxSlice;

/// A list of [`Key`]-[`Value`] pairs representing functional information
Expand Down Expand Up @@ -92,6 +95,11 @@ impl Keywords {
))
}

/// Parses a UTF-8 byte slice into a [`Keywords`] list.
///
/// The input is split into subtags with a `SubtagIterator`, which is then
/// handed to `try_from_iter`.
///
/// NOTE(review): `try_from_iter` stops at the first subtag it cannot use and
/// still returns `Ok`, so trailing (or entirely non-keyword) input does not
/// appear to be reported as an error here — confirm this is intended.
///
/// # Errors
///
/// Returns whatever [`ParserError`] `try_from_iter` reports for the input.
pub fn try_from_bytes(t: &[u8]) -> Result<Self, ParserError> {
    Self::try_from_iter(&mut SubtagIterator::new(t))
}

/// Returns `true` if there are no keywords.
///
/// # Examples
Expand Down Expand Up @@ -358,6 +366,44 @@ impl Keywords {
}
}

/// Builds a [`Keywords`] list by consuming key/value subtags from `iter`.
///
/// A two-character subtag starts a new key; any other subtag that follows a
/// key is parsed with `Value::parse_subtag` and, when it yields a subtag,
/// appended to that key's value. Parsing stops — leaving the offending
/// subtag unconsumed in `iter` — at a non-key subtag seen before any key, or
/// at a subtag that `Value::parse_subtag` rejects.
///
/// # Errors
///
/// Propagates the error of `Key::try_from_bytes` when a two-character subtag
/// is not a valid key.
pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> {
    let mut map = LiteMap::new();

    // The key whose value is currently being collected, and the value
    // subtags gathered for it so far.
    let mut pending_key = None;
    let mut pending_value = ShortBoxSlice::new();

    while let Some(subtag) = iter.peek() {
        if subtag.len() == 2 {
            // A new key begins: flush the previous key/value pair first.
            // The result of `try_insert` is intentionally ignored
            // (presumably a repeated key keeps its first value — TODO confirm).
            if let Some(key) = pending_key.take() {
                map.try_insert(key, Value::from_short_slice_unchecked(pending_value));
                pending_value = ShortBoxSlice::new();
            }
            pending_key = Some(Key::try_from_bytes(subtag)?);
        } else if pending_key.is_none() {
            // A value subtag without a preceding key is not ours to consume.
            break;
        } else {
            match Value::parse_subtag(subtag) {
                Err(_) => break,
                Ok(parsed) => {
                    if let Some(piece) = parsed {
                        pending_value.push(piece);
                    }
                }
            }
        }
        // Only advance past subtags that were actually accepted above.
        iter.next();
    }

    // Flush the last key/value pair, if any.
    if let Some(key) = pending_key.take() {
        map.try_insert(key, Value::from_short_slice_unchecked(pending_value));
    }

    Ok(map.into())
}

/// Produce an ordered iterator over key-value pairs
pub fn iter(&self) -> impl Iterator<Item = (&Key, &Value)> {
self.0.iter()
}

pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
where
F: FnMut(&str) -> Result<(), E>,
Expand Down Expand Up @@ -388,4 +434,12 @@ impl FromIterator<(Key, Value)> for Keywords {
}
}

/// Enables `str::parse::<Keywords>()` by forwarding the string's bytes to
/// [`Keywords::try_from_bytes`].
impl FromStr for Keywords {
    type Err = ParserError;

    fn from_str(source: &str) -> Result<Self, Self::Err> {
        let bytes = source.as_bytes();
        Self::try_from_bytes(bytes)
    }
}

impl_writeable_for_key_value!(Keywords, "ca", "islamic-civil", "mm", "mm");
55 changes: 26 additions & 29 deletions components/locid/src/extensions/unicode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ mod keywords;
mod value;

use core::cmp::Ordering;
use core::str::FromStr;

#[doc(inline)]
pub use attribute::{attribute, Attribute};
Expand Down Expand Up @@ -103,6 +104,21 @@ impl Unicode {
}
}

/// Parses a UTF-8 byte slice into a well-formed [`Unicode`] extension.
///
/// The input is split into subtags with a `SubtagIterator`, which is then
/// handed to `try_from_iter`.
///
/// # Errors
///
/// Returns whatever [`ParserError`] `try_from_iter` reports for the input.
///
/// # Examples
///
/// ```
/// use icu::locid::extensions::unicode::Unicode;
///
/// Unicode::try_from_bytes(b"hc-h12").unwrap();
/// ```
pub fn try_from_bytes(t: &[u8]) -> Result<Self, ParserError> {
    Self::try_from_iter(&mut SubtagIterator::new(t))
}

/// Returns [`true`] if the list of keywords and attributes is empty.
///
/// # Examples
Expand Down Expand Up @@ -164,42 +180,15 @@ impl Unicode {
iter.next();
}

let mut keywords = LiteMap::new();

let mut current_keyword = None;
let mut current_value = ShortBoxSlice::new();

while let Some(subtag) = iter.peek() {
let slen = subtag.len();
if slen == 2 {
if let Some(kw) = current_keyword.take() {
keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value));
current_value = ShortBoxSlice::new();
}
current_keyword = Some(Key::try_from_bytes(subtag)?);
} else if current_keyword.is_some() {
match Value::parse_subtag(subtag) {
Ok(Some(t)) => current_value.push(t),
Ok(None) => {}
Err(_) => break,
}
} else {
break;
}
iter.next();
}

if let Some(kw) = current_keyword.take() {
keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value));
}
let keywords = Keywords::try_from_iter(iter)?;

// Ensure we've defined at least one attribute or keyword
if attributes.is_empty() && keywords.is_empty() {
return Err(ParserError::InvalidExtension);
}

Ok(Self {
keywords: keywords.into(),
keywords,
attributes: Attributes::from_short_slice_unchecked(attributes),
})
}
Expand All @@ -218,6 +207,14 @@ impl Unicode {
}
}

/// Enables `str::parse::<Unicode>()` by forwarding the string's bytes to
/// [`Unicode::try_from_bytes`].
impl FromStr for Unicode {
    type Err = ParserError;

    fn from_str(source: &str) -> Result<Self, Self::Err> {
        let bytes = source.as_bytes();
        Self::try_from_bytes(bytes)
    }
}

writeable::impl_display_with_writeable!(Unicode);

impl writeable::Writeable for Unicode {
Expand Down
43 changes: 42 additions & 1 deletion components/locid/src/extensions/unicode/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use crate::parser::{ParserError, SubtagIterator};
use crate::shortvec::ShortBoxSlice;
use crate::subtags::Subtag;
use core::ops::RangeInclusive;
use core::str::FromStr;
use tinystr::TinyAsciiStr;
Expand Down Expand Up @@ -32,7 +33,7 @@ use tinystr::TinyAsciiStr;
/// assert_eq!(value!("true").to_string(), "");
/// ```
#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord, Default)]
pub struct Value(ShortBoxSlice<TinyAsciiStr<{ *VALUE_LENGTH.end() }>>);
pub struct Value(pub ShortBoxSlice<TinyAsciiStr<{ *VALUE_LENGTH.end() }>>);

const VALUE_LENGTH: RangeInclusive<usize> = 3..=8;
const TRUE_VALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true");
Expand Down Expand Up @@ -90,6 +91,37 @@ impl Value {
self.0.single()
}

pub fn push_tinystr(&mut self, value: TinyAsciiStr<8>) {
self.0.push(value);
}

/// Appends a copy of every subtag of `rest` to this [`Value`], preserving
/// their order. `rest` itself is left untouched.
pub fn extend(&mut self, rest: &Self) {
    for piece in rest.0.iter().cloned() {
        self.0.push(piece);
    }
}

pub fn is_empty(&self) -> bool {
self.0.is_empty()
}

pub fn len(&self) -> usize {
self.0.len()
}

/// Removes the subtag at position `idx` and returns it.
///
/// Returns `None`, leaving the value unchanged, when `idx` is not a valid
/// position (`idx >= self.len()`).
pub fn remove_subtag(&mut self, idx: usize) -> Option<Subtag> {
    // Valid positions are `0..len`. The previous guard (`len < idx`) let
    // `idx == len` through to `remove`, which is one past the last element
    // (assuming `ShortBoxSlice::remove` has `Vec`-like out-of-range
    // behavior, i.e. it panics — TODO confirm).
    if idx >= self.0.len() {
        return None;
    }
    Some(Subtag(self.0.remove(idx)))
}

/// Returns a copy of the subtag at position `idx`, wrapped as a [`Subtag`],
/// or `None` when there is no subtag at that position.
pub fn get_subtag(&self, idx: usize) -> Option<Subtag> {
    let stored = self.0.get(idx)?;
    Some(Subtag(stored.clone()))
}

#[doc(hidden)]
pub const fn from_tinystr(subtag: Option<TinyAsciiStr<8>>) -> Self {
match subtag {
Expand Down Expand Up @@ -191,6 +223,15 @@ macro_rules! extensions_unicode_value {
);
R
}};
// Two-literal form: the literals are joined with `-` and parsed as a single
// multi-subtag value. Unlike a failure at compile time, an invalid input
// panics when the expansion is executed.
($value:literal, $value2:literal) => {{
    let v: &str = concat!($value, "-", $value2);
    let R: $crate::extensions::unicode::Value =
        match $crate::extensions::unicode::Value::try_from_bytes(v.as_bytes()) {
            Ok(r) => r,
            // Report the full joined input, not just the first literal.
            _ => panic!(concat!(
                "Invalid Unicode extension value: ",
                $value,
                "-",
                $value2
            )),
        };
    R
}};
}
#[doc(inline)]
pub use extensions_unicode_value as value;
2 changes: 1 addition & 1 deletion components/locid/src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ macro_rules! impl_tinystr_subtag {
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[repr(transparent)]
$(#[$doc])*
pub struct $name(tinystr::TinyAsciiStr<$len_end>);
pub struct $name(pub tinystr::TinyAsciiStr<$len_end>);

impl $name {
/// A constructor which takes a UTF-8 slice, parses it and
Expand Down
2 changes: 1 addition & 1 deletion components/locid/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ mod locale;
mod macros;
mod ordering;
mod parser;
mod shortvec;
pub mod shortvec;

pub use langid::LanguageIdentifier;
pub use locale::Locale;
Expand Down
2 changes: 1 addition & 1 deletion components/locid/src/shortvec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ impl<T> Default for ShortBoxSliceInner<T> {
///
/// Supports mutation but always reallocs when mutated.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) struct ShortBoxSlice<T>(ShortBoxSliceInner<T>);
pub struct ShortBoxSlice<T>(ShortBoxSliceInner<T>);

impl<T> Default for ShortBoxSlice<T> {
fn default() -> Self {
Expand Down
35 changes: 35 additions & 0 deletions components/locid/src/subtags/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,38 @@ pub use script::{script, Script};
#[doc(inline)]
pub use variant::{variant, Variant};
pub use variants::Variants;

impl_tinystr_subtag!(
/// A generic subtag.
///
/// The subtag has to be an ASCII alphanumerical string no shorter than
/// one character and no longer than eight. Parsing lowercases the input.
///
/// # Examples
///
/// ```
/// use icu::locid::subtags::Subtag;
///
/// let subtag1: Subtag = "Foo".parse().expect("Failed to parse a Subtag.");
///
/// assert_eq!(subtag1.as_str(), "foo");
/// ```
Subtag,
subtags,
subtag,
subtags_subtag,
1..=8,
s,
s.is_ascii_alphanumeric(),
s.to_ascii_lowercase(),
s.is_ascii_alphanumeric() && s.is_ascii_lowercase(),
InvalidExtension,
["foo12"],
["toolooong"],
);

impl Subtag {
    /// Returns `true` if `v` is between two and eight bytes long (inclusive).
    ///
    /// Only the length is inspected; the bytes themselves are not validated.
    ///
    /// NOTE(review): the `Subtag` type itself is declared with a length range
    /// of `1..=8`, so this check is stricter than the type — confirm the
    /// two-byte lower bound is intentional.
    pub(crate) const fn valid_key(v: &[u8]) -> bool {
        matches!(v.len(), 2..=8)
    }
}
Loading

0 comments on commit 9063db8

Please sign in to comment.