Skip to content

Commit

Permalink
Introduce icu_preferences
Browse files Browse the repository at this point in the history
  • Loading branch information
zbraniecki committed May 22, 2024
1 parent 31e085a commit e8bedc6
Show file tree
Hide file tree
Showing 47 changed files with 2,057 additions and 37 deletions.
9 changes: 9 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ members = [
"utils/ixdtf",
"utils/litemap",
"utils/pattern",
"utils/preferences",
"utils/resb",
"utils/tinystr",
"utils/tzif",
Expand Down
24 changes: 24 additions & 0 deletions components/locid/src/extensions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ pub mod transform;
pub mod unicode;

use core::cmp::Ordering;
use core::str::FromStr;

use other::Other;
use private::Private;
Expand Down Expand Up @@ -137,6 +138,21 @@ impl Extensions {
}
}

/// Parses a UTF-8 byte slice into a well-formed [`Extensions`].
///
/// The input is wrapped in a `SubtagIterator` and handed to
/// `Self::try_from_iter`, whose result is returned unchanged.
///
/// # Errors
///
/// Returns the `ParserError` reported by `Self::try_from_iter`.
///
/// # Examples
///
/// ```
/// use icu::locid::extensions::Extensions;
///
/// Extensions::try_from_bytes(b"u-hc-h12").unwrap();
/// ```
pub fn try_from_bytes(t: &[u8]) -> Result<Self, ParserError> {
    // NOTE(review): nothing here checks that the iterator was fully consumed;
    // confirm whether `try_from_iter` rejects trailing subtags on its own.
    Self::try_from_iter(&mut SubtagIterator::new(t))
}

/// Function to create a new map of extensions containing exactly one unicode extension, callable in `const`
/// context.
#[inline]
Expand Down Expand Up @@ -326,6 +342,14 @@ impl Extensions {
}
}

/// Enables `str::parse::<Extensions>()` by delegating to
/// [`Extensions::try_from_bytes`] on the string's UTF-8 bytes.
impl FromStr for Extensions {
    type Err = ParserError;

    fn from_str(source: &str) -> Result<Self, Self::Err> {
        let bytes = source.as_bytes();
        Self::try_from_bytes(bytes)
    }
}

impl_writeable_for_each_subtag_str_no_test!(Extensions);

#[test]
Expand Down
6 changes: 2 additions & 4 deletions components/locid/src/extensions/other/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,12 @@
//! let mut loc: Locale = "en-US-a-foo-faa".parse().expect("Parsing failed.");
//! ```
mod subtag;

use crate::parser::ParserError;
use crate::parser::SubtagIterator;
use crate::shortvec::ShortBoxSlice;
use alloc::vec::Vec;
#[doc(inline)]
pub use subtag::{subtag, Subtag};
pub use crate::subtags::{subtag, Subtag};
use alloc::vec::Vec;

/// A list of [`Other Use Extensions`] as defined in [`Unicode Locale
/// Identifier`] specification.
Expand Down
54 changes: 54 additions & 0 deletions components/locid/src/extensions/unicode/keywords.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,16 @@
use core::borrow::Borrow;
use core::cmp::Ordering;
use core::iter::FromIterator;
use core::str::FromStr;
use litemap::LiteMap;
use writeable::Writeable;

use super::Key;
use super::Value;
#[allow(deprecated)]
use crate::ordering::SubtagOrderingResult;
use crate::parser::ParserError;
use crate::parser::SubtagIterator;
use crate::shortvec::ShortBoxSlice;

/// A list of [`Key`]-[`Value`] pairs representing functional information
Expand Down Expand Up @@ -92,6 +95,11 @@ impl Keywords {
))
}

/// Parses a UTF-8 byte slice into a well-formed [`Keywords`] list.
///
/// The input is wrapped in a `SubtagIterator` and handed to
/// `Self::try_from_iter`. That parser stops at the first subtag it cannot
/// use as a key or as part of a value; whatever follows is ignored rather
/// than reported as an error.
///
/// # Errors
///
/// Returns a `ParserError` if a two-character subtag is not a valid [`Key`].
///
/// # Examples
///
/// ```
/// use icu::locid::extensions::unicode::Keywords;
///
/// Keywords::try_from_bytes(b"hc-h12").unwrap();
/// ```
pub fn try_from_bytes(t: &[u8]) -> Result<Self, ParserError> {
    // NOTE(review): unconsumed trailing subtags are silently dropped here;
    // confirm that this leniency is intended for a standalone constructor.
    Self::try_from_iter(&mut SubtagIterator::new(t))
}

/// Returns `true` if there are no keywords.
///
/// # Examples
Expand Down Expand Up @@ -358,6 +366,44 @@ impl Keywords {
}
}

/// Builds a [`Keywords`] map from the subtags at the front of `iter`.
///
/// Each subtag is inspected with `peek` and only consumed once accepted:
///
/// - a subtag of length two starts a new key; if it is not a valid [`Key`]
///   the error is returned to the caller;
/// - any other subtag that follows a key is parsed as part of that key's
///   value (subtags for which `Value::parse_subtag` yields `Ok(None)` are
///   consumed but not stored);
/// - the first subtag that fits neither case ends parsing and is left in
///   `iter` for the caller.
pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParserError> {
    let mut map = LiteMap::new();

    // Key whose value subtags are currently being collected, if any.
    let mut pending_key = None;
    let mut pending_value = ShortBoxSlice::new();

    while let Some(subtag) = iter.peek() {
        if subtag.len() == 2 {
            // A new key begins: store the previous key/value pair first.
            // NOTE(review): the `try_insert` result is discarded; presumably a
            // repeated key keeps its first value — confirm against `LiteMap`.
            if let Some(key) = pending_key.take() {
                map.try_insert(key, Value::from_short_slice_unchecked(pending_value));
                pending_value = ShortBoxSlice::new();
            }
            pending_key = Some(Key::try_from_bytes(subtag)?);
        } else {
            // A value subtag without a preceding key does not belong to us.
            if pending_key.is_none() {
                break;
            }
            match Value::parse_subtag(subtag) {
                Ok(Some(part)) => pending_value.push(part),
                Ok(None) => {}
                // Not a value subtag: stop without consuming it.
                Err(_) => break,
            }
        }
        // The peeked subtag was accepted, so advance past it.
        iter.next();
    }

    // Store the key/value pair that was still being collected.
    if let Some(key) = pending_key {
        map.try_insert(key, Value::from_short_slice_unchecked(pending_value));
    }

    Ok(map.into())
}

/// Returns an ordered iterator over the [`Key`]-[`Value`] pairs.
pub fn iter(&self) -> impl Iterator<Item = (&Key, &Value)> {
    let entries = &self.0;
    entries.iter()
}

pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
where
F: FnMut(&str) -> Result<(), E>,
Expand Down Expand Up @@ -388,4 +434,12 @@ impl FromIterator<(Key, Value)> for Keywords {
}
}

/// Enables `str::parse::<Keywords>()` by delegating to
/// [`Keywords::try_from_bytes`] on the string's UTF-8 bytes.
impl FromStr for Keywords {
    type Err = ParserError;

    fn from_str(source: &str) -> Result<Self, Self::Err> {
        let bytes = source.as_bytes();
        Self::try_from_bytes(bytes)
    }
}

impl_writeable_for_key_value!(Keywords, "ca", "islamic-civil", "mm", "mm");
58 changes: 28 additions & 30 deletions components/locid/src/extensions/unicode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,23 +30,25 @@ mod attribute;
mod attributes;
mod key;
mod keywords;
mod subdivision;
mod value;

use core::cmp::Ordering;
use core::str::FromStr;

#[doc(inline)]
pub use attribute::{attribute, Attribute};
pub use attributes::Attributes;
#[doc(inline)]
pub use key::{key, Key};
pub use keywords::Keywords;
pub use subdivision::{subdivision_suffix, SubdivisionId, SubdivisionSuffix};
#[doc(inline)]
pub use value::{value, Value};

use crate::parser::ParserError;
use crate::parser::SubtagIterator;
use crate::shortvec::ShortBoxSlice;
use litemap::LiteMap;

/// Unicode Extensions provide information about user preferences in a given locale.
///
Expand Down Expand Up @@ -103,6 +105,21 @@ impl Unicode {
}
}

/// Parses a UTF-8 byte slice into a well-formed [`Unicode`] extension.
///
/// The input is wrapped in a `SubtagIterator` and handed to
/// `Self::try_from_iter`, whose result is returned unchanged.
///
/// # Errors
///
/// Returns the `ParserError` reported by `Self::try_from_iter`.
///
/// # Examples
///
/// ```
/// use icu::locid::extensions::unicode::Unicode;
///
/// Unicode::try_from_bytes(b"hc-h12").unwrap();
/// ```
pub fn try_from_bytes(t: &[u8]) -> Result<Self, ParserError> {
    // NOTE(review): nothing here checks that the iterator was fully consumed;
    // confirm whether trailing subtags should be rejected.
    Self::try_from_iter(&mut SubtagIterator::new(t))
}

/// Returns [`true`] if the list of keywords and attributes is empty.
///
/// # Examples
Expand Down Expand Up @@ -164,42 +181,15 @@ impl Unicode {
iter.next();
}

let mut keywords = LiteMap::new();

let mut current_keyword = None;
let mut current_value = ShortBoxSlice::new();

while let Some(subtag) = iter.peek() {
let slen = subtag.len();
if slen == 2 {
if let Some(kw) = current_keyword.take() {
keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value));
current_value = ShortBoxSlice::new();
}
current_keyword = Some(Key::try_from_bytes(subtag)?);
} else if current_keyword.is_some() {
match Value::parse_subtag(subtag) {
Ok(Some(t)) => current_value.push(t),
Ok(None) => {}
Err(_) => break,
}
} else {
break;
}
iter.next();
}

if let Some(kw) = current_keyword.take() {
keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value));
}
let keywords = Keywords::try_from_iter(iter)?;

// Ensure we've defined at least one attribute or keyword
if attributes.is_empty() && keywords.is_empty() {
return Err(ParserError::InvalidExtension);
}

Ok(Self {
keywords: keywords.into(),
keywords,
attributes: Attributes::from_short_slice_unchecked(attributes),
})
}
Expand All @@ -218,6 +208,14 @@ impl Unicode {
}
}

/// Enables `str::parse::<Unicode>()` by delegating to
/// [`Unicode::try_from_bytes`] on the string's UTF-8 bytes.
impl FromStr for Unicode {
    type Err = ParserError;

    fn from_str(source: &str) -> Result<Self, Self::Err> {
        let bytes = source.as_bytes();
        Self::try_from_bytes(bytes)
    }
}

writeable::impl_display_with_writeable!(Unicode);

impl writeable::Writeable for Unicode {
Expand Down
110 changes: 110 additions & 0 deletions components/locid/src/extensions/unicode/subdivision.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use core::str::FromStr;

use crate::parser::ParserError;
use crate::subtags::Region;

impl_tinystr_subtag!(
/// A subdivision suffix, the trailing part of a [`SubdivisionId`].
///
/// A subdivision suffix has to be a sequence of alphanumerical characters no
/// shorter than one and no longer than four characters.
///
/// # Examples
///
/// ```
/// use icu::locid::extensions::unicode::{subdivision_suffix, SubdivisionSuffix};
///
/// let ss: SubdivisionSuffix =
/// "sct".parse().expect("Failed to parse a SubdivisionSuffix.");
///
/// assert_eq!(ss, subdivision_suffix!("sct"));
/// ```
SubdivisionSuffix,
extensions::unicode,
subdivision_suffix,
extensions_unicode_subdivision_suffix,
1..=4,
s,
s.is_ascii_alphanumeric(),
s.to_ascii_lowercase(),
s.is_ascii_alphanumeric() && s.is_ascii_lowercase(),
InvalidExtension,
["sct"],
["toolooong"],
);

/// A Subdivision Id as defined in [`Unicode Locale Identifier`].
///
/// It pairs a [`Region`] with a [`SubdivisionSuffix`].
///
/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/tr35.html#unicode_subdivision_id
///
/// # Examples
///
/// ```
/// use icu::locid::extensions::unicode::{subdivision_suffix, SubdivisionId};
///
/// let id: SubdivisionId =
///     "gbsct".parse().expect("Failed to parse a SubdivisionId.");
///
/// assert_eq!(id.suffix, subdivision_suffix!("sct"));
/// ```
#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
#[non_exhaustive]
pub struct SubdivisionId {
/// The region part of the identifier.
pub region: Region,
/// The suffix part of the identifier, which follows the region.
pub suffix: SubdivisionSuffix,
}

impl SubdivisionId {
pub const fn new(region: Region, suffix: SubdivisionSuffix) -> Self {
Self { region, suffix }
}

pub fn try_from_bytes(input: &[u8]) -> Result<Self, ParserError> {
let is_alpha = if let Some(b) = input.first() {
if b.is_ascii_digit() {
false
} else if b.is_ascii_alphabetic() {
true
} else {
return Err(ParserError::InvalidExtension);
}
} else {
return Err(ParserError::InvalidExtension);
};
let region_len = if is_alpha { 2 } else { 3 };
if input.len() < region_len + 1 {
return Err(ParserError::InvalidExtension);
}
let (region_bytes, suffix_bytes) = input.split_at(region_len);
let region =
Region::try_from_bytes(region_bytes).map_err(|_| ParserError::InvalidExtension)?;
let suffix = SubdivisionSuffix::try_from_bytes(suffix_bytes)?;
Ok(Self { region, suffix })
}
}

/// Serializes a [`SubdivisionId`] as its region code immediately followed by
/// its suffix, with no separator, e.g. `gbsct`.
impl writeable::Writeable for SubdivisionId {
    /// Writes the region code in ASCII lowercase, then the suffix.
    ///
    /// The region is lowercased explicitly so that the output is a lowercase
    /// subdivision id regardless of the case in which `Region` stores its
    /// code (NOTE(review): `Region` presumably stores uppercase — confirm).
    #[inline]
    fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
        for ch in self.region.as_str().chars() {
            sink.write_char(ch.to_ascii_lowercase())?;
        }
        sink.write_str(self.suffix.as_str())
    }

    /// Returns the exact number of bytes `write_to` produces.
    ///
    /// ASCII lowercasing never changes the byte length, so the hint is the
    /// sum of the two parts' lengths.
    #[inline]
    fn writeable_length_hint(&self) -> writeable::LengthHint {
        writeable::LengthHint::exact(self.region.as_str().len() + self.suffix.as_str().len())
    }

    // `write_to_string` is deliberately not overridden: the previous
    // `todo!()` body panicked on every call, whereas the trait's default
    // implementation builds the string from `write_to`.
}

writeable::impl_display_with_writeable!(SubdivisionId);

/// Enables `str::parse::<SubdivisionId>()` by delegating to
/// [`SubdivisionId::try_from_bytes`] on the string's UTF-8 bytes.
impl FromStr for SubdivisionId {
    type Err = ParserError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let bytes = s.as_bytes();
        Self::try_from_bytes(bytes)
    }
}
Loading

0 comments on commit e8bedc6

Please sign in to comment.