-
Notifications
You must be signed in to change notification settings - Fork 183
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
2,494 additions
and
186 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
// This file is part of ICU4X. For terms of use, please see the file | ||
// called LICENSE at the top level of the ICU4X source tree | ||
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
use smallvec::SmallVec; | ||
use zerotrie::ZeroTrie; | ||
use zerovec::ZeroVec; | ||
|
||
use crate::{ | ||
power::get_power, | ||
provider::{Base, MeasureUnitItem, SiPrefix}, | ||
si_prefix::{get_si_prefix_base_ten, get_si_prefix_base_two}, | ||
ConversionError, | ||
}; | ||
|
||
// TODO(#4369): split this struct into two structs: `MeasureUnitParser` for parsing the identifier and `MeasureUnit` for representing the unit.
// NOTE (for the TODO above): the `MeasureUnitParser` takes the trie, and the `ConverterFactory` takes the full payload and an instance of `MeasureUnitParser`.
pub struct MeasureUnit { | ||
/// Contains the processed units. | ||
pub contained_units: SmallVec<[MeasureUnitItem; 8]>, | ||
} | ||
|
||
impl MeasureUnit { | ||
// TODO: complete all the cases for the prefixes. | ||
// TODO: consider using a trie for the prefixes. | ||
/// Extracts the SI prefix. | ||
/// NOTE: | ||
/// if the prefix is found, the function will return (SiPrefix, part without the prefix string). | ||
/// if the prefix is not found, the function will return (SiPrefix { power: 0, base: Base::Decimal }, part). | ||
fn get_si_prefix(part: &str) -> (SiPrefix, &str) { | ||
let (si_prefix_base_10, part) = get_si_prefix_base_ten(part); | ||
if si_prefix_base_10 != 0 { | ||
return ( | ||
SiPrefix { | ||
power: si_prefix_base_10, | ||
base: Base::Decimal, | ||
}, | ||
part, | ||
); | ||
} | ||
|
||
let (si_prefix_base_2, part) = get_si_prefix_base_two(part); | ||
if si_prefix_base_2 != 0 { | ||
return ( | ||
SiPrefix { | ||
power: si_prefix_base_2, | ||
base: Base::Binary, | ||
}, | ||
part, | ||
); | ||
} | ||
|
||
( | ||
SiPrefix { | ||
power: 0, | ||
base: Base::Decimal, | ||
}, | ||
part, | ||
) | ||
} | ||
|
||
/// Get the unit id. | ||
/// NOTE: | ||
/// if the unit id is found, the function will return (unit id, part without the unit id and without `-` at the beginning of the remaining part if it exists). | ||
/// if the unit id is not found, the function will return None. | ||
fn get_unit_id<'data>(part: &'data str, trie: &ZeroTrie<ZeroVec<'data, u8>>) -> Option<usize> { | ||
trie.get(part.as_bytes()) | ||
} | ||
|
||
/// Process a part of an identifier. | ||
/// For example, if the whole identifier is: "square-kilometer-per-second", | ||
/// this function will be called for "square-kilometer" with sign (1) and "second" with sign (-1). | ||
fn analyze_identifier_part( | ||
identifier_part: &str, | ||
sign: i8, | ||
result: &mut Vec<MeasureUnitItem>, | ||
trie: &ZeroTrie<ZeroVec<'_, u8>>, | ||
) -> Result<(), ConversionError> { | ||
if identifier_part.is_empty() { | ||
return Ok(()); | ||
} | ||
let mut identifier_split = identifier_part.split('-'); | ||
while let Some(mut part) = identifier_split.next() { | ||
let power = match get_power(part) { | ||
Some(power) => { | ||
part = identifier_split | ||
.next() | ||
.ok_or(ConversionError::InvalidUnit)?; | ||
power | ||
} | ||
None => 1, | ||
}; | ||
|
||
let (si_prefix, identifier_after_si) = Self::get_si_prefix(part); | ||
let unit_id = | ||
Self::get_unit_id(identifier_after_si, trie).ok_or(ConversionError::InvalidUnit)?; | ||
|
||
result.push(MeasureUnitItem { | ||
power: sign * power, | ||
si_prefix, | ||
unit_id: unit_id as u16, | ||
}); | ||
} | ||
|
||
Ok(()) | ||
} | ||
|
||
// TODO: add test cases for this function. | ||
/// Process an identifier. | ||
pub fn try_from_identifier<'data>( | ||
identifier: &'data str, | ||
trie: &ZeroTrie<ZeroVec<'data, u8>>, | ||
) -> Result<Vec<MeasureUnitItem>, ConversionError> { | ||
if identifier.starts_with('-') { | ||
return Err(ConversionError::InvalidUnit); | ||
} | ||
|
||
let (num_part, den_part) = identifier | ||
.split_once("per-") | ||
.map(|(num_part, den_part)| (num_part.strip_suffix('-').unwrap_or(num_part), den_part)) | ||
.unwrap_or((identifier, "")); | ||
|
||
let mut measure_unit_items = Vec::<MeasureUnitItem>::new(); | ||
|
||
Self::analyze_identifier_part(num_part, 1, &mut measure_unit_items, trie)?; | ||
Self::analyze_identifier_part(den_part, -1, &mut measure_unit_items, trie)?; | ||
Ok(measure_unit_items) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
// This file is part of ICU4X. For terms of use, please see the file | ||
// called LICENSE at the top level of the ICU4X source tree | ||
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
// TODO: complete all the cases for the powers.
// TODO: consider using a trie for the powers.
/// Converts a power string to a power.
///
/// Recognizes `"square"`, `"cubic"` and `"pow1"` through `"pow15"`.
/// The whole of `part` must match; returns `None` for any other string.
pub fn get_power(part: &str) -> Option<i8> {
    match part {
        "pow1" => Some(1),
        "square" | "pow2" => Some(2),
        "cubic" | "pow3" => Some(3),
        "pow4" => Some(4),
        "pow5" => Some(5),
        "pow6" => Some(6),
        "pow7" => Some(7),
        "pow8" => Some(8),
        "pow9" => Some(9),
        "pow10" => Some(10),
        "pow11" => Some(11),
        "pow12" => Some(12),
        "pow13" => Some(13),
        "pow14" => Some(14),
        "pow15" => Some(15),
        _ => None,
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
// This file is part of ICU4X. For terms of use, please see the file | ||
// called LICENSE at the top level of the ICU4X source tree | ||
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
|
||
// TODO: consider returning Option<(i8, &str)> instead of (0, part) for the case when the prefix is not found.
// TODO: consider using a trie for the prefixes.
// TODO: complete all the cases for the prefixes.
/// Extracts the SI prefix of base 10.
///
/// The returned power is the exponent of ten the prefix stands for
/// (for example `3` for `"kilo"` and `-3` for `"milli"`).
///
/// NOTE:
///    if the prefix is found, the function will return (power, part without the prefix).
///    if the prefix is not found, the function will return (0, part).
pub fn get_si_prefix_base_ten(part: &str) -> (i8, &str) {
    // Checked in order; the first prefix that `part` starts with wins.
    const DECIMAL_PREFIXES: &[(&str, i8)] = &[
        ("quetta", 30),
        ("ronna", 27),
        ("yotta", 24),
        ("zetta", 21),
        ("exa", 18),
        ("peta", 15),
        ("tera", 12),
        ("giga", 9),
        ("mega", 6),
        ("kilo", 3),
        ("hecto", 2),
        ("deca", 1),
        ("deci", -1),
        ("centi", -2),
        ("milli", -3),
        ("micro", -6),
        ("nano", -9),
        ("pico", -12),
        ("femto", -15),
        ("atto", -18),
        ("zepto", -21),
        ("yocto", -24),
        ("ronto", -27),
        ("quecto", -30),
    ];

    DECIMAL_PREFIXES
        .iter()
        .find_map(|&(prefix, power)| part.strip_prefix(prefix).map(|rest| (power, rest)))
        .unwrap_or((0, part))
}
|
||
// TODO: consider returning Option<(i8, &str)> instead of (0, part) for the case when the prefix is not found.
// TODO: consider using a trie for the prefixes.
// TODO: complete all the cases for the prefixes.
/// Extracts the SI prefix of base 2.
///
/// The returned power is the exponent of two the prefix stands for
/// (for example `10` for `"kibi"` and `20` for `"mebi"`).
///
/// NOTE:
///    if the prefix is found, the function will return (power, part without the prefix).
///    if the prefix is not found, the function will return (0, part).
pub fn get_si_prefix_base_two(part: &str) -> (i8, &str) {
    // Checked in order; the first prefix that `part` starts with wins.
    const BINARY_PREFIXES: &[(&str, i8)] = &[
        ("kibi", 10),
        ("mebi", 20),
        ("gibi", 30),
        ("tebi", 40),
        ("pebi", 50),
        ("exbi", 60),
        ("zebi", 70),
        ("yobi", 80),
    ];

    BINARY_PREFIXES
        .iter()
        .find_map(|&(prefix, power)| part.strip_prefix(prefix).map(|rest| (power, rest)))
        .unwrap_or((0, part))
}
Oops, something went wrong.