From 1bda4d4e093120b42d00ecfcc5a8a5a6e165f812 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 23 Aug 2023 17:40:53 +0200 Subject: [PATCH 001/104] Add the needed helper and the structure --- experimental/unitsconversion/src/provider.rs | 29 +++ .../src/transform/cldr/units/helpers.rs | 227 ++++++++++++++++++ .../datagen/src/transform/cldr/units/mod.rs | 2 + 3 files changed, 258 insertions(+) create mode 100644 provider/datagen/src/transform/cldr/units/helpers.rs diff --git a/experimental/unitsconversion/src/provider.rs b/experimental/unitsconversion/src/provider.rs index 2608f3950d7..2a1395f9514 100644 --- a/experimental/unitsconversion/src/provider.rs +++ b/experimental/unitsconversion/src/provider.rs @@ -35,3 +35,32 @@ pub struct UnitsConstantsV1<'data> { #[cfg_attr(feature = "serde", serde(borrow))] pub constants_map: ZeroMap<'data, str, str>, } + +#[zerovec::make_ule(ConstantTypeULE)] +#[cfg_attr( + feature = "datagen", + derive(serde::Serialize, databake::Bake), + databake(path = icu_unitsconversion::provider), +)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize))] +#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)] +#[repr(u8)] +pub enum ConstantType { + #[default] + Actual = 0, + Approximate = 1, +} + +#[zerovec::make_ule(ConstantValueULE)] +#[cfg_attr( + feature = "datagen", + derive(serde::Serialize, databake::Bake), + databake(path = icu_unitsconversion::provider), +)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize))] +#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)] +pub struct ConstantValue { + pub numerator: u32, + pub denominator: u32, + pub constant_type: ConstantType, +} diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs new file mode 100644 index 00000000000..6b7c94803ba --- /dev/null +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -0,0 +1,227 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use icu_datetime::input; +use icu_provider::DataError; + +/// Removes all whitespace from a string. +fn remove_whitespace(s: &str) -> String { + s.chars().filter(|c| !c.is_whitespace()).collect() +} + +/// Split a string into a vector of strings, using specific characters as delimiters. +fn split_string(s: &str, delimiters: Vec<&str>) -> Vec { + let mut result = vec![]; + let mut current = String::new(); + for c in s.chars() { + if delimiters.contains(&c.to_string().as_str()) { + result.push(current.clone()); + current.clear(); + } else { + current.push(c); + } + } + result.push(current); + result +} + +/// Returns the greatest common divisor of two numbers. +fn gcd(a: u64, b: u64) -> u64 { + if b == 0 { + return a; + } + gcd(b, a % b) +} + +/// Checks if a string represents a decimal number. +fn check_if_decimal_number(number: &str) -> bool { + if number.chars().next().unwrap_or('0') == '-' || number.chars().next().unwrap_or('0') == '+' { + return check_if_decimal_number(&number[1..]); + } + + let mut split = number.split('.'); + if split.clone().count() > 2 { + return false; + } + let whole = split.next().unwrap_or("0"); + let fractional = split.next().unwrap_or("0"); + if whole.chars().any(|c| !c.is_digit(10)) { + return false; + } + if fractional.chars().any(|c| !c.is_digit(10)) { + return false; + } + true +} + +/// Converts a rational number represented as a string into a tuple of (numerator, denominator). +fn to_fractional(number: &str) -> Result<(u64, u64), DataError> { + if !check_if_decimal_number(number) { + return Err(DataError::custom("the number is not a decimal number")); + } + + let mut split = number.split('.'); + let whole = split.next().unwrap_or("0"); + let fractional = split.next().unwrap_or("0"); + let mut denominator = 1; + for _ in 0..fractional.len() { + denominator *= 10; + } + let numerator = whole.to_string() + fractional; + let numerator = numerator.parse::().unwrap_or(0); + let gcd = gcd(numerator, denominator); + Ok((numerator / gcd, denominator / gcd)) +} + +/// Checks if a string represents a scientific notation number. +fn check_if_scientific_notation_number(number: &str) -> bool { + let mut split = number.split('E'); + if split.clone().count() > 2 { + return false; + } + + let base = split.next().unwrap_or("0"); + let exponent = split.next().unwrap_or("0"); + check_if_decimal_number(base) && check_if_decimal_number(exponent) +} + +/// Converts a scientific notation number represented as a string into a tuple of (numerator, denominator). +fn convert_scientific_notation_number_to_fractional(number: &str) -> Result<(u64, u64), DataError> { + if !check_if_scientific_notation_number(number) { + return Err(DataError::custom( + "the number is not a scientific notation number", + )); + } + + let mut split = number.split('E'); + let base = split.next().unwrap_or("0"); + let exponent = split.next().unwrap_or("0"); + + let (mut numerator, mut denominator) = to_fractional(base).unwrap(); + let exponent = exponent.parse::().unwrap(); + if exponent > 0 { + for _ in 0..exponent { + numerator *= 10; + } + } else { + for _ in 0..(-exponent) { + denominator *= 10; + } + } + let gcd = gcd(numerator, denominator); + Ok((numerator / gcd, denominator / gcd)) +} + +#[test] +fn test_remove_whitespace() { + let input = "He llo Wo rld!"; + let expected = "HelloWorld!"; + let actual = remove_whitespace(input); + assert_eq!(expected, actual); +} + +#[test] +fn test_split_string() { + let input = "Hello,World!/in/ICU4X"; + let expected = vec![ + "Hello".to_string(), + "World!".to_string(), + "in".to_string(), + "ICU4X".to_string(), + ]; + let actual = split_string(input, vec![",", "/"]); + assert_eq!(expected, actual); + + let input = "ft3_to_m3/12*12*12"; + let expected = vec![ + "ft3_to_m3".to_string(), + "12".to_string(), + "12".to_string(), + "12".to_string(), + ]; + let actual = split_string(input, vec!["/", "*"]); + assert_eq!(expected, actual); +} + +#[test] +fn test_gcd() { + let input = (12, 8); + let expected = 4; + let actual = gcd(input.0, input.1); + assert_eq!(expected, actual); + + let input = (8, 12); + let expected = 4; + let actual = gcd(input.0, input.1); + assert_eq!(expected, actual); + + let input = (12, 0); + let expected = 12; + let actual = gcd(input.0, input.1); + assert_eq!(expected, actual); + + let input = (0, 12); + let expected = 12; + let actual = gcd(input.0, input.1); + assert_eq!(expected, actual); +} + +#[test] +fn test_to_fractional() { + let input = "1.5"; + let expected = (3, 2); + let actual = to_fractional(input); + assert_eq!(expected, actual.unwrap()); + + let input = "1.25"; + let expected = (5, 4); + let actual = to_fractional(input); + assert_eq!(expected, actual.unwrap()); + + let input = "1.125"; + let expected = (9, 8); + let actual = to_fractional(input); + assert_eq!(expected, actual.unwrap()); + + let input = "1.0625"; + let expected = (17, 16); + let actual = to_fractional(input); + assert_eq!(expected, actual.unwrap()); + + let input = "0.000003333"; + let expected = (3333, 1000000000); + let actual = to_fractional(input); + assert_eq!(expected, actual.unwrap()); + + let input = "11111"; + let expected = (11111, 1); + let actual = to_fractional(input); + assert_eq!(expected, actual.unwrap()); + + let input = "1,000,000.5"; + let actual = to_fractional(input); + assert!(actual.is_err()); + + let input = "1.5.5"; + let actual = to_fractional(input); + assert!(actual.is_err()); +} + +#[test] +fn test_convert_scientific_notation_number_to_fractional() { + let input = "1.5E1"; + let expected = (15, 1); + let actual = convert_scientific_notation_number_to_fractional(input); + assert_eq!(expected, actual.unwrap()); + + let input = "1.5E-1"; + let expected = (3, 20); + let actual = convert_scientific_notation_number_to_fractional(input); + assert_eq!(expected, actual.unwrap()); + + let input = "1.5E-2"; + let expected = (3, 200); + let actual = convert_scientific_notation_number_to_fractional(input); + assert_eq!(expected, actual.unwrap()); +} diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 353a1b4d549..7127a1b697e 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -2,6 +2,8 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). +mod helpers; + use std::collections::BTreeMap; use crate::transform::cldr::cldr_serde; From 8b97948d6019fabce90855164560d8c3ea10561b Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 23 Aug 2023 17:45:01 +0200 Subject: [PATCH 002/104] fix clippy --- provider/datagen/src/transform/cldr/units/helpers.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index 6b7c94803ba..a561ed22cbd 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -2,7 +2,7 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use icu_datetime::input; + use icu_provider::DataError; /// Removes all whitespace from a string. @@ -46,10 +46,10 @@ fn check_if_decimal_number(number: &str) -> bool { } let whole = split.next().unwrap_or("0"); let fractional = split.next().unwrap_or("0"); - if whole.chars().any(|c| !c.is_digit(10)) { + if whole.chars().any(|c| !c.is_ascii_digit()) { return false; } - if fractional.chars().any(|c| !c.is_digit(10)) { + if fractional.chars().any(|c| !c.is_ascii_digit()) { return false; } true From 1ac078a00a4cd57227bfa0338fbc93294262565c Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 23 Aug 2023 17:47:20 +0200 Subject: [PATCH 003/104] change signature --- .../datagen/src/transform/cldr/units/helpers.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index a561ed22cbd..eddd41e6f6f 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -6,12 +6,12 @@ use icu_provider::DataError; /// Removes all whitespace from a string. -fn remove_whitespace(s: &str) -> String { +pub fn remove_whitespace(s: &str) -> String { s.chars().filter(|c| !c.is_whitespace()).collect() } /// Split a string into a vector of strings, using specific characters as delimiters. -fn split_string(s: &str, delimiters: Vec<&str>) -> Vec { +pub fn split_string(s: &str, delimiters: Vec<&str>) -> Vec { let mut result = vec![]; let mut current = String::new(); for c in s.chars() { @@ -27,7 +27,7 @@ fn split_string(s: &str, delimiters: Vec<&str>) -> Vec { } /// Returns the greatest common divisor of two numbers. -fn gcd(a: u64, b: u64) -> u64 { +pub fn gcd(a: u64, b: u64) -> u64 { if b == 0 { return a; } @@ -35,7 +35,7 @@ fn gcd(a: u64, b: u64) -> u64 { } /// Checks if a string represents a decimal number. -fn check_if_decimal_number(number: &str) -> bool { +pub fn check_if_decimal_number(number: &str) -> bool { if number.chars().next().unwrap_or('0') == '-' || number.chars().next().unwrap_or('0') == '+' { return check_if_decimal_number(&number[1..]); } @@ -56,7 +56,7 @@ fn check_if_decimal_number(number: &str) -> bool { } /// Converts a rational number represented as a string into a tuple of (numerator, denominator). -fn to_fractional(number: &str) -> Result<(u64, u64), DataError> { +pub fn to_fractional(number: &str) -> Result<(u64, u64), DataError> { if !check_if_decimal_number(number) { return Err(DataError::custom("the number is not a decimal number")); } @@ -75,7 +75,7 @@ fn to_fractional(number: &str) -> Result<(u64, u64), DataError> { } /// Checks if a string represents a scientific notation number. -fn check_if_scientific_notation_number(number: &str) -> bool { +pub fn check_if_scientific_notation_number(number: &str) -> bool { let mut split = number.split('E'); if split.clone().count() > 2 { return false; @@ -87,7 +87,7 @@ fn check_if_scientific_notation_number(number: &str) -> bool { } /// Converts a scientific notation number represented as a string into a tuple of (numerator, denominator). -fn convert_scientific_notation_number_to_fractional(number: &str) -> Result<(u64, u64), DataError> { +pub fn convert_scientific_notation_number_to_fractional(number: &str) -> Result<(u64, u64), DataError> { if !check_if_scientific_notation_number(number) { return Err(DataError::custom( "the number is not a scientific notation number", From ce273391929396e2120581c82ed9ff4e7028d51c Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Thu, 24 Aug 2023 15:50:23 +0200 Subject: [PATCH 004/104] Add all the helpers functions --- experimental/unitsconversion/src/helpers.rs | 11 ++ experimental/unitsconversion/src/lib.rs | 16 +++ experimental/unitsconversion/src/provider.rs | 59 ++++++++++ .../src/transform/cldr/units/helpers.rs | 107 ++++++++++++++++-- 4 files changed, 185 insertions(+), 8 deletions(-) create mode 100644 experimental/unitsconversion/src/helpers.rs diff --git a/experimental/unitsconversion/src/helpers.rs b/experimental/unitsconversion/src/helpers.rs new file mode 100644 index 00000000000..30cbd63cde1 --- /dev/null +++ b/experimental/unitsconversion/src/helpers.rs @@ -0,0 +1,11 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +/// Returns the greatest common divisor of two numbers. +pub fn gcd(a: u64, b: u64) -> u64 { + if b == 0 { + return a; + } + gcd(b, a % b) +} diff --git a/experimental/unitsconversion/src/lib.rs b/experimental/unitsconversion/src/lib.rs index 529e07a2325..aa1625da219 100644 --- a/experimental/unitsconversion/src/lib.rs +++ b/experimental/unitsconversion/src/lib.rs @@ -4,6 +4,22 @@ //! Experimental. +use displaydoc::Display; + extern crate alloc; pub mod provider; +pub mod helpers; + + + +#[derive(Display, Debug, PartialEq)] +#[non_exhaustive] +pub enum Error { + #[displaydoc("Magnitude or number of digits exceeded")] + Limit, +} + +#[cfg(feature = "std")] +impl std::error::Error for Error {} + diff --git a/experimental/unitsconversion/src/provider.rs b/experimental/unitsconversion/src/provider.rs index 2a1395f9514..b023f1b9cf9 100644 --- a/experimental/unitsconversion/src/provider.rs +++ b/experimental/unitsconversion/src/provider.rs @@ -12,6 +12,8 @@ use icu_provider::prelude::*; use zerovec::ZeroMap; +use crate::{helpers::gcd, Error}; + /// This type contains all of the constants data for units conversion. /// ///
@@ -64,3 +66,60 @@ pub struct ConstantValue { pub denominator: u32, pub constant_type: ConstantType, } + + +impl ConstantValue { + pub fn multiply(&self, other: &ConstantValue) -> Result { + let numerator = self.numerator as u64 * other.numerator as u64; + let denominator = self.denominator as u64 * other.denominator as u64; + let gcd = gcd(numerator, denominator); + + let numerator = match u32::try_from(numerator / gcd) { + Ok(numerator) => numerator, + Err(_) => return Err(Error::Limit), + }; + + let denominator = match u32::try_from(denominator / gcd) { + Ok(denominator) => denominator, + Err(_) => return Err(Error::Limit), + }; + + let constant_type = match (self.constant_type, other.constant_type) { + (ConstantType::Actual, ConstantType::Actual) => ConstantType::Actual, + _ => ConstantType::Approximate, + }; + + Ok(ConstantValue { + numerator, + denominator, + constant_type, + }) + } + + pub fn divide(&self, other: &ConstantValue) -> Result { + let numerator = self.numerator as u64 * other.denominator as u64; + let denominator = self.denominator as u64 * other.numerator as u64; + let gcd = gcd(numerator, denominator); + + let numerator = match u32::try_from(numerator / gcd) { + Ok(numerator) => numerator, + Err(_) => return Err(Error::Limit), + }; + + let denominator = match u32::try_from(denominator / gcd) { + Ok(denominator) => denominator, + Err(_) => return Err(Error::Limit), + }; + + let constant_type = match (self.constant_type, other.constant_type) { + (ConstantType::Actual, ConstantType::Actual) => ConstantType::Actual, + _ => ConstantType::Approximate, + }; + + Ok(ConstantValue { + numerator, + denominator, + constant_type, + }) + } +} \ No newline at end of file diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index eddd41e6f6f..2a6b9d63337 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -2,8 +2,11 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). +use std::collections::BTreeMap; use icu_provider::DataError; +use icu_unitsconversion::{provider::{ConstantValue, ConstantType}, helpers::gcd, Error}; + /// Removes all whitespace from a string. pub fn remove_whitespace(s: &str) -> String { @@ -26,13 +29,6 @@ pub fn split_string(s: &str, delimiters: Vec<&str>) -> Vec { result } -/// Returns the greatest common divisor of two numbers. -pub fn gcd(a: u64, b: u64) -> u64 { - if b == 0 { - return a; - } - gcd(b, a % b) -} /// Checks if a string represents a decimal number. pub fn check_if_decimal_number(number: &str) -> bool { @@ -87,7 +83,9 @@ pub fn check_if_scientific_notation_number(number: &str) -> bool { } /// Converts a scientific notation number represented as a string into a tuple of (numerator, denominator). -pub fn convert_scientific_notation_number_to_fractional(number: &str) -> Result<(u64, u64), DataError> { +pub fn convert_scientific_notation_number_to_fractional( + number: &str, +) -> Result<(u64, u64), DataError> { if !check_if_scientific_notation_number(number) { return Err(DataError::custom( "the number is not a scientific notation number", @@ -113,6 +111,9 @@ pub fn convert_scientific_notation_number_to_fractional(number: &str) -> Result< Ok((numerator / gcd, denominator / gcd)) } + + + #[test] fn test_remove_whitespace() { let input = "He llo Wo rld!"; @@ -167,6 +168,91 @@ fn test_gcd() { assert_eq!(expected, actual); } +/// Converts a string representing a constant value into a tuple of (numerator, denominator). +/// For example: input = "6.67408E-11", output will be = (41713, 625000000000000) +/// input = "ft_to_m", output will be error. +pub fn convert_constant_value_in_scientific_to_fractional(input: &str, constant_type: ConstantType) -> Result { + if !check_if_scientific_notation_number(input) { + return Err(DataError::custom( + "the number is not a scientific notation number", + )); + } + + let (numerator, denominator) = match convert_scientific_notation_number_to_fractional(input) { + Ok((numerator, denominator)) => (numerator, denominator), + Err(e) => return Err(e), + }; + + let gcd = gcd(numerator, denominator); + + let numerator = match u32::try_from(numerator / gcd) { + Ok(numerator) => numerator, + Err(_) => return Err(DataError::custom("the numerator is too large")), + }; + + let denominator = match u32::try_from(denominator / gcd) { + Ok(denominator) => denominator, + Err(_) => return Err(DataError::custom("the denominator is too large")), + }; + + Ok(ConstantValue { + numerator, + denominator, + constant_type, + }) +} + +pub fn convert_any_constant_value_to_fractional(constant_str :&str, constants_map : BTreeMap<&str, ConstantValue> ,constant_type: ConstantType) -> Result { + let constant_string_cleaned = remove_whitespace(constant_str); + let fraction_str = split_string(constant_str, vec!["/"]); + let numerator_strs = split_string(&fraction_str[0].to_string(),vec!["*"] ); + let denominator_strs = split_string(&fraction_str[1].to_string(),vec!["*"] ); + + let mut result = ConstantValue { + numerator: 1, + denominator: 1, + constant_type, + }; + + for numerator_str in numerator_strs { + let numerator = match constants_map.get(numerator_str.as_str()) { + Some(numerator) => *numerator, + None => match convert_constant_value_in_scientific_to_fractional(numerator_str.as_str(), constant_type) { + Ok(numerator) => numerator, + Err(e) => return Err(e), + }, + }; + result = match result.multiply(&numerator) { + Ok(result) => result, + Err(e) => return match e { + Error::Limit => Err(DataError::custom("calculations exceeded the limit")), + _ => Err(DataError::custom("the numerator is too large")), + }, + }; + } + + for denominator_str in denominator_strs { + let denominator = match constants_map.get(denominator_str.as_str()) { + Some(denominator) => *denominator, + None => match convert_constant_value_in_scientific_to_fractional(denominator_str.as_str(), constant_type) { + Ok(denominator) => denominator, + Err(e) => return Err(e), + }, + }; + result = match result.divide(&denominator) { + Ok(result) => result, + Err(e) => return match e { + Error::Limit => Err(DataError::custom("calculations exceeded the limit")), + _ => Err(DataError::custom("the denominator is too large")), + }, + }; + } + + Ok(result) + +} + + #[test] fn test_to_fractional() { let input = "1.5"; @@ -224,4 +310,9 @@ fn test_convert_scientific_notation_number_to_fractional() { let expected = (3, 200); let actual = convert_scientific_notation_number_to_fractional(input); assert_eq!(expected, actual.unwrap()); + + let input = "6.67408E-11"; + let expected = (41713, 625000000000000); + let actual = convert_scientific_notation_number_to_fractional(input); + assert_eq!(expected, actual.unwrap()); } From f7852517c71ba013d614ce0f40444e44e5e557ec Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Thu, 24 Aug 2023 16:44:35 +0200 Subject: [PATCH 005/104] Use the helpers to generate the constant value --- experimental/unitsconversion/src/provider.rs | 2 +- .../src/transform/cldr/units/helpers.rs | 66 +++++++++++------- .../datagen/src/transform/cldr/units/mod.rs | 67 +++++++++++++++++-- 3 files changed, 101 insertions(+), 34 deletions(-) diff --git a/experimental/unitsconversion/src/provider.rs b/experimental/unitsconversion/src/provider.rs index b023f1b9cf9..2b424ddb074 100644 --- a/experimental/unitsconversion/src/provider.rs +++ b/experimental/unitsconversion/src/provider.rs @@ -35,7 +35,7 @@ pub struct UnitsConstantsV1<'data> { // Also, the constant types. /// Maps from constant name (e.g. ft_to_m) to the value of the constant (e.g. 0.3048). #[cfg_attr(feature = "serde", serde(borrow))] - pub constants_map: ZeroMap<'data, str, str>, + pub constants_map: ZeroMap<'data, str, ConstantValue>, } #[zerovec::make_ule(ConstantTypeULE)] diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index 2a6b9d63337..3b5668e5344 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -5,8 +5,11 @@ use std::collections::BTreeMap; use icu_provider::DataError; -use icu_unitsconversion::{provider::{ConstantValue, ConstantType}, helpers::gcd, Error}; - +use icu_unitsconversion::{ + helpers::gcd, + provider::{ConstantType, ConstantValue}, + Error, +}; /// Removes all whitespace from a string. pub fn remove_whitespace(s: &str) -> String { @@ -29,7 +32,6 @@ pub fn split_string(s: &str, delimiters: Vec<&str>) -> Vec { result } - /// Checks if a string represents a decimal number. pub fn check_if_decimal_number(number: &str) -> bool { if number.chars().next().unwrap_or('0') == '-' || number.chars().next().unwrap_or('0') == '+' { @@ -111,9 +113,6 @@ pub fn convert_scientific_notation_number_to_fractional( Ok((numerator / gcd, denominator / gcd)) } - - - #[test] fn test_remove_whitespace() { let input = "He llo Wo rld!"; @@ -171,7 +170,10 @@ fn test_gcd() { /// Converts a string representing a constant value into a tuple of (numerator, denominator). /// For example: input = "6.67408E-11", output will be = (41713, 625000000000000) /// input = "ft_to_m", output will be error. -pub fn convert_constant_value_in_scientific_to_fractional(input: &str, constant_type: ConstantType) -> Result { +pub fn convert_constant_value_in_scientific_to_fractional( + input: &str, + constant_type: ConstantType, +) -> Result { if !check_if_scientific_notation_number(input) { return Err(DataError::custom( "the number is not a scientific notation number", @@ -202,13 +204,17 @@ pub fn convert_constant_value_in_scientific_to_fractional(input: &str, constant_ }) } -pub fn convert_any_constant_value_to_fractional(constant_str :&str, constants_map : BTreeMap<&str, ConstantValue> ,constant_type: ConstantType) -> Result { - let constant_string_cleaned = remove_whitespace(constant_str); - let fraction_str = split_string(constant_str, vec!["/"]); - let numerator_strs = split_string(&fraction_str[0].to_string(),vec!["*"] ); - let denominator_strs = split_string(&fraction_str[1].to_string(),vec!["*"] ); - - let mut result = ConstantValue { +pub fn convert_any_constant_value_to_fractional( + constant_str: &str, + constants_map: &BTreeMap<&str, ConstantValue>, + constant_type: ConstantType, +) -> Result { + let constant_string_cleaned = remove_whitespace(constant_str); + let fraction_str = split_string(constant_string_cleaned.as_str(), vec!["/"]); + let numerator_strs = split_string(&fraction_str[0].to_string(), vec!["*"]); + let denominator_strs = split_string(&fraction_str[1].to_string(), vec!["*"]); + + let mut result = ConstantValue { numerator: 1, denominator: 1, constant_type, @@ -217,42 +223,50 @@ pub fn convert_any_constant_value_to_fractional(constant_str :&str, constants_ma for numerator_str in numerator_strs { let numerator = match constants_map.get(numerator_str.as_str()) { Some(numerator) => *numerator, - None => match convert_constant_value_in_scientific_to_fractional(numerator_str.as_str(), constant_type) { + None => match convert_constant_value_in_scientific_to_fractional( + numerator_str.as_str(), + constant_type, + ) { Ok(numerator) => numerator, Err(e) => return Err(e), }, }; result = match result.multiply(&numerator) { Ok(result) => result, - Err(e) => return match e { - Error::Limit => Err(DataError::custom("calculations exceeded the limit")), - _ => Err(DataError::custom("the numerator is too large")), - }, + Err(e) => { + return match e { + Error::Limit => Err(DataError::custom("calculations exceeded the limit")), + _ => Err(DataError::custom("the numerator is too large")), + } + } }; } for denominator_str in denominator_strs { let denominator = match constants_map.get(denominator_str.as_str()) { Some(denominator) => *denominator, - None => match convert_constant_value_in_scientific_to_fractional(denominator_str.as_str(), constant_type) { + None => match convert_constant_value_in_scientific_to_fractional( + denominator_str.as_str(), + constant_type, + ) { Ok(denominator) => denominator, Err(e) => return Err(e), }, }; result = match result.divide(&denominator) { Ok(result) => result, - Err(e) => return match e { - Error::Limit => Err(DataError::custom("calculations exceeded the limit")), - _ => Err(DataError::custom("the denominator is too large")), - }, + Err(e) => { + return match e { + Error::Limit => Err(DataError::custom("calculations exceeded the limit")), + _ => Err(DataError::custom("the denominator is too large")), + } + } }; } Ok(result) - } - #[test] fn test_to_fractional() { let input = "1.5"; diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 7127a1b697e..6d88c5d62ef 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -11,8 +11,14 @@ use icu_provider::{ datagen::IterableDataProvider, DataError, DataLocale, DataPayload, DataProvider, DataRequest, DataResponse, }; -use icu_unitsconversion::provider::{UnitsConstantsV1, UnitsConstantsV1Marker}; -use zerovec::ZeroMap; +use icu_unitsconversion::provider::{ + ConstantType, ConstantValue, UnitsConstantsV1, UnitsConstantsV1Marker, +}; +use zerovec::{ZeroMap, ule::AsULE}; + +use self::helpers::{ + convert_any_constant_value_to_fractional, convert_constant_value_in_scientific_to_fractional, +}; impl DataProvider for crate::DatagenProvider { fn load(&self, _req: DataRequest) -> Result, DataError> { @@ -22,11 +28,47 @@ impl DataProvider for crate::DatagenProvider { .cldr()? .core() .read_and_parse("supplemental/units.json")?; - let mut constants_map = BTreeMap::<&str, &str>::new(); + let mut constants_map = BTreeMap::<&str, ConstantValue>::new(); let constants = &_units_data.supplemental.unit_constants.constants; + let mut constants_need_map = Vec::<(&str, &str, ConstantType)>::new(); for (key, constant) in constants { - constants_map.insert(key, &constant.value); + let constant_type = match &constant.status { + Some(status) => match status.as_str() { + "approximate" => ConstantType::Approximate, + _ => return Err(DataError::custom("Unknown constant type")), + }, + None => ConstantType::Actual, + }; + + let constant_str = constant.value.as_str(); + let constant_value = match convert_constant_value_in_scientific_to_fractional( + constant_str, + constant_type, + ) { + Ok(value) => value, + Err(_) => { + constants_need_map.push((key, constant_str, constant_type)); + continue; + } + }; + + constants_map.insert(key, constant_value); + } + + for (key, constant_str, constant_type) in constants_need_map { + let constant_value = convert_any_constant_value_to_fractional( + constant_str, + &constants_map, + constant_type, + ); + + match constant_value { + Ok(constant_value) => { + constants_map.insert(key, constant_value); + } + Err(_) => return Err(DataError::custom("Failed to convert constant_str").with_debug_context(constant_str)), + } } let result = UnitsConstantsV1 { @@ -63,8 +105,19 @@ fn test_basic() { .take_payload() .unwrap(); - let constants = &und.get().to_owned().constants_map; + // let constants = &und.get().to_owned().constants_map; + + // let ft_to_m : ConstantValue = constants.get("ft_to_m").unwrap(); + + // assert_eq!(ft_to_m , ConstantValue { + // numerator: 3048, + // denominator: 10000, + // constant_type: ConstantType::Actual, + // }); - assert_eq!(constants.get("ft_to_m").unwrap(), "0.3048"); - assert_eq!(constants.get("ft2_to_m2").unwrap(), "ft_to_m*ft_to_m"); + // assert!(constants.get("ft2_to_m2").eq( ConstantValue { + // numerator: 3048, + // denominator: 10000, + // constant_type: ConstantType::Actual, + // })); } From 2114087a3aacb8307227a24d060acc4434188e6f Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Thu, 24 Aug 2023 17:06:50 +0200 Subject: [PATCH 006/104] fix --- .../datagen/src/transform/cldr/units/helpers.rs | 13 ++++++++++++- .../data/cldr/cldr-core/supplemental/units.json | 10 ---------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index 3b5668e5344..c33871b13d6 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -103,10 +103,16 @@ pub fn convert_scientific_notation_number_to_fractional( if exponent > 0 { for _ in 0..exponent { numerator *= 10; + let gcd = gcd(numerator, denominator); + numerator /= gcd; + denominator /= gcd; } } else { for _ in 0..(-exponent) { denominator *= 10; + let gcd = gcd(numerator, denominator); + numerator /= gcd; + denominator /= gcd; } } let gcd = gcd(numerator, denominator); @@ -212,7 +218,7 @@ pub fn convert_any_constant_value_to_fractional( let constant_string_cleaned = remove_whitespace(constant_str); let fraction_str = split_string(constant_string_cleaned.as_str(), vec!["/"]); let numerator_strs = split_string(&fraction_str[0].to_string(), vec!["*"]); - let denominator_strs = split_string(&fraction_str[1].to_string(), vec!["*"]); + let denominator_strs = split_string(&fraction_str.get(1).unwrap_or(&"".to_string()), vec!["*"]); let mut result = ConstantValue { numerator: 1, @@ -329,4 +335,9 @@ fn test_convert_scientific_notation_number_to_fractional() { let expected = (41713, 625000000000000); let actual = convert_scientific_notation_number_to_fractional(input); assert_eq!(expected, actual.unwrap()); + + let input = "149597870700"; + let expected = (149597870700, 1); + let actual = convert_scientific_notation_number_to_fractional(input); + assert_eq!(expected, actual.unwrap()); } diff --git a/provider/datagen/tests/data/cldr/cldr-core/supplemental/units.json b/provider/datagen/tests/data/cldr/cldr-core/supplemental/units.json index 394af2bd011..c93643c6d2d 100644 --- a/provider/datagen/tests/data/cldr/cldr-core/supplemental/units.json +++ b/provider/datagen/tests/data/cldr/cldr-core/supplemental/units.json @@ -14,16 +14,9 @@ "ft3_to_m3": { "_value": "ft_to_m*ft_to_m*ft_to_m" }, - "G": { - "_value": "6.67408E-11", - "_status": "approximate" - }, "gal_imp_to_m3": { "_value": "0.00454609" }, - "gal_to_m3": { - "_value": "231*in3_to_m3" - }, "glucose_molar_mass": { "_description": "derivation from the mean atomic weights according to STANDARD ATOMIC WEIGHTS 2019 on https://ciaaw.org/atomic-weights.htm", "_value": "180.1557" @@ -34,9 +27,6 @@ "in3_to_m3": { "_value": "ft3_to_m3/12*12*12" }, - "item_per_mole": { - "_value": "6.02214076E+23" - }, "lb_to_kg": { "_value": "0.45359237" }, From acedc3293021412cf19dc84f5ed3fd62cc0e4c69 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Thu, 24 Aug 2023 17:30:19 +0200 Subject: [PATCH 007/104] first stage in data ( the data is not complete yet!) --- experimental/unitsconversion/src/lib.rs | 5 +- experimental/unitsconversion/src/provider.rs | 7 +- .../src/transform/cldr/units/helpers.rs | 12 +++- .../datagen/src/transform/cldr/units/mod.rs | 7 +- .../cldr/cldr-core/supplemental/units.json | 7 -- .../data/json/units/constants@1/und.json | 65 ++++++++++++++----- .../tests/data/postcard/fingerprints.csv | 2 +- 7 files changed, 70 insertions(+), 35 deletions(-) diff --git a/experimental/unitsconversion/src/lib.rs b/experimental/unitsconversion/src/lib.rs index aa1625da219..4595579af4b 100644 --- a/experimental/unitsconversion/src/lib.rs +++ b/experimental/unitsconversion/src/lib.rs @@ -8,10 +8,8 @@ use displaydoc::Display; extern crate alloc; -pub mod provider; pub mod helpers; - - +pub mod provider; #[derive(Display, Debug, PartialEq)] #[non_exhaustive] @@ -22,4 +20,3 @@ pub enum Error { #[cfg(feature = "std")] impl std::error::Error for Error {} - diff --git a/experimental/unitsconversion/src/provider.rs b/experimental/unitsconversion/src/provider.rs index 2b424ddb074..3c19d4c947d 100644 --- a/experimental/unitsconversion/src/provider.rs +++ b/experimental/unitsconversion/src/provider.rs @@ -67,7 +67,6 @@ pub struct ConstantValue { pub constant_type: ConstantType, } - impl ConstantValue { pub fn multiply(&self, other: &ConstantValue) -> Result { let numerator = self.numerator as u64 * other.numerator as u64; @@ -78,7 +77,7 @@ impl ConstantValue { Ok(numerator) => numerator, Err(_) => return Err(Error::Limit), }; - + let denominator = match u32::try_from(denominator / gcd) { Ok(denominator) => denominator, Err(_) => return Err(Error::Limit), @@ -105,7 +104,7 @@ impl ConstantValue { Ok(numerator) => numerator, Err(_) => return Err(Error::Limit), }; - + let denominator = match u32::try_from(denominator / gcd) { Ok(denominator) => denominator, Err(_) => return Err(Error::Limit), @@ -122,4 +121,4 @@ impl ConstantValue { constant_type, }) } -} \ No newline at end of file +} diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index c33871b13d6..cb36dea98ed 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -218,7 +218,11 @@ pub fn convert_any_constant_value_to_fractional( let constant_string_cleaned = remove_whitespace(constant_str); let fraction_str = split_string(constant_string_cleaned.as_str(), vec!["/"]); let numerator_strs = split_string(&fraction_str[0].to_string(), vec!["*"]); - let denominator_strs = split_string(&fraction_str.get(1).unwrap_or(&"".to_string()), vec!["*"]); + let denominator_strs: Option> = if fraction_str.len() > 1 { + Some(split_string(&fraction_str[1].to_string(), vec!["*"])) + } else { + None + }; let mut result = ConstantValue { numerator: 1, @@ -248,7 +252,11 @@ pub fn convert_any_constant_value_to_fractional( }; } - for denominator_str in denominator_strs { + if denominator_strs.is_none() { + return Ok(result); + } + + for denominator_str in denominator_strs.unwrap() { let denominator = match constants_map.get(denominator_str.as_str()) { Some(denominator) => *denominator, None => match convert_constant_value_in_scientific_to_fractional( diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 6d88c5d62ef..9b2eb4279e5 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -14,7 +14,7 @@ use icu_provider::{ use icu_unitsconversion::provider::{ ConstantType, ConstantValue, UnitsConstantsV1, UnitsConstantsV1Marker, }; -use zerovec::{ZeroMap, ule::AsULE}; +use zerovec::{ule::AsULE, ZeroMap}; use self::helpers::{ convert_any_constant_value_to_fractional, convert_constant_value_in_scientific_to_fractional, @@ -67,7 +67,10 @@ impl DataProvider for crate::DatagenProvider { Ok(constant_value) => { constants_map.insert(key, constant_value); } - Err(_) => return Err(DataError::custom("Failed to convert constant_str").with_debug_context(constant_str)), + Err(_) => { + return Err(DataError::custom("Failed to convert constant_str") + .with_debug_context(constant_str)) + } } } diff --git a/provider/datagen/tests/data/cldr/cldr-core/supplemental/units.json b/provider/datagen/tests/data/cldr/cldr-core/supplemental/units.json index c93643c6d2d..638c8d6c586 100644 --- a/provider/datagen/tests/data/cldr/cldr-core/supplemental/units.json +++ b/provider/datagen/tests/data/cldr/cldr-core/supplemental/units.json @@ -24,16 +24,9 @@ "gravity": { "_value": "9.80665" }, - "in3_to_m3": { - "_value": "ft3_to_m3/12*12*12" - }, "lb_to_kg": { "_value": "0.45359237" }, - "meters_per_AU": { - "_description": "https://www.iau.org/public/themes/measuring/", - "_value": "149597870700" - }, "PI": { "_value": "411557987 / 131002976", "_status": "approximate" diff --git a/provider/datagen/tests/data/json/units/constants@1/und.json b/provider/datagen/tests/data/json/units/constants@1/und.json index 5a41da05a9c..2982bd56382 100644 --- a/provider/datagen/tests/data/json/units/constants@1/und.json +++ b/provider/datagen/tests/data/json/units/constants@1/und.json @@ -1,19 +1,54 @@ { "constants_map": { - "G": "6.67408E-11", - "PI": "411557987 / 131002976", - "ft2_to_m2": "ft_to_m*ft_to_m", - "ft3_to_m3": "ft_to_m*ft_to_m*ft_to_m", - "ft_to_m": "0.3048", - "gal_imp_to_m3": "0.00454609", - "gal_to_m3": "231*in3_to_m3", - "glucose_molar_mass": "180.1557", - "gravity": "9.80665", - "in3_to_m3": "ft3_to_m3/12*12*12", - "item_per_mole": "6.02214076E+23", - "lb_to_kg": "0.45359237", - "meters_per_AU": "149597870700", - "sec_per_julian_year": "31557600", - "speed_of_light_meters_per_second": "299792458" + "PI": { + "numerator": 411557987, + "denominator": 131002976, + "constant_type": "Approximate" + }, + "ft2_to_m2": { + "numerator": 145161, + "denominator": 1562500, + "constant_type": "Actual" + }, + "ft3_to_m3": { + "numerator": 55306341, + "denominator": 1953125000, + "constant_type": "Actual" + }, + "ft_to_m": { + "numerator": 381, + "denominator": 1250, + "constant_type": "Actual" + }, + "gal_imp_to_m3": { + "numerator": 454609, + "denominator": 100000000, + "constant_type": "Actual" + }, + "glucose_molar_mass": { + "numerator": 1801557, + "denominator": 10000, + "constant_type": "Actual" + }, + "gravity": { + "numerator": 196133, + "denominator": 20000, + "constant_type": "Actual" + }, + "lb_to_kg": { + "numerator": 45359237, + "denominator": 100000000, + "constant_type": "Actual" + }, + "sec_per_julian_year": { + "numerator": 31557600, + "denominator": 1, + "constant_type": "Actual" + }, + "speed_of_light_meters_per_second": { + "numerator": 299792458, + "denominator": 1, + "constant_type": "Actual" + } } } diff --git a/provider/datagen/tests/data/postcard/fingerprints.csv b/provider/datagen/tests/data/postcard/fingerprints.csv index d2f7bfd4f4b..552c9eee717 100644 --- a/provider/datagen/tests/data/postcard/fingerprints.csv +++ b/provider/datagen/tests/data/postcard/fingerprints.csv @@ -2055,4 +2055,4 @@ transliterator/rules@1, und+und-t-s0-publish, 1525B, 1a110a14d23d3e0e transliterator/rules@1, und+und-t-und-Latn-d0-ascii, 30496B, 58523bdcfad4125e transliterator/rules@1, und+x-bengali-interindic, 2987B, a6707070c6ead260 transliterator/rules@1, und+x-interindic-arabic, 24586B, ba7243b220d9043 -units/constants@1, und, 426B, e0c7eeb9e702371c +units/constants@1, und, 241B, d01bb675795388a6 From b80be70cbbc7ebe73d25e3e3a049c4c20462c35b Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Thu, 24 Aug 2023 18:36:42 +0200 Subject: [PATCH 008/104] fix clippy --- provider/datagen/src/transform/cldr/units/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 9b2eb4279e5..a827919cd15 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -14,7 +14,7 @@ use icu_provider::{ use icu_unitsconversion::provider::{ ConstantType, ConstantValue, UnitsConstantsV1, UnitsConstantsV1Marker, }; -use zerovec::{ule::AsULE, ZeroMap}; +use zerovec::{ZeroMap}; use self::helpers::{ convert_any_constant_value_to_fractional, convert_constant_value_in_scientific_to_fractional, From 40a412c2a3552f3a628fa826485f99a435bf6122 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 5 Sep 2023 14:43:55 +0200 Subject: [PATCH 009/104] fix fingerprint --- .../datagen/tests/data/postcard/fingerprints.csv | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/provider/datagen/tests/data/postcard/fingerprints.csv b/provider/datagen/tests/data/postcard/fingerprints.csv index 894e69171ed..feed7e87eb4 100644 --- a/provider/datagen/tests/data/postcard/fingerprints.csv +++ b/provider/datagen/tests/data/postcard/fingerprints.csv @@ -2046,16 +2046,6 @@ time_zone/specific_short@1, sr-Latn, 119B, dcdb9855b7df4f90 time_zone/specific_short@1, th, 31B, 4b7af6a019fab889 time_zone/specific_short@1, tr, 31B, 4b7af6a019fab889 time_zone/specific_short@1, und, 31B, 4b7af6a019fab889 -transliterator/rules@1, und+de-t-de-d0-ascii, 16818B, 926b0cd37b52ed48 -transliterator/rules@1, und+el-Latn-t-el-m0-bgn, 14576B, 30e6e0ba8972cb3f -transliterator/rules@1, und+und-Arab-t-und-Beng, 379B, e6de49d4e1b89e23 -transliterator/rules@1, und+und-Latn-t-s0-ascii, 108B, 70ffc7abd2bb02c8 -transliterator/rules@1, und+und-t-d0-publish, 3683B, b72ef4e641ae77ae -transliterator/rules@1, und+und-t-s0-publish, 1525B, 1a110a14d23d3e0e -transliterator/rules@1, und+und-t-und-Latn-d0-ascii, 30496B, 58523bdcfad4125e -transliterator/rules@1, und+x-bengali-interindic, 2987B, a6707070c6ead260 -transliterator/rules@1, und+x-interindic-arabic, 24586B, ba7243b220d9043 -units/constants@1, und, 241B, d01bb675795388a6 transliterator/rules@1, und+de-t-de-d0-ascii, 16784B, cbb0f7f795d1c6dc transliterator/rules@1, und+el-Latn-t-el-m0-bgn, 13818B, 2293dcb5f5e7fc0b transliterator/rules@1, und+und-Arab-t-s0-intindic, 24110B, 9f9bbc5a58d2831 @@ -2072,4 +2062,4 @@ transliterator/rules@1, und+und-t-und-d0-test-m0-hexuni-s0-test, 104B, 4335c7101 transliterator/rules@1, und+und-t-und-d0-test-m0-niels-s0-test, 1800B, 6a560a4143a4b60c transliterator/rules@1, und+und-t-und-d0-test-m0-rectesta-s0-test, 370B, af652bcb33e1038b transliterator/rules@1, und+und-t-und-d0-test-m0-rectestr-s0-test, 281B, 51be7571fd233bd6 -units/constants@1, und, 426B, e0c7eeb9e702371c +units/constants@1, und, 241B, d01bb675795388a6 From e376ec69fb5f7228d09009ba6d7a557de838edae Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 13 Sep 2023 15:18:10 +0200 Subject: [PATCH 010/104] Units conversion skeleton. --- Cargo.lock | 17 +++- experimental/unitsconversion/Cargo.toml | 3 +- experimental/unitsconversion/src/helpers.rs | 47 ++++++++++++ experimental/unitsconversion/src/lib.rs | 3 + experimental/unitsconversion/src/provider.rs | 81 ++++---------------- experimental/unitsconversion/src/ule.rs | 19 +++++ 6 files changed, 102 insertions(+), 68 deletions(-) create mode 100644 experimental/unitsconversion/src/ule.rs diff --git a/Cargo.lock b/Cargo.lock index 4cc4cdd7cf8..1373fdc4d2e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2044,6 +2044,8 @@ dependencies = [ "fixed_decimal", "icu_locid", "icu_provider", + "num-bigint", + "num-traits", "serde", "tinystr", "zerovec", @@ -2359,6 +2361,17 @@ dependencies = [ "rawpointer", ] +[[package]] +name = "num-bigint" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + [[package]] name = "num-complex" version = "0.4.3" @@ -2380,9 +2393,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" dependencies = [ "autocfg", "libm", diff --git a/experimental/unitsconversion/Cargo.toml b/experimental/unitsconversion/Cargo.toml index 2527a460abc..5f1b6d06843 100644 --- a/experimental/unitsconversion/Cargo.toml +++ b/experimental/unitsconversion/Cargo.toml @@ -28,7 +28,8 @@ icu_provider = { workspace = true, features = ["macros"] } serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true } tinystr = { workspace = true, features = ["zerovec"], default-features = false } zerovec = { workspace = true, features = ["yoke"] } - +num-bigint = { version = "0.4.4", default-features = false } +num-traits = { version = "0.2.14", default-features = false } [features] default = [] diff --git a/experimental/unitsconversion/src/helpers.rs b/experimental/unitsconversion/src/helpers.rs index 30cbd63cde1..bd10e62e689 100644 --- a/experimental/unitsconversion/src/helpers.rs +++ b/experimental/unitsconversion/src/helpers.rs @@ -2,6 +2,31 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). +use std::str::FromStr; + +use num_bigint::BigUint; +use num_traits::{Zero, One}; + +use crate::Error; + + +pub fn gcd_big_unit(a: BigUint, b: BigUint) -> Result { + if a.is_zero() || b.is_zero(){ + return Err(Error::InvalidInput); + } + + let mut a = a.clone(); + let mut b = b.clone(); + + while !b.is_zero() { + let t = b.clone(); + b = a.modpow(&One::one(), &b); + a = t; + } + + Ok(a) +} + /// Returns the greatest common divisor of two numbers. pub fn gcd(a: u64, b: u64) -> u64 { if b == 0 { @@ -9,3 +34,25 @@ pub fn gcd(a: u64, b: u64) -> u64 { } gcd(b, a % b) } + +#[test] +fn test_gcd_big_unit() { + let input1 = BigUint::from(10u32); + let input2 = BigUint::from(5u32); + let expected = BigUint::from(5u32); + assert_eq!(gcd_big_unit(input1, input2).unwrap(), expected); + + let input1 = BigUint::from(1000u32); + let input2 = BigUint::from(10u32); + let expected = BigUint::from(10u32); + assert_eq!(gcd_big_unit(input1, input2).unwrap(), expected); + + let input1 = BigUint::from(0u32); + let input2 = BigUint::from(10u32); + assert!(gcd_big_unit(input1, input2).is_err()); + + let input1 = BigUint::from_str("100025").unwrap(); + let input2 = BigUint::from_str("500").unwrap(); + let expected = BigUint::from_str("25").unwrap(); + assert_eq!(gcd_big_unit(input1, input2).unwrap(), expected); +} \ No newline at end of file diff --git a/experimental/unitsconversion/src/lib.rs b/experimental/unitsconversion/src/lib.rs index 4595579af4b..6ecbbdf8439 100644 --- a/experimental/unitsconversion/src/lib.rs +++ b/experimental/unitsconversion/src/lib.rs @@ -16,6 +16,9 @@ pub mod provider; pub enum Error { #[displaydoc("Magnitude or number of digits exceeded")] Limit, + + #[displaydoc("The input is not valid")] + InvalidInput, } #[cfg(feature = "std")] diff --git a/experimental/unitsconversion/src/provider.rs b/experimental/unitsconversion/src/provider.rs index 3c19d4c947d..d159dc7bea6 100644 --- a/experimental/unitsconversion/src/provider.rs +++ b/experimental/unitsconversion/src/provider.rs @@ -9,8 +9,11 @@ //! //! Read more about data providers: [`icu_provider`] +use std::borrow::Cow; + use icu_provider::prelude::*; -use zerovec::ZeroMap; +use zerovec::{ZeroMap, ZeroVec}; +use num_bigint::BigUint; use crate::{helpers::gcd, Error}; @@ -22,7 +25,7 @@ use crate::{helpers::gcd, Error}; /// to be stable, their Rust representation might not be. Use with caution. ///
#[icu_provider::data_struct(marker(UnitsConstantsV1Marker, "units/constants@1", singleton))] -#[derive(Default, Clone, PartialEq, Debug)] +#[derive(Clone, PartialEq, Debug)] #[cfg_attr( feature = "datagen", derive(serde::Serialize, databake::Bake), @@ -30,12 +33,12 @@ use crate::{helpers::gcd, Error}; )] #[cfg_attr(feature = "serde", derive(serde::Deserialize))] #[yoke(prove_covariance_manually)] -pub struct UnitsConstantsV1<'data> { +pub struct UnitsConstantsV1 <'data> { // TODO(#3882): Use a more efficient representation for the values with numerators and denominators. // Also, the constant types. /// Maps from constant name (e.g. ft_to_m) to the value of the constant (e.g. 0.3048). #[cfg_attr(feature = "serde", serde(borrow))] - pub constants_map: ZeroMap<'data, str, ConstantValue>, + pub constants_map: ZeroMap<'data, str, ConstantValueULE>, } #[zerovec::make_ule(ConstantTypeULE)] @@ -53,72 +56,20 @@ pub enum ConstantType { Approximate = 1, } -#[zerovec::make_ule(ConstantValueULE)] +#[zerovec::make_varule(ConstantValueULE)] #[cfg_attr( feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_unitsconversion::provider), )] +#[zerovec::derive(Serialize, Deserialize, Debug)] #[cfg_attr(feature = "serde", derive(serde::Deserialize))] -#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)] -pub struct ConstantValue { - pub numerator: u32, - pub denominator: u32, - pub constant_type: ConstantType, -} - -impl ConstantValue { - pub fn multiply(&self, other: &ConstantValue) -> Result { - let numerator = self.numerator as u64 * other.numerator as u64; - let denominator = self.denominator as u64 * other.denominator as u64; - let gcd = gcd(numerator, denominator); - - let numerator = match u32::try_from(numerator / gcd) { - Ok(numerator) => numerator, - Err(_) => return Err(Error::Limit), - }; - - let denominator = match u32::try_from(denominator / gcd) { - Ok(denominator) => denominator, - Err(_) => return Err(Error::Limit), - }; +#[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)] +pub struct ConstantValue <'data> { + #[serde(borrow)] + pub numerator: ZeroVec<'data, u8>, + #[serde(borrow)] + pub denominator: ZeroVec<'data, u8>, - let constant_type = match (self.constant_type, other.constant_type) { - (ConstantType::Actual, ConstantType::Actual) => ConstantType::Actual, - _ => ConstantType::Approximate, - }; - - Ok(ConstantValue { - numerator, - denominator, - constant_type, - }) - } - - pub fn divide(&self, other: &ConstantValue) -> Result { - let numerator = self.numerator as u64 * other.denominator as u64; - let denominator = self.denominator as u64 * other.numerator as u64; - let gcd = gcd(numerator, denominator); - - let numerator = match u32::try_from(numerator / gcd) { - Ok(numerator) => numerator, - Err(_) => return Err(Error::Limit), - }; - - let denominator = match u32::try_from(denominator / gcd) { - Ok(denominator) => denominator, - Err(_) => return Err(Error::Limit), - }; - - let constant_type = match (self.constant_type, other.constant_type) { - (ConstantType::Actual, ConstantType::Actual) => ConstantType::Actual, - _ => ConstantType::Approximate, - }; - - Ok(ConstantValue { - numerator, - denominator, - constant_type, - }) - } + pub constant_type: ConstantType, } diff --git a/experimental/unitsconversion/src/ule.rs b/experimental/unitsconversion/src/ule.rs new file mode 100644 index 00000000000..f80fa3961ef --- /dev/null +++ b/experimental/unitsconversion/src/ule.rs @@ -0,0 +1,19 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use zerovec::{ + maps::ZeroMapKV, + ule::{AsULE, ZeroVecError, ULE}, +}; + + + +/// `ConstantValueULE` is a type optimized for efficient storing and +/// deserialization of `CurrencyPatterns` using the `ZeroVec` model. +#[derive(Copy, Clone, Debug, PartialEq)] +#[repr(transparent)] +pub struct ConstantValueULE([u8; 3]); + + +// TODO: Implement the ULE trait for ConstantValueULE. \ No newline at end of file From 8cd825b99f712ad6e63640a532a98cd75a651536 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Sat, 16 Sep 2023 00:05:56 +0200 Subject: [PATCH 011/104] finalize the skeleton --- Cargo.lock | 48 +++ experimental/unitsconversion/Cargo.toml | 1 + experimental/unitsconversion/src/helpers.rs | 58 --- experimental/unitsconversion/src/lib.rs | 16 - experimental/unitsconversion/src/provider.rs | 26 +- experimental/unitsconversion/src/ule.rs | 19 - provider/datagen/Cargo.toml | 3 + .../src/transform/cldr/units/helpers.rs | 388 +++++------------- .../datagen/src/transform/cldr/units/mod.rs | 127 ++++-- 9 files changed, 253 insertions(+), 433 deletions(-) delete mode 100644 experimental/unitsconversion/src/helpers.rs delete mode 100644 experimental/unitsconversion/src/ule.rs diff --git a/Cargo.lock b/Cargo.lock index 1373fdc4d2e..d203ed4aae0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1018,6 +1018,15 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fraction" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3027ae1df8d41b4bed2241c8fdad4acc1e7af60c8e17743534b545e77182d678" +dependencies = [ + "num", +] + [[package]] name = "freertos-rust" version = "0.1.2" @@ -1473,6 +1482,7 @@ dependencies = [ "displaydoc", "elsa 1.8.1 (git+https://github.com/Manishearth/elsa?rev=56d05375eea36596432a0843721d26edf3b0ec75)", "eyre", + "fraction", "icu", "icu_calendar", "icu_casemap", @@ -1504,6 +1514,8 @@ dependencies = [ "log", "memchr", "ndarray", + "num-bigint", + "num-traits", "once_cell", "postcard", "rayon", @@ -2042,6 +2054,7 @@ dependencies = [ "databake", "displaydoc", "fixed_decimal", + "fraction", "icu_locid", "icu_provider", "num-bigint", @@ -2361,6 +2374,19 @@ dependencies = [ "rawpointer", ] +[[package]] +name = "num" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" +dependencies = [ + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + [[package]] name = "num-bigint" version = "0.4.4" @@ -2391,6 +2417,28 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-iter" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.16" diff --git a/experimental/unitsconversion/Cargo.toml b/experimental/unitsconversion/Cargo.toml index 5f1b6d06843..7ab604b2423 100644 --- a/experimental/unitsconversion/Cargo.toml +++ b/experimental/unitsconversion/Cargo.toml @@ -30,6 +30,7 @@ tinystr = { workspace = true, features = ["zerovec"], default-features = false } zerovec = { workspace = true, features = ["yoke"] } num-bigint = { version = "0.4.4", default-features = false } num-traits = { version = "0.2.14", default-features = false } +fraction = { version = "0.13.1", default-features = false } [features] default = [] diff --git a/experimental/unitsconversion/src/helpers.rs b/experimental/unitsconversion/src/helpers.rs deleted file mode 100644 index bd10e62e689..00000000000 --- a/experimental/unitsconversion/src/helpers.rs +++ /dev/null @@ -1,58 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -use std::str::FromStr; - -use num_bigint::BigUint; -use num_traits::{Zero, One}; - -use crate::Error; - - -pub fn gcd_big_unit(a: BigUint, b: BigUint) -> Result { - if a.is_zero() || b.is_zero(){ - return Err(Error::InvalidInput); - } - - let mut a = a.clone(); - let mut b = b.clone(); - - while !b.is_zero() { - let t = b.clone(); - b = a.modpow(&One::one(), &b); - a = t; - } - - Ok(a) -} - -/// Returns the greatest common divisor of two numbers. -pub fn gcd(a: u64, b: u64) -> u64 { - if b == 0 { - return a; - } - gcd(b, a % b) -} - -#[test] -fn test_gcd_big_unit() { - let input1 = BigUint::from(10u32); - let input2 = BigUint::from(5u32); - let expected = BigUint::from(5u32); - assert_eq!(gcd_big_unit(input1, input2).unwrap(), expected); - - let input1 = BigUint::from(1000u32); - let input2 = BigUint::from(10u32); - let expected = BigUint::from(10u32); - assert_eq!(gcd_big_unit(input1, input2).unwrap(), expected); - - let input1 = BigUint::from(0u32); - let input2 = BigUint::from(10u32); - assert!(gcd_big_unit(input1, input2).is_err()); - - let input1 = BigUint::from_str("100025").unwrap(); - let input2 = BigUint::from_str("500").unwrap(); - let expected = BigUint::from_str("25").unwrap(); - assert_eq!(gcd_big_unit(input1, input2).unwrap(), expected); -} \ No newline at end of file diff --git a/experimental/unitsconversion/src/lib.rs b/experimental/unitsconversion/src/lib.rs index 6ecbbdf8439..529e07a2325 100644 --- a/experimental/unitsconversion/src/lib.rs +++ b/experimental/unitsconversion/src/lib.rs @@ -4,22 +4,6 @@ //! Experimental. -use displaydoc::Display; - extern crate alloc; -pub mod helpers; pub mod provider; - -#[derive(Display, Debug, PartialEq)] -#[non_exhaustive] -pub enum Error { - #[displaydoc("Magnitude or number of digits exceeded")] - Limit, - - #[displaydoc("The input is not valid")] - InvalidInput, -} - -#[cfg(feature = "std")] -impl std::error::Error for Error {} diff --git a/experimental/unitsconversion/src/provider.rs b/experimental/unitsconversion/src/provider.rs index d159dc7bea6..efbc171b473 100644 --- a/experimental/unitsconversion/src/provider.rs +++ b/experimental/unitsconversion/src/provider.rs @@ -9,13 +9,8 @@ //! //! Read more about data providers: [`icu_provider`] -use std::borrow::Cow; - use icu_provider::prelude::*; use zerovec::{ZeroMap, ZeroVec}; -use num_bigint::BigUint; - -use crate::{helpers::gcd, Error}; /// This type contains all of the constants data for units conversion. /// @@ -33,7 +28,7 @@ use crate::{helpers::gcd, Error}; )] #[cfg_attr(feature = "serde", derive(serde::Deserialize))] #[yoke(prove_covariance_manually)] -pub struct UnitsConstantsV1 <'data> { +pub struct UnitsConstantsV1<'data> { // TODO(#3882): Use a more efficient representation for the values with numerators and denominators. // Also, the constant types. /// Maps from constant name (e.g. ft_to_m) to the value of the constant (e.g. 0.3048). @@ -56,6 +51,21 @@ pub enum ConstantType { Approximate = 1, } +#[zerovec::make_ule(SignULE)] +#[cfg_attr( + feature = "datagen", + derive(serde::Serialize, databake::Bake), + databake(path = icu_unitsconversion::provider), +)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize))] +#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)] +#[repr(u8)] +pub enum Sign { + #[default] + Positive = 0, + Negative = 1, +} + #[zerovec::make_varule(ConstantValueULE)] #[cfg_attr( feature = "datagen", @@ -65,11 +75,11 @@ pub enum ConstantType { #[zerovec::derive(Serialize, Deserialize, Debug)] #[cfg_attr(feature = "serde", derive(serde::Deserialize))] #[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)] -pub struct ConstantValue <'data> { +pub struct ConstantValue<'data> { #[serde(borrow)] pub numerator: ZeroVec<'data, u8>, #[serde(borrow)] pub denominator: ZeroVec<'data, u8>, - + pub sign: Sign, pub constant_type: ConstantType, } diff --git a/experimental/unitsconversion/src/ule.rs b/experimental/unitsconversion/src/ule.rs deleted file mode 100644 index f80fa3961ef..00000000000 --- a/experimental/unitsconversion/src/ule.rs +++ /dev/null @@ -1,19 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -use zerovec::{ - maps::ZeroMapKV, - ule::{AsULE, ZeroVecError, ULE}, -}; - - - -/// `ConstantValueULE` is a type optimized for efficient storing and -/// deserialization of `CurrencyPatterns` using the `ZeroVec` model. -#[derive(Copy, Clone, Debug, PartialEq)] -#[repr(transparent)] -pub struct ConstantValueULE([u8; 3]); - - -// TODO: Implement the ULE trait for ConstantValueULE. \ No newline at end of file diff --git a/provider/datagen/Cargo.toml b/provider/datagen/Cargo.toml index 8be5c490e97..681c223f7d5 100644 --- a/provider/datagen/Cargo.toml +++ b/provider/datagen/Cargo.toml @@ -32,6 +32,9 @@ rust-version.workspace = true all-features = true [dependencies] +fraction = { version = "0.13.1", default-features = false } +num-bigint = { version = "0.4.4", default-features = false } +num-traits = { version = "0.2.14", default-features = false } # ICU components icu_calendar = { workspace = true, features = ["datagen"] } diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index cb36dea98ed..c333e7192d2 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -2,350 +2,150 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use std::collections::BTreeMap; +use std::{ + ops::{Div, Mul}, + str::FromStr, +}; +use fraction::GenericFraction; use icu_provider::DataError; -use icu_unitsconversion::{ - helpers::gcd, - provider::{ConstantType, ConstantValue}, - Error, -}; +use icu_unitsconversion::provider::{ConstantType, Sign}; +use num_bigint::BigUint; /// Removes all whitespace from a string. pub fn remove_whitespace(s: &str) -> String { s.chars().filter(|c| !c.is_whitespace()).collect() } -/// Split a string into a vector of strings, using specific characters as delimiters. -pub fn split_string(s: &str, delimiters: Vec<&str>) -> Vec { - let mut result = vec![]; - let mut current = String::new(); - for c in s.chars() { - if delimiters.contains(&c.to_string().as_str()) { - result.push(current.clone()); - current.clear(); - } else { - current.push(c); - } - } - result.push(current); - result -} - -/// Checks if a string represents a decimal number. -pub fn check_if_decimal_number(number: &str) -> bool { - if number.chars().next().unwrap_or('0') == '-' || number.chars().next().unwrap_or('0') == '+' { - return check_if_decimal_number(&number[1..]); - } - - let mut split = number.split('.'); - if split.clone().count() > 2 { - return false; - } - let whole = split.next().unwrap_or("0"); - let fractional = split.next().unwrap_or("0"); - if whole.chars().any(|c| !c.is_ascii_digit()) { - return false; - } - if fractional.chars().any(|c| !c.is_ascii_digit()) { - return false; - } - true -} - -/// Converts a rational number represented as a string into a tuple of (numerator, denominator). -pub fn to_fractional(number: &str) -> Result<(u64, u64), DataError> { - if !check_if_decimal_number(number) { - return Err(DataError::custom("the number is not a decimal number")); - } - - let mut split = number.split('.'); - let whole = split.next().unwrap_or("0"); - let fractional = split.next().unwrap_or("0"); - let mut denominator = 1; - for _ in 0..fractional.len() { - denominator *= 10; - } - let numerator = whole.to_string() + fractional; - let numerator = numerator.parse::().unwrap_or(0); - let gcd = gcd(numerator, denominator); - Ok((numerator / gcd, denominator / gcd)) -} - -/// Checks if a string represents a scientific notation number. -pub fn check_if_scientific_notation_number(number: &str) -> bool { - let mut split = number.split('E'); - if split.clone().count() > 2 { - return false; - } - - let base = split.next().unwrap_or("0"); - let exponent = split.next().unwrap_or("0"); - check_if_decimal_number(base) && check_if_decimal_number(exponent) -} - /// Converts a scientific notation number represented as a string into a tuple of (numerator, denominator). pub fn convert_scientific_notation_number_to_fractional( number: &str, -) -> Result<(u64, u64), DataError> { - if !check_if_scientific_notation_number(number) { +) -> Result, DataError> { + let number = remove_whitespace(number); // TODO: check this. + let mut split = number.split('E'); + if split.clone().count() > 2 { return Err(DataError::custom( "the number is not a scientific notation number", )); } - - let mut split = number.split('E'); let base = split.next().unwrap_or("0"); let exponent = split.next().unwrap_or("0"); + let base: GenericFraction = match GenericFraction::from_str(base) { + Ok(base) => base, + Err(_) => return Err(DataError::custom("the number is not a valid number")), + }; + let exponent = match f64::from_str(exponent) { + Ok(exponent) => exponent, + Err(_) => return Err(DataError::custom("the exponent is not a valid number")), + }; - let (mut numerator, mut denominator) = to_fractional(base).unwrap(); - let exponent = exponent.parse::().unwrap(); - if exponent > 0 { - for _ in 0..exponent { - numerator *= 10; - let gcd = gcd(numerator, denominator); - numerator /= gcd; - denominator /= gcd; + let mut result = base; + let generic_ten: GenericFraction = + GenericFraction::new(BigUint::from(10u32), BigUint::from(1u32)); // TODO: fix this + if exponent > 0.0 { + for _ in 0..exponent as u32 { + result = result.mul(generic_ten.clone()); } } else { - for _ in 0..(-exponent) { - denominator *= 10; - let gcd = gcd(numerator, denominator); - numerator /= gcd; - denominator /= gcd; + for _ in 0..(-exponent) as u32 { + result = result.div(generic_ten.clone()); } } - let gcd = gcd(numerator, denominator); - Ok((numerator / gcd, denominator / gcd)) -} - -#[test] -fn test_remove_whitespace() { - let input = "He llo Wo rld!"; - let expected = "HelloWorld!"; - let actual = remove_whitespace(input); - assert_eq!(expected, actual); -} - -#[test] -fn test_split_string() { - let input = "Hello,World!/in/ICU4X"; - let expected = vec![ - "Hello".to_string(), - "World!".to_string(), - "in".to_string(), - "ICU4X".to_string(), - ]; - let actual = split_string(input, vec![",", "/"]); - assert_eq!(expected, actual); - let input = "ft3_to_m3/12*12*12"; - let expected = vec![ - "ft3_to_m3".to_string(), - "12".to_string(), - "12".to_string(), - "12".to_string(), - ]; - let actual = split_string(input, vec!["/", "*"]); - assert_eq!(expected, actual); + Ok(result) } -#[test] -fn test_gcd() { - let input = (12, 8); - let expected = 4; - let actual = gcd(input.0, input.1); - assert_eq!(expected, actual); - - let input = (8, 12); - let expected = 4; - let actual = gcd(input.0, input.1); - assert_eq!(expected, actual); - - let input = (12, 0); - let expected = 12; - let actual = gcd(input.0, input.1); - assert_eq!(expected, actual); - - let input = (0, 12); - let expected = 12; - let actual = gcd(input.0, input.1); - assert_eq!(expected, actual); +/// Checks if a string has letters. +/// Returns true if the string has letters, false otherwise. +pub fn has_letters(s: &str) -> bool { + s.chars().any(|c| c.is_alphabetic()) } -/// Converts a string representing a constant value into a tuple of (numerator, denominator). -/// For example: input = "6.67408E-11", output will be = (41713, 625000000000000) -/// input = "ft_to_m", output will be error. -pub fn convert_constant_value_in_scientific_to_fractional( - input: &str, +/// Converts a fractional number to a constant value. +pub fn convert_fractional_to_constant_value( + fraction: GenericFraction, constant_type: ConstantType, -) -> Result { - if !check_if_scientific_notation_number(input) { - return Err(DataError::custom( - "the number is not a scientific notation number", - )); - } - - let (numerator, denominator) = match convert_scientific_notation_number_to_fractional(input) { - Ok((numerator, denominator)) => (numerator, denominator), - Err(e) => return Err(e), +) -> Result<(Vec, Vec, Sign, ConstantType), DataError> { + let numerator = match fraction.numer() { + Some(numerator) => numerator.to_bytes_le(), + None => return Err(DataError::custom("the numerator is too large")), }; - let gcd = gcd(numerator, denominator); - - let numerator = match u32::try_from(numerator / gcd) { - Ok(numerator) => numerator, - Err(_) => return Err(DataError::custom("the numerator is too large")), + let denominator = match fraction.denom() { + Some(denominator) => denominator.to_bytes_le(), + None => return Err(DataError::custom("the denominator is too large")), }; - let denominator = match u32::try_from(denominator / gcd) { - Ok(denominator) => denominator, - Err(_) => return Err(DataError::custom("the denominator is too large")), + let sign = match fraction.sign() { + Some(sign) => match sign { + fraction::Sign::Plus => Sign::Positive, + fraction::Sign::Minus => Sign::Negative, + }, + None => return Err(DataError::custom("the sign is not valid")), }; - Ok(ConstantValue { - numerator, - denominator, - constant_type, - }) + Ok((numerator, denominator, sign, constant_type)) } -pub fn convert_any_constant_value_to_fractional( - constant_str: &str, - constants_map: &BTreeMap<&str, ConstantValue>, - constant_type: ConstantType, -) -> Result { - let constant_string_cleaned = remove_whitespace(constant_str); - let fraction_str = split_string(constant_string_cleaned.as_str(), vec!["/"]); - let numerator_strs = split_string(&fraction_str[0].to_string(), vec!["*"]); - let denominator_strs: Option> = if fraction_str.len() > 1 { - Some(split_string(&fraction_str[1].to_string(), vec!["*"])) - } else { - None - }; - - let mut result = ConstantValue { - numerator: 1, - denominator: 1, - constant_type, - }; - - for numerator_str in numerator_strs { - let numerator = match constants_map.get(numerator_str.as_str()) { - Some(numerator) => *numerator, - None => match convert_constant_value_in_scientific_to_fractional( - numerator_str.as_str(), - constant_type, - ) { - Ok(numerator) => numerator, - Err(e) => return Err(e), - }, - }; - result = match result.multiply(&numerator) { - Ok(result) => result, - Err(e) => { - return match e { - Error::Limit => Err(DataError::custom("calculations exceeded the limit")), - _ => Err(DataError::custom("the numerator is too large")), - } - } +/// Converts an array of strings of numerator or denominator to fraction. +pub fn convert_array_of_strings_to_fraction( + num: &[&str], + den: &[&str], +) -> Result, DataError> { + let mut result = GenericFraction::new(BigUint::from(1u32), BigUint::from(1u32)); + + for vnum in num.iter() { + let num = match convert_scientific_notation_number_to_fractional(vnum) { + Ok(num) => num, + Err(e) => return Err(e), }; + result = result.mul(num); } - if denominator_strs.is_none() { - return Ok(result); - } - - for denominator_str in denominator_strs.unwrap() { - let denominator = match constants_map.get(denominator_str.as_str()) { - Some(denominator) => *denominator, - None => match convert_constant_value_in_scientific_to_fractional( - denominator_str.as_str(), - constant_type, - ) { - Ok(denominator) => denominator, - Err(e) => return Err(e), - }, - }; - result = match result.divide(&denominator) { - Ok(result) => result, - Err(e) => { - return match e { - Error::Limit => Err(DataError::custom("calculations exceeded the limit")), - _ => Err(DataError::custom("the denominator is too large")), - } - } + for vden in den.iter() { + let den = match convert_scientific_notation_number_to_fractional(vden) { + Ok(den) => den, + Err(e) => return Err(e), }; + result = result.div(den); } Ok(result) } -#[test] -fn test_to_fractional() { - let input = "1.5"; - let expected = (3, 2); - let actual = to_fractional(input); - assert_eq!(expected, actual.unwrap()); +/// Splits the constant string into a tuple of (numerator, denominator). +/// The numerator and denominator are represented as array of strings. +/// For example: "1/2" -> (["1"], ["2"]) +/// "1 * 2 / 3 * ft_to_m" -> (["1", "2"], ["3" , "ft_to_m"]) +pub fn split_constant_string(constant_string: &str) -> Result<(Vec<&str>, Vec<&str>), DataError> { + let constant_string = remove_whitespace(constant_string); + let mut numerator = Vec::<&str>::new(); + let mut denominator = Vec::<&str>::new(); - let input = "1.25"; - let expected = (5, 4); - let actual = to_fractional(input); - assert_eq!(expected, actual.unwrap()); - - let input = "1.125"; - let expected = (9, 8); - let actual = to_fractional(input); - assert_eq!(expected, actual.unwrap()); - - let input = "1.0625"; - let expected = (17, 16); - let actual = to_fractional(input); - assert_eq!(expected, actual.unwrap()); - - let input = "0.000003333"; - let expected = (3333, 1000000000); - let actual = to_fractional(input); - assert_eq!(expected, actual.unwrap()); + let mut split = constant_string.split('/'); + if split.clone().count() > 2 { + return Err(DataError::custom("the constant string is not valid")); + } + let numerator_string = split.next().unwrap_or("1"); + let denominator_string = split.next().unwrap_or("1"); - let input = "11111"; - let expected = (11111, 1); - let actual = to_fractional(input); - assert_eq!(expected, actual.unwrap()); + let mut split = numerator_string.split('*'); + for num in split { + numerator.push(num); + } - let input = "1,000,000.5"; - let actual = to_fractional(input); - assert!(actual.is_err()); + let mut split = denominator_string.split('*'); + for num in split { + denominator.push(num); + } - let input = "1.5.5"; - let actual = to_fractional(input); - assert!(actual.is_err()); + Ok((numerator, denominator)) } #[test] -fn test_convert_scientific_notation_number_to_fractional() { - let input = "1.5E1"; - let expected = (15, 1); - let actual = convert_scientific_notation_number_to_fractional(input); - assert_eq!(expected, actual.unwrap()); - - let input = "1.5E-1"; - let expected = (3, 20); - let actual = convert_scientific_notation_number_to_fractional(input); - assert_eq!(expected, actual.unwrap()); - - let input = "1.5E-2"; - let expected = (3, 200); - let actual = convert_scientific_notation_number_to_fractional(input); - assert_eq!(expected, actual.unwrap()); - - let input = "6.67408E-11"; - let expected = (41713, 625000000000000); - let actual = convert_scientific_notation_number_to_fractional(input); - assert_eq!(expected, actual.unwrap()); - - let input = "149597870700"; - let expected = (149597870700, 1); - let actual = convert_scientific_notation_number_to_fractional(input); - assert_eq!(expected, actual.unwrap()); +fn test_remove_whitespace() { + let input = "He llo Wo rld!"; + let expected = "HelloWorld!"; + let actual = remove_whitespace(input); + assert_eq!(expected, actual); } diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index f90c9259b30..46d52899fe7 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -7,17 +7,20 @@ mod helpers; use std::collections::BTreeMap; use crate::transform::cldr::cldr_serde; +use fraction::GenericFraction; use icu_provider::{ datagen::IterableDataProvider, DataError, DataLocale, DataPayload, DataProvider, DataRequest, DataResponse, }; use icu_unitsconversion::provider::{ - ConstantType, ConstantValue, UnitsConstantsV1, UnitsConstantsV1Marker, + ConstantType, ConstantValue, ConstantValueULE, UnitsConstantsV1, UnitsConstantsV1Marker, }; -use zerovec::{ZeroMap}; +use num_bigint::BigUint; +use zerovec::{ZeroMap, ZeroVec}; use self::helpers::{ - convert_any_constant_value_to_fractional, convert_constant_value_in_scientific_to_fractional, + convert_array_of_strings_to_fraction, convert_fractional_to_constant_value, has_letters, + remove_whitespace, split_constant_string, }; impl DataProvider for crate::DatagenProvider { @@ -31,52 +34,100 @@ impl DataProvider for crate::DatagenProvider { let mut constants_map = BTreeMap::<&str, ConstantValue>::new(); let constants = &_units_data.supplemental.unit_constants.constants; - let mut constants_need_map = Vec::<(&str, &str, ConstantType)>::new(); - for (key, constant) in constants { - let constant_type = match &constant.status { - Some(status) => match status.as_str() { - "approximate" => ConstantType::Approximate, - _ => return Err(DataError::custom("Unknown constant type")), - }, - None => ConstantType::Actual, - }; - let constant_str = constant.value.as_str(); - let constant_value = match convert_constant_value_in_scientific_to_fractional( - constant_str, - constant_type, - ) { - Ok(value) => value, - Err(_) => { - constants_need_map.push((key, constant_str, constant_type)); - continue; + // Constants that has a constants in their value. + // For exmaple: "ft2_to_m2": "ft_to_m * ft_to_m", + let mut constants_with_constants_map = + BTreeMap::<&str, (Vec<&str>, Vec<&str>, ConstantType)>::new(); + + for (cons_name, cons_value) in constants { + let value = remove_whitespace(&cons_value.value); + let (num, den) = match split_constant_string(&value) { + Ok((num, den)) => (num, den), + Err(e) => { + return Err(e); } }; - constants_map.insert(key, constant_value); - } + let constant_type = match cons_value.status.as_deref() { + Some("approximate") => ConstantType::Approximate, + _ => ConstantType::Actual, + }; - for (key, constant_str, constant_type) in constants_need_map { - let constant_value = convert_any_constant_value_to_fractional( - constant_str, - &constants_map, - constant_type, - ); + constants_with_constants_map.insert(cons_name, (num, den, constant_type)); + } - match constant_value { - Ok(constant_value) => { - constants_map.insert(key, constant_value); + // This loop will replace all the constants in the value of a constant with their values. + loop { + let mut cons_with_text: u16 = 0; + for (cons_name, (num, den, constant_type)) in constants_with_constants_map.iter() { + for i in 0..num.len() { + if !has_letters(num[i]) { + continue; + } + + if constants_with_constants_map.contains_key(num[i]) { + let (rnum, rden, rconstant_type) = + constants_with_constants_map.get(num[i]).unwrap(); + num.remove(i); + // append the elements in rnum to num and rden to den + num.append(&mut rnum.clone()); + den.append(&mut rden.clone()); + + if *rconstant_type == ConstantType::Approximate { + *constant_type = ConstantType::Approximate; + } + } else { + cons_with_text += 1; + } } - Err(_) => { - return Err(DataError::custom("Failed to convert constant_str") - .with_debug_context(constant_str)) + + for i in 0..den.len() { + if !has_letters(den[i]) { + continue; + } + + if constants_with_constants_map.contains_key(den[i]) { + let (rnum, rden, constant_type) = + constants_with_constants_map.get(den[i]).unwrap(); + den.remove(i); + // append the elements in rden to num and rnum to den + num.append(&mut rden.clone()); + den.append(&mut rnum.clone()); + + if *constant_type == ConstantType::Approximate { + *constant_type = ConstantType::Approximate; + } + } else { + cons_with_text += 1; + } } } + + if cons_with_text == 0 { + break; + } + } + + let mut constants_map = BTreeMap::<&str, ConstantValue>::new(); + + for (cons_name, (num, den, constant_type)) in constants_with_constants_map.iter() { + let value = convert_array_of_strings_to_fraction(num, den)?; + let (num, den, sign, cons_type) = + convert_fractional_to_constant_value(value, *constant_type)?; + constants_map.insert( + cons_name, + ConstantValue { + numerator: ZeroVec::from_iter(num), + denominator: ZeroVec::from_iter(den), + sign, + constant_type: cons_type, + }, + ); } - let result = UnitsConstantsV1 { - constants_map: ZeroMap::from_iter(constants_map), - }; + let constants_map = ZeroMap::from_iter(constants_map.into_iter()); + let result = UnitsConstantsV1 { constants_map }; Ok(DataResponse { metadata: Default::default(), From 0ab1091cda8afc0988f4016ff5d073188e01fffb Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Sat, 16 Sep 2023 00:07:19 +0200 Subject: [PATCH 012/104] update the data --- .../data/cldr/cldr-core/supplemental/units.json | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/provider/datagen/tests/data/cldr/cldr-core/supplemental/units.json b/provider/datagen/tests/data/cldr/cldr-core/supplemental/units.json index 638c8d6c586..394af2bd011 100644 --- a/provider/datagen/tests/data/cldr/cldr-core/supplemental/units.json +++ b/provider/datagen/tests/data/cldr/cldr-core/supplemental/units.json @@ -14,9 +14,16 @@ "ft3_to_m3": { "_value": "ft_to_m*ft_to_m*ft_to_m" }, + "G": { + "_value": "6.67408E-11", + "_status": "approximate" + }, "gal_imp_to_m3": { "_value": "0.00454609" }, + "gal_to_m3": { + "_value": "231*in3_to_m3" + }, "glucose_molar_mass": { "_description": "derivation from the mean atomic weights according to STANDARD ATOMIC WEIGHTS 2019 on https://ciaaw.org/atomic-weights.htm", "_value": "180.1557" @@ -24,9 +31,19 @@ "gravity": { "_value": "9.80665" }, + "in3_to_m3": { + "_value": "ft3_to_m3/12*12*12" + }, + "item_per_mole": { + "_value": "6.02214076E+23" + }, "lb_to_kg": { "_value": "0.45359237" }, + "meters_per_AU": { + "_description": "https://www.iau.org/public/themes/measuring/", + "_value": "149597870700" + }, "PI": { "_value": "411557987 / 131002976", "_status": "approximate" From 7fefed305c729a004d299b37f0c92ed5732344b1 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Sat, 16 Sep 2023 00:13:31 +0200 Subject: [PATCH 013/104] fix --- experimental/unitsconversion/src/provider.rs | 2 ++ provider/datagen/src/transform/cldr/units/mod.rs | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/experimental/unitsconversion/src/provider.rs b/experimental/unitsconversion/src/provider.rs index efbc171b473..60ecc592b4a 100644 --- a/experimental/unitsconversion/src/provider.rs +++ b/experimental/unitsconversion/src/provider.rs @@ -80,6 +80,8 @@ pub struct ConstantValue<'data> { pub numerator: ZeroVec<'data, u8>, #[serde(borrow)] pub denominator: ZeroVec<'data, u8>, + pub sign: Sign, + pub constant_type: ConstantType, } diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 46d52899fe7..8cddec80474 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -126,8 +126,12 @@ impl DataProvider for crate::DatagenProvider { ); } + + let constants_map = ZeroMap::from_iter(constants_map.into_iter()); - let result = UnitsConstantsV1 { constants_map }; + let result = UnitsConstantsV1 { + constants_map, + }; Ok(DataResponse { metadata: Default::default(), From 4c99aabeebedec1e7731261c83e035a460be89e5 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Sat, 16 Sep 2023 00:13:44 +0200 Subject: [PATCH 014/104] fix --- provider/datagen/src/transform/cldr/units/mod.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 8cddec80474..46d52899fe7 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -126,12 +126,8 @@ impl DataProvider for crate::DatagenProvider { ); } - - let constants_map = ZeroMap::from_iter(constants_map.into_iter()); - let result = UnitsConstantsV1 { - constants_map, - }; + let result = UnitsConstantsV1 { constants_map }; Ok(DataResponse { metadata: Default::default(), From ecd0df2fd16b526a616d8b3f9cd254a3331ed59c Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 19 Sep 2023 22:24:21 +0200 Subject: [PATCH 015/104] fix ule issue --- .../src/transform/cldr/units/helpers.rs | 16 +++---- .../datagen/src/transform/cldr/units/mod.rs | 42 ++++++++++--------- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index c333e7192d2..0a6ba9a81e0 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -89,8 +89,8 @@ pub fn convert_fractional_to_constant_value( /// Converts an array of strings of numerator or denominator to fraction. pub fn convert_array_of_strings_to_fraction( - num: &[&str], - den: &[&str], + num: &Vec, + den: &Vec, ) -> Result, DataError> { let mut result = GenericFraction::new(BigUint::from(1u32), BigUint::from(1u32)); @@ -117,10 +117,12 @@ pub fn convert_array_of_strings_to_fraction( /// The numerator and denominator are represented as array of strings. /// For example: "1/2" -> (["1"], ["2"]) /// "1 * 2 / 3 * ft_to_m" -> (["1", "2"], ["3" , "ft_to_m"]) -pub fn split_constant_string(constant_string: &str) -> Result<(Vec<&str>, Vec<&str>), DataError> { +pub fn split_constant_string( + constant_string: &str, +) -> Result<(Vec, Vec), DataError> { let constant_string = remove_whitespace(constant_string); - let mut numerator = Vec::<&str>::new(); - let mut denominator = Vec::<&str>::new(); + let mut numerator = Vec::::new(); + let mut denominator = Vec::::new(); let mut split = constant_string.split('/'); if split.clone().count() > 2 { @@ -131,12 +133,12 @@ pub fn split_constant_string(constant_string: &str) -> Result<(Vec<&str>, Vec<&s let mut split = numerator_string.split('*'); for num in split { - numerator.push(num); + numerator.push(num.to_string()); } let mut split = denominator_string.split('*'); for num in split { - denominator.push(num); + denominator.push(num.to_string()); } Ok((numerator, denominator)) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 46d52899fe7..dfe6ea00f89 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -13,7 +13,7 @@ use icu_provider::{ DataResponse, }; use icu_unitsconversion::provider::{ - ConstantType, ConstantValue, ConstantValueULE, UnitsConstantsV1, UnitsConstantsV1Marker, + ConstantType, ConstantValue, UnitsConstantsV1, UnitsConstantsV1Marker, }; use num_bigint::BigUint; use zerovec::{ZeroMap, ZeroVec}; @@ -38,7 +38,7 @@ impl DataProvider for crate::DatagenProvider { // Constants that has a constants in their value. // For exmaple: "ft2_to_m2": "ft_to_m * ft_to_m", let mut constants_with_constants_map = - BTreeMap::<&str, (Vec<&str>, Vec<&str>, ConstantType)>::new(); + BTreeMap::<&str, (Vec, Vec, ConstantType)>::new(); for (cons_name, cons_value) in constants { let value = remove_whitespace(&cons_value.value); @@ -60,15 +60,18 @@ impl DataProvider for crate::DatagenProvider { // This loop will replace all the constants in the value of a constant with their values. loop { let mut cons_with_text: u16 = 0; - for (cons_name, (num, den, constant_type)) in constants_with_constants_map.iter() { + for (cons_name, (num, den, constant_type)) in constants_with_constants_map.iter_mut() { for i in 0..num.len() { - if !has_letters(num[i]) { + if !has_letters(num[i].as_str()) { continue; } - if constants_with_constants_map.contains_key(num[i]) { - let (rnum, rden, rconstant_type) = - constants_with_constants_map.get(num[i]).unwrap(); + cons_with_text += 1; + + if constants_with_constants_map.contains_key(num[i].as_str()) { + let (rnum, rden, rconstant_type) = constants_with_constants_map + .get_mut(num[i].as_str()) + .unwrap(); num.remove(i); // append the elements in rnum to num and rden to den num.append(&mut rnum.clone()); @@ -77,29 +80,25 @@ impl DataProvider for crate::DatagenProvider { if *rconstant_type == ConstantType::Approximate { *constant_type = ConstantType::Approximate; } - } else { - cons_with_text += 1; } } for i in 0..den.len() { - if !has_letters(den[i]) { + if !has_letters(den[i].as_str()) { continue; } - - if constants_with_constants_map.contains_key(den[i]) { - let (rnum, rden, constant_type) = - constants_with_constants_map.get(den[i]).unwrap(); + cons_with_text += 1; + if constants_with_constants_map.contains_key(den[i].as_str()) { + let (rnum, rden, mut rconstant_type) = + constants_with_constants_map.get(den[i].as_str()).unwrap(); den.remove(i); // append the elements in rden to num and rnum to den num.append(&mut rden.clone()); den.append(&mut rnum.clone()); - if *constant_type == ConstantType::Approximate { + if rconstant_type == ConstantType::Approximate { *constant_type = ConstantType::Approximate; } - } else { - cons_with_text += 1; } } } @@ -112,7 +111,7 @@ impl DataProvider for crate::DatagenProvider { let mut constants_map = BTreeMap::<&str, ConstantValue>::new(); for (cons_name, (num, den, constant_type)) in constants_with_constants_map.iter() { - let value = convert_array_of_strings_to_fraction(num, den)?; + let value = convert_array_of_strings_to_fraction(&num, &den)?; let (num, den, sign, cons_type) = convert_fractional_to_constant_value(value, *constant_type)?; constants_map.insert( @@ -125,8 +124,11 @@ impl DataProvider for crate::DatagenProvider { }, ); } - - let constants_map = ZeroMap::from_iter(constants_map.into_iter()); + let constants_map = ZeroMap::from_iter( + constants_map + .into_iter() + .map(|(k, v)| (k, zerovec::ule::encode_varule_to_box(&v))), + ); let result = UnitsConstantsV1 { constants_map }; Ok(DataResponse { From 40f541e9aca10310d0ee6d109973a0a8b089f2ba Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 15:16:29 +0200 Subject: [PATCH 016/104] Fix ULE issue and get the correct results --- .../src/transform/cldr/units/helpers.rs | 14 ++ .../datagen/src/transform/cldr/units/mod.rs | 72 +++--- .../data/json/units/constants@1/und.json | 214 ++++++++++++++++-- .../tests/data/postcard/fingerprints.csv | 2 +- 4 files changed, 255 insertions(+), 47 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index 0a6ba9a81e0..b1a24f4f612 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -61,6 +61,20 @@ pub fn has_letters(s: &str) -> bool { s.chars().any(|c| c.is_alphabetic()) } +/// Checks if a string is a valid scientific notation number. +/// Returns true if the string is a valid scientific notation number, false otherwise. +pub fn is_scientific_number(s: &str) -> bool { + let mut split = s.split('E'); + if split.clone().count() > 2 { + return false; + } + + let base = split.next().unwrap_or("0"); + let exponent = split.next().unwrap_or("0"); + + !has_letters(base) && !has_letters(exponent) +} + /// Converts a fractional number to a constant value. pub fn convert_fractional_to_constant_value( fraction: GenericFraction, diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index dfe6ea00f89..17682c29bd2 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -6,7 +6,10 @@ mod helpers; use std::collections::BTreeMap; -use crate::transform::cldr::cldr_serde; +use crate::transform::cldr::{ + cldr_serde, + units::helpers::{convert_scientific_notation_number_to_fractional, is_scientific_number}, +}; use fraction::GenericFraction; use icu_provider::{ datagen::IterableDataProvider, DataError, DataLocale, DataPayload, DataProvider, DataRequest, @@ -58,51 +61,68 @@ impl DataProvider for crate::DatagenProvider { } // This loop will replace all the constants in the value of a constant with their values. + let mut cons_with_text: u16 = 0; loop { - let mut cons_with_text: u16 = 0; - for (cons_name, (num, den, constant_type)) in constants_with_constants_map.iter_mut() { - for i in 0..num.len() { - if !has_letters(num[i].as_str()) { + cons_with_text = 0; + let mut constants_with_constants_map_replaceable = + BTreeMap::<&str, (Vec, Vec, ConstantType)>::new(); + for (cons_name, (num, den, constant_type)) in constants_with_constants_map.iter() { + let mut temp_num = num.clone(); + let mut temp_den = den.clone(); + let mut temp_constant_type = *constant_type; + + for i in 0..temp_num.len() { + if !has_letters(temp_num[i].as_str()) + || is_scientific_number(temp_num[i].as_str()) + { continue; } cons_with_text += 1; - - if constants_with_constants_map.contains_key(num[i].as_str()) { - let (rnum, rden, rconstant_type) = constants_with_constants_map - .get_mut(num[i].as_str()) - .unwrap(); - num.remove(i); + if let Some((rnum, rden, rconstant_type)) = + constants_with_constants_map.get(temp_num[i].as_str()) + { + temp_num.remove(i); // append the elements in rnum to num and rden to den - num.append(&mut rnum.clone()); - den.append(&mut rden.clone()); + temp_num.append(&mut rnum.clone()); + temp_den.append(&mut rden.clone()); if *rconstant_type == ConstantType::Approximate { - *constant_type = ConstantType::Approximate; + temp_constant_type = ConstantType::Approximate; } } } - for i in 0..den.len() { - if !has_letters(den[i].as_str()) { + for i in 0..temp_den.len() { + if !has_letters(temp_den[i].as_str()) + || is_scientific_number(temp_den[i].as_str()) + { continue; } + cons_with_text += 1; - if constants_with_constants_map.contains_key(den[i].as_str()) { - let (rnum, rden, mut rconstant_type) = - constants_with_constants_map.get(den[i].as_str()).unwrap(); - den.remove(i); - // append the elements in rden to num and rnum to den - num.append(&mut rden.clone()); - den.append(&mut rnum.clone()); - - if rconstant_type == ConstantType::Approximate { - *constant_type = ConstantType::Approximate; + if let Some((rnum, rden, rconstant_type)) = + constants_with_constants_map.get(temp_den[i].as_str()) + { + temp_den.remove(i); + // append the elements in rnum to den and rden to num + temp_num.append(&mut rden.clone()); + temp_den.append(&mut rnum.clone()); + + if *rconstant_type == ConstantType::Approximate { + temp_constant_type = ConstantType::Approximate; } } } + + constants_with_constants_map_replaceable + .insert(cons_name, (temp_num, temp_den, temp_constant_type)); } + constants_with_constants_map.clear(); + constants_with_constants_map = constants_with_constants_map_replaceable; + + print!("cons_with_text: {} \n", cons_with_text); if cons_with_text == 0 { break; } diff --git a/provider/datagen/tests/data/json/units/constants@1/und.json b/provider/datagen/tests/data/json/units/constants@1/und.json index 2982bd56382..acd36827e8f 100644 --- a/provider/datagen/tests/data/json/units/constants@1/und.json +++ b/provider/datagen/tests/data/json/units/constants@1/und.json @@ -1,53 +1,227 @@ { "constants_map": { + "G": { + "numerator": [ + 241, + 162 + ], + "denominator": [ + 0, + 16, + 252, + 38, + 111, + 56, + 2 + ], + "sign": "Positive", + "constant_type": "Approximate" + }, "PI": { - "numerator": 411557987, - "denominator": 131002976, + "numerator": [ + 99, + 224, + 135, + 24 + ], + "denominator": [ + 96, + 242, + 206, + 7 + ], + "sign": "Positive", "constant_type": "Approximate" }, "ft2_to_m2": { - "numerator": 145161, - "denominator": 1562500, + "numerator": [ + 9, + 55, + 2 + ], + "denominator": [ + 132, + 215, + 23 + ], + "sign": "Positive", "constant_type": "Actual" }, "ft3_to_m3": { - "numerator": 55306341, - "denominator": 1953125000, + "numerator": [ + 101, + 232, + 75, + 3 + ], + "denominator": [ + 136, + 82, + 106, + 116 + ], + "sign": "Positive", "constant_type": "Actual" }, "ft_to_m": { - "numerator": 381, - "denominator": 1250, + "numerator": [ + 125, + 1 + ], + "denominator": [ + 226, + 4 + ], + "sign": "Positive", "constant_type": "Actual" }, "gal_imp_to_m3": { - "numerator": 454609, - "denominator": 100000000, + "numerator": [ + 209, + 239, + 6 + ], + "denominator": [ + 0, + 225, + 245, + 5 + ], + "sign": "Positive", + "constant_type": "Actual" + }, + "gal_to_m3": { + "numerator": [ + 153, + 25, + 52, + 28 + ], + "denominator": [ + 0, + 162, + 148, + 26, + 29 + ], + "sign": "Positive", "constant_type": "Actual" }, "glucose_molar_mass": { - "numerator": 1801557, - "denominator": 10000, + "numerator": [ + 85, + 125, + 27 + ], + "denominator": [ + 16, + 39 + ], + "sign": "Positive", "constant_type": "Actual" }, "gravity": { - "numerator": 196133, - "denominator": 20000, + "numerator": [ + 37, + 254, + 2 + ], + "denominator": [ + 32, + 78 + ], + "sign": "Positive", + "constant_type": "Actual" + }, + "in3_to_m3": { + "numerator": [ + 127, + 65, + 31 + ], + "denominator": [ + 0, + 162, + 148, + 26, + 29 + ], + "sign": "Positive", + "constant_type": "Actual" + }, + "item_per_mole": { + "numerator": [ + 0, + 0, + 198, + 92, + 20, + 95, + 41, + 23, + 134, + 127 + ], + "denominator": [ + 1 + ], + "sign": "Positive", "constant_type": "Actual" }, "lb_to_kg": { - "numerator": 45359237, - "denominator": 100000000, + "numerator": [ + 133, + 32, + 180, + 2 + ], + "denominator": [ + 0, + 225, + 245, + 5 + ], + "sign": "Positive", + "constant_type": "Actual" + }, + "meters_per_AU": { + "numerator": [ + 108, + 90, + 186, + 212, + 34 + ], + "denominator": [ + 1 + ], + "sign": "Positive", "constant_type": "Actual" }, "sec_per_julian_year": { - "numerator": 31557600, - "denominator": 1, + "numerator": [ + 224, + 135, + 225, + 1 + ], + "denominator": [ + 1 + ], + "sign": "Positive", "constant_type": "Actual" }, "speed_of_light_meters_per_second": { - "numerator": 299792458, - "denominator": 1, + "numerator": [ + 74, + 120, + 222, + 17 + ], + "denominator": [ + 1 + ], + "sign": "Positive", "constant_type": "Actual" } } diff --git a/provider/datagen/tests/data/postcard/fingerprints.csv b/provider/datagen/tests/data/postcard/fingerprints.csv index feed7e87eb4..0653ee00375 100644 --- a/provider/datagen/tests/data/postcard/fingerprints.csv +++ b/provider/datagen/tests/data/postcard/fingerprints.csv @@ -2062,4 +2062,4 @@ transliterator/rules@1, und+und-t-und-d0-test-m0-hexuni-s0-test, 104B, 4335c7101 transliterator/rules@1, und+und-t-und-d0-test-m0-niels-s0-test, 1800B, 6a560a4143a4b60c transliterator/rules@1, und+und-t-und-d0-test-m0-rectesta-s0-test, 370B, af652bcb33e1038b transliterator/rules@1, und+und-t-und-d0-test-m0-rectestr-s0-test, 281B, 51be7571fd233bd6 -units/constants@1, und, 241B, d01bb675795388a6 +units/constants@1, und, 555B, b463e4109a02b639 From 51fb8a32f107bea3cf707d884379f9d5c8a1fc3d Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 15:24:11 +0200 Subject: [PATCH 017/104] remove unnecessary print --- provider/datagen/src/transform/cldr/units/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 17682c29bd2..3a94fc95f54 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -122,7 +122,6 @@ impl DataProvider for crate::DatagenProvider { constants_with_constants_map.clear(); constants_with_constants_map = constants_with_constants_map_replaceable; - print!("cons_with_text: {} \n", cons_with_text); if cons_with_text == 0 { break; } From 3330f092bc0358a144cc84729c1d18973e4a34b4 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 15:25:52 +0200 Subject: [PATCH 018/104] fix assign --- provider/datagen/src/transform/cldr/units/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 3a94fc95f54..07240cbed81 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -61,7 +61,7 @@ impl DataProvider for crate::DatagenProvider { } // This loop will replace all the constants in the value of a constant with their values. - let mut cons_with_text: u16 = 0; + let mut cons_with_text; loop { cons_with_text = 0; let mut constants_with_constants_map_replaceable = From 10883bc180321228c6a3878525a5c02203943647 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 15:30:50 +0200 Subject: [PATCH 019/104] fix clippy --- provider/datagen/src/transform/cldr/units/helpers.rs | 4 ++-- provider/datagen/src/transform/cldr/units/mod.rs | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index b1a24f4f612..f88b898360a 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -145,12 +145,12 @@ pub fn split_constant_string( let numerator_string = split.next().unwrap_or("1"); let denominator_string = split.next().unwrap_or("1"); - let mut split = numerator_string.split('*'); + let split = numerator_string.split('*'); for num in split { numerator.push(num.to_string()); } - let mut split = denominator_string.split('*'); + let split = denominator_string.split('*'); for num in split { denominator.push(num.to_string()); } diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 07240cbed81..7af979f8891 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -8,9 +8,9 @@ use std::collections::BTreeMap; use crate::transform::cldr::{ cldr_serde, - units::helpers::{convert_scientific_notation_number_to_fractional, is_scientific_number}, + units::helpers::{is_scientific_number}, }; -use fraction::GenericFraction; + use icu_provider::{ datagen::IterableDataProvider, DataError, DataLocale, DataPayload, DataProvider, DataRequest, DataResponse, @@ -18,7 +18,7 @@ use icu_provider::{ use icu_unitsconversion::provider::{ ConstantType, ConstantValue, UnitsConstantsV1, UnitsConstantsV1Marker, }; -use num_bigint::BigUint; + use zerovec::{ZeroMap, ZeroVec}; use self::helpers::{ @@ -34,7 +34,7 @@ impl DataProvider for crate::DatagenProvider { .cldr()? .core() .read_and_parse("supplemental/units.json")?; - let mut constants_map = BTreeMap::<&str, ConstantValue>::new(); + let _constants_map = BTreeMap::<&str, ConstantValue>::new(); let constants = &_units_data.supplemental.unit_constants.constants; @@ -130,7 +130,7 @@ impl DataProvider for crate::DatagenProvider { let mut constants_map = BTreeMap::<&str, ConstantValue>::new(); for (cons_name, (num, den, constant_type)) in constants_with_constants_map.iter() { - let value = convert_array_of_strings_to_fraction(&num, &den)?; + let value = convert_array_of_strings_to_fraction(num, den)?; let (num, den, sign, cons_type) = convert_fractional_to_constant_value(value, *constant_type)?; constants_map.insert( @@ -171,7 +171,7 @@ fn test_basic() { let provider = crate::DatagenProvider::latest_tested_offline_subset(); - let und: DataPayload = provider + let _und: DataPayload = provider .load(DataRequest { locale: &locale!("und").into(), metadata: Default::default(), From 5cbd0f2acf0ee23d41704aee30750d922393fa23 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 15:42:00 +0200 Subject: [PATCH 020/104] improvement --- .../src/transform/cldr/units/helpers.rs | 22 +++++++++---------- .../datagen/src/transform/cldr/units/mod.rs | 15 +++++-------- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index f88b898360a..e1fe6864b73 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -55,28 +55,28 @@ pub fn convert_scientific_notation_number_to_fractional( Ok(result) } -/// Checks if a string has letters. -/// Returns true if the string has letters, false otherwise. -pub fn has_letters(s: &str) -> bool { - s.chars().any(|c| c.is_alphabetic()) +/// Determines if a string contains any alphabetic characters. +/// Returns true if the string contains at least one alphabetic character, false otherwise. +pub fn contains_alphabetic_chars(s: &str) -> bool { + s.chars().any(char::is_alphabetic) } /// Checks if a string is a valid scientific notation number. /// Returns true if the string is a valid scientific notation number, false otherwise. pub fn is_scientific_number(s: &str) -> bool { - let mut split = s.split('E'); - if split.clone().count() > 2 { + let parts: Vec<&str> = s.split('E').collect(); + if parts.len() > 2 { return false; } - let base = split.next().unwrap_or("0"); - let exponent = split.next().unwrap_or("0"); + let base = parts.get(0).unwrap_or(&"0"); + let exponent = parts.get(1).unwrap_or(&"0"); - !has_letters(base) && !has_letters(exponent) + !contains_alphabetic_chars(base) && !contains_alphabetic_chars(exponent) } -/// Converts a fractional number to a constant value. -pub fn convert_fractional_to_constant_value( +/// Transforms a fractional number into a constant value. +pub fn transform_fraction_to_constant_value( fraction: GenericFraction, constant_type: ConstantType, ) -> Result<(Vec, Vec, Sign, ConstantType), DataError> { diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 7af979f8891..6274935affd 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -6,10 +6,7 @@ mod helpers; use std::collections::BTreeMap; -use crate::transform::cldr::{ - cldr_serde, - units::helpers::{is_scientific_number}, -}; +use crate::transform::cldr::{cldr_serde, units::helpers::is_scientific_number}; use icu_provider::{ datagen::IterableDataProvider, DataError, DataLocale, DataPayload, DataProvider, DataRequest, @@ -22,8 +19,8 @@ use icu_unitsconversion::provider::{ use zerovec::{ZeroMap, ZeroVec}; use self::helpers::{ - convert_array_of_strings_to_fraction, convert_fractional_to_constant_value, has_letters, - remove_whitespace, split_constant_string, + contains_alphabetic_chars, convert_array_of_strings_to_fraction, + transform_fraction_to_constant_value, remove_whitespace, split_constant_string, }; impl DataProvider for crate::DatagenProvider { @@ -72,7 +69,7 @@ impl DataProvider for crate::DatagenProvider { let mut temp_constant_type = *constant_type; for i in 0..temp_num.len() { - if !has_letters(temp_num[i].as_str()) + if !contains_alphabetic_chars(temp_num[i].as_str()) || is_scientific_number(temp_num[i].as_str()) { continue; @@ -94,7 +91,7 @@ impl DataProvider for crate::DatagenProvider { } for i in 0..temp_den.len() { - if !has_letters(temp_den[i].as_str()) + if !contains_alphabetic_chars(temp_den[i].as_str()) || is_scientific_number(temp_den[i].as_str()) { continue; @@ -132,7 +129,7 @@ impl DataProvider for crate::DatagenProvider { for (cons_name, (num, den, constant_type)) in constants_with_constants_map.iter() { let value = convert_array_of_strings_to_fraction(num, den)?; let (num, den, sign, cons_type) = - convert_fractional_to_constant_value(value, *constant_type)?; + transform_fraction_to_constant_value(value, *constant_type)?; constants_map.insert( cons_name, ConstantValue { From b80923278e6fb58d692d8616790520a844fc3b33 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 15:44:12 +0200 Subject: [PATCH 021/104] improve --- provider/datagen/src/transform/cldr/units/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 6274935affd..9a3cc23cd21 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -19,8 +19,8 @@ use icu_unitsconversion::provider::{ use zerovec::{ZeroMap, ZeroVec}; use self::helpers::{ - contains_alphabetic_chars, convert_array_of_strings_to_fraction, - transform_fraction_to_constant_value, remove_whitespace, split_constant_string, + contains_alphabetic_chars, convert_array_of_strings_to_fraction, remove_whitespace, + split_constant_string, transform_fraction_to_constant_value, }; impl DataProvider for crate::DatagenProvider { From 27bbcc9cbad14d27c476f687b59e28f05dd478c6 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 16:18:17 +0200 Subject: [PATCH 022/104] improve the code --- .../src/transform/cldr/units/helpers.rs | 65 ++++++++++++++----- .../datagen/src/transform/cldr/units/mod.rs | 2 +- 2 files changed, 51 insertions(+), 16 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index e1fe6864b73..f98117b1f82 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -127,36 +127,71 @@ pub fn convert_array_of_strings_to_fraction( Ok(result) } -/// Splits the constant string into a tuple of (numerator, denominator). -/// The numerator and denominator are represented as array of strings. -/// For example: "1/2" -> (["1"], ["2"]) -/// "1 * 2 / 3 * ft_to_m" -> (["1", "2"], ["3" , "ft_to_m"]) +/// Splits a constant string into a tuple of (numerator, denominator). +/// The numerator and denominator are represented as arrays of strings. +/// Examples: +/// - "1/2" is split into (["1"], ["2"]) +/// - "1 * 2 / 3 * ft_to_m" is split into (["1", "2"], ["3" , "ft_to_m"]) +/// - "/2" is split into (["1"], ["2"]) +/// - "2" is split into (["2"], ["1"]) pub fn split_constant_string( constant_string: &str, ) -> Result<(Vec, Vec), DataError> { - let constant_string = remove_whitespace(constant_string); + let cleaned_string = remove_whitespace(constant_string); let mut numerator = Vec::::new(); let mut denominator = Vec::::new(); - let mut split = constant_string.split('/'); + let mut split = cleaned_string.split('/'); if split.clone().count() > 2 { - return Err(DataError::custom("the constant string is not valid")); + return Err(DataError::custom("Invalid constant string")); } + let numerator_string = split.next().unwrap_or("1"); let denominator_string = split.next().unwrap_or("1"); - let split = numerator_string.split('*'); - for num in split { - numerator.push(num.to_string()); - } + let numerator_values = if numerator_string.is_empty() { + vec!["1".to_string()] + } else { + numerator_string.split('*').map(|s| s.to_string()).collect() + }; - let split = denominator_string.split('*'); - for num in split { - denominator.push(num.to_string()); - } + let denominator_values = if denominator_string.is_empty() { + vec!["1".to_string()] + } else { + denominator_string.split('*').map(|s| s.to_string()).collect() + }; + + numerator.extend(numerator_values); + denominator.extend(denominator_values); Ok((numerator, denominator)) } +// TODO: move this to the comment above. +#[test] +fn test_split_constant_string() { + let input = "1/2"; + let expected = (vec!["1".to_string()], vec!["2".to_string()]); + let actual = split_constant_string(input).unwrap(); + assert_eq!(expected, actual); + + let input = "1 * 2 / 3 * ft_to_m"; + let expected = ( + vec!["1".to_string(), "2".to_string()], + vec!["3".to_string(), "ft_to_m".to_string()], + ); + let actual = split_constant_string(input).unwrap(); + assert_eq!(expected, actual); + + let input = "/2"; + let expected = (vec!["1".to_string()], vec!["2".to_string()]); + let actual = split_constant_string(input).unwrap(); + assert_eq!(expected, actual); + + let input = "2"; + let expected = (vec!["2".to_string()], vec!["1".to_string()]); + let actual = split_constant_string(input).unwrap(); + assert_eq!(expected, actual); +} #[test] fn test_remove_whitespace() { diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 9a3cc23cd21..e4252b23ea8 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -2,7 +2,7 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -mod helpers; +pub mod helpers; use std::collections::BTreeMap; From 631b5984b6f5635f61d12f5804a3fbbbbf6e24a3 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 16:19:00 +0200 Subject: [PATCH 023/104] move to the correct place --- .../datagen/src/transform/cldr/units/helpers.rs | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index f98117b1f82..c512ef2e0ee 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -17,6 +17,15 @@ pub fn remove_whitespace(s: &str) -> String { s.chars().filter(|c| !c.is_whitespace()).collect() } +// TODO: move this to the comment above. +#[test] +fn test_remove_whitespace() { + let input = "He llo Wo rld!"; + let expected = "HelloWorld!"; + let actual = remove_whitespace(input); + assert_eq!(expected, actual); +} + /// Converts a scientific notation number represented as a string into a tuple of (numerator, denominator). pub fn convert_scientific_notation_number_to_fractional( number: &str, @@ -193,10 +202,3 @@ fn test_split_constant_string() { assert_eq!(expected, actual); } -#[test] -fn test_remove_whitespace() { - let input = "He llo Wo rld!"; - let expected = "HelloWorld!"; - let actual = remove_whitespace(input); - assert_eq!(expected, actual); -} From 4a93af569ab1181c78fa3d8a9d44173d39f91331 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 16:20:33 +0200 Subject: [PATCH 024/104] improve naming --- provider/datagen/src/transform/cldr/units/helpers.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index c512ef2e0ee..470ddc79419 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -26,8 +26,8 @@ fn test_remove_whitespace() { assert_eq!(expected, actual); } -/// Converts a scientific notation number represented as a string into a tuple of (numerator, denominator). -pub fn convert_scientific_notation_number_to_fractional( +/// Converts a scientific notation number represented as a string into a GenericFraction. +pub fn convert_scientific_notation_to_fraction( number: &str, ) -> Result, DataError> { let number = remove_whitespace(number); // TODO: check this. @@ -118,7 +118,7 @@ pub fn convert_array_of_strings_to_fraction( let mut result = GenericFraction::new(BigUint::from(1u32), BigUint::from(1u32)); for vnum in num.iter() { - let num = match convert_scientific_notation_number_to_fractional(vnum) { + let num = match convert_scientific_notation_to_fraction(vnum) { Ok(num) => num, Err(e) => return Err(e), }; @@ -126,7 +126,7 @@ pub fn convert_array_of_strings_to_fraction( } for vden in den.iter() { - let den = match convert_scientific_notation_number_to_fractional(vden) { + let den = match convert_scientific_notation_to_fraction(vden) { Ok(den) => den, Err(e) => return Err(e), }; From 748d8e0dc12ef1f7fbbe2bc072665b2c2a0ec412 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 16:29:20 +0200 Subject: [PATCH 025/104] fix convert fractional --- .../src/transform/cldr/units/helpers.rs | 61 ++++++++++++++----- 1 file changed, 46 insertions(+), 15 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index 470ddc79419..b8d36dbbb87 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -27,31 +27,33 @@ fn test_remove_whitespace() { } /// Converts a scientific notation number represented as a string into a GenericFraction. +/// Examples: +/// - "1E2" is converted to 100 +/// - "1E-2" is converted to 1/100 +/// - "1.5E2" is converted to 150 +/// - "1.5E-2" is converted to 15/1000 +/// - "1.5E-2.5" is an invalid scientific notation number pub fn convert_scientific_notation_to_fraction( number: &str, ) -> Result, DataError> { - let number = remove_whitespace(number); // TODO: check this. - let mut split = number.split('E'); - if split.clone().count() > 2 { + let number = remove_whitespace(number); + let parts: Vec<&str> = number.split('E').collect(); + if parts.len() > 2 { return Err(DataError::custom( "the number is not a scientific notation number", )); } - let base = split.next().unwrap_or("0"); - let exponent = split.next().unwrap_or("0"); - let base: GenericFraction = match GenericFraction::from_str(base) { - Ok(base) => base, - Err(_) => return Err(DataError::custom("the number is not a valid number")), - }; - let exponent = match f64::from_str(exponent) { - Ok(exponent) => exponent, - Err(_) => return Err(DataError::custom("the exponent is not a valid number")), - }; + let base = parts.get(0).unwrap_or(&"0"); + let exponent = parts.get(1).unwrap_or(&"0"); + let base: GenericFraction = GenericFraction::from_str(base) + .map_err(|_| DataError::custom("the number is not a valid number"))?; + let exponent = i64::from_str(exponent) + .map_err(|_| DataError::custom("the exponent is not a valid number"))?; let mut result = base; let generic_ten: GenericFraction = - GenericFraction::new(BigUint::from(10u32), BigUint::from(1u32)); // TODO: fix this - if exponent > 0.0 { + GenericFraction::new(BigUint::from(10u32), BigUint::from(1u32)); + if exponent > 0 { for _ in 0..exponent as u32 { result = result.mul(generic_ten.clone()); } @@ -64,6 +66,35 @@ pub fn convert_scientific_notation_to_fraction( Ok(result) } +// TODO: move this to the comment above. +#[test] +fn test_convert_scientific_notation_to_fraction() { + let input = "1E2"; + let expected = GenericFraction::new(BigUint::from(100u32), BigUint::from(1u32)); + let actual = convert_scientific_notation_to_fraction(input).unwrap(); + assert_eq!(expected, actual); + + let input = "1E-2"; + let expected = GenericFraction::new(BigUint::from(1u32), BigUint::from(100u32)); + let actual = convert_scientific_notation_to_fraction(input).unwrap(); + assert_eq!(expected, actual); + + let input = "1.5E2"; + let expected = GenericFraction::new(BigUint::from(150u32), BigUint::from(1u32)); + let actual = convert_scientific_notation_to_fraction(input).unwrap(); + assert_eq!(expected, actual); + + let input = "1.5E-2"; + let expected = GenericFraction::new(BigUint::from(15u32), BigUint::from(1000u32)); + let actual = convert_scientific_notation_to_fraction(input).unwrap(); + assert_eq!(expected, actual); + + let input = "1.5E-2.5"; + let actual = convert_scientific_notation_to_fraction(input); + assert!(actual.is_err()); +} + + /// Determines if a string contains any alphabetic characters. /// Returns true if the string contains at least one alphabetic character, false otherwise. pub fn contains_alphabetic_chars(s: &str) -> bool { From 0415d578c60618b08be047a60539badf8d19c53d Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 16:37:09 +0200 Subject: [PATCH 026/104] improve --- .../src/transform/cldr/units/helpers.rs | 59 +++++++++++++------ 1 file changed, 41 insertions(+), 18 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index b8d36dbbb87..912d6f85f8e 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -97,10 +97,39 @@ fn test_convert_scientific_notation_to_fraction() { /// Determines if a string contains any alphabetic characters. /// Returns true if the string contains at least one alphabetic character, false otherwise. +/// Examples: +/// - "1" returns false +/// - "ft_to_m" returns true +/// - "1E2" returns true +/// - "1.5E-2" returns true pub fn contains_alphabetic_chars(s: &str) -> bool { s.chars().any(char::is_alphabetic) } +#[test] +fn test_contains_alphabetic_chars() { + let input = "1"; + let expected = false; + let actual = contains_alphabetic_chars(input); + assert_eq!(expected, actual); + + let input = "ft_to_m"; + let expected = true; + let actual = contains_alphabetic_chars(input); + assert_eq!(expected, actual); + + let input = "1E2"; + let expected = true; + let actual = contains_alphabetic_chars(input); + assert_eq!(expected, actual); + + let input = "1.5E-2"; + let expected = true; + let actual = contains_alphabetic_chars(input); + assert_eq!(expected, actual); +} + + /// Checks if a string is a valid scientific notation number. /// Returns true if the string is a valid scientific notation number, false otherwise. pub fn is_scientific_number(s: &str) -> bool { @@ -141,30 +170,24 @@ pub fn transform_fraction_to_constant_value( Ok((numerator, denominator, sign, constant_type)) } -/// Converts an array of strings of numerator or denominator to fraction. +/// Converts vectors of numerator and denominator strings to a fraction. pub fn convert_array_of_strings_to_fraction( - num: &Vec, - den: &Vec, + numerator_strings: &[String], + denominator_strings: &[String], ) -> Result, DataError> { - let mut result = GenericFraction::new(BigUint::from(1u32), BigUint::from(1u32)); - - for vnum in num.iter() { - let num = match convert_scientific_notation_to_fraction(vnum) { - Ok(num) => num, - Err(e) => return Err(e), - }; - result = result.mul(num); + let mut fraction = GenericFraction::new(BigUint::from(1u32), BigUint::from(1u32)); + + for numerator in numerator_strings { + let num_fraction = convert_scientific_notation_to_fraction(numerator)?; + fraction = fraction.mul(num_fraction); } - for vden in den.iter() { - let den = match convert_scientific_notation_to_fraction(vden) { - Ok(den) => den, - Err(e) => return Err(e), - }; - result = result.div(den); + for denominator in denominator_strings { + let den_fraction = convert_scientific_notation_to_fraction(denominator)?; + fraction = fraction.div(den_fraction); } - Ok(result) + Ok(fraction) } /// Splits a constant string into a tuple of (numerator, denominator). From 6c62c4027295c8c4e93e74a615947fa3494284f0 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 16:45:03 +0200 Subject: [PATCH 027/104] Fix comments and add tests --- .../src/transform/cldr/units/helpers.rs | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index 912d6f85f8e..6461b3a9204 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -171,6 +171,13 @@ pub fn transform_fraction_to_constant_value( } /// Converts vectors of numerator and denominator strings to a fraction. +/// Examples: +/// - ["1"], ["2"] is converted to 1*2 --> 2 +/// - ["1", "2"], ["3", "1E2"] is converted to 1*2/(3*1E2) --> 2/300 +/// - ["1", "2"], ["3", "1E-2"] is converted to 1*2/(3*1E-2) --> 200/3 +/// - ["1", "2"], ["3", "1E-2.5"] is an invalid scientific notation number +/// - ["1E2"], ["2"] is converted to 1E2/2 --> 100/2 --> 50/1 +/// - ["1E2", "2"], ["3", "1E2"] is converted to 1E2*2/(3*1E2) --> 2/3 pub fn convert_array_of_strings_to_fraction( numerator_strings: &[String], denominator_strings: &[String], @@ -190,6 +197,46 @@ pub fn convert_array_of_strings_to_fraction( Ok(fraction) } + +// TODO: move some of these tests to the comment above. +#[test] +fn test_convert_array_of_strings_to_fraction() { + let numerator = vec!["1".to_string()]; + let denominator = vec!["2".to_string()]; + let expected = GenericFraction::new(BigUint::from(1u32), BigUint::from(2u32)); + let actual = convert_array_of_strings_to_fraction(&numerator, &denominator).unwrap(); + assert_eq!(expected, actual); + + let numerator = vec!["1".to_string(), "2".to_string()]; + let denominator = vec!["3".to_string(), "1E2".to_string()]; + let expected = GenericFraction::new(BigUint::from(2u32), BigUint::from(300u32)); + let actual = convert_array_of_strings_to_fraction(&numerator, &denominator).unwrap(); + assert_eq!(expected, actual); + + let numerator = vec!["1".to_string(), "2".to_string()]; + let denominator = vec!["3".to_string(), "1E-2".to_string()]; + let expected = GenericFraction::new(BigUint::from(200u32), BigUint::from(3u32)); + let actual = convert_array_of_strings_to_fraction(&numerator, &denominator).unwrap(); + assert_eq!(expected, actual); + + let numerator = vec!["1".to_string(), "2".to_string()]; + let denominator = vec!["3".to_string(), "1E-2.5".to_string()]; + let actual = convert_array_of_strings_to_fraction(&numerator, &denominator); + assert!(actual.is_err()); + + let numerator = vec!["1E2".to_string()]; + let denominator = vec!["2".to_string()]; + let expected = GenericFraction::new(BigUint::from(50u32), BigUint::from(1u32)); + let actual = convert_array_of_strings_to_fraction(&numerator, &denominator).unwrap(); + assert_eq!(expected, actual); + + let numerator = vec!["1E2".to_string(), "2".to_string()]; + let denominator = vec!["3".to_string(), "1E2".to_string()]; + let expected = GenericFraction::new(BigUint::from(2u32), BigUint::from(3u32)); + let actual = convert_array_of_strings_to_fraction(&numerator, &denominator).unwrap(); + assert_eq!(expected, actual); +} + /// Splits a constant string into a tuple of (numerator, denominator). /// The numerator and denominator are represented as arrays of strings. /// Examples: From 4291b4005af9d21b5d20e1a6be7fd6fef72309db Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 16:46:12 +0200 Subject: [PATCH 028/104] fix format --- provider/datagen/src/transform/cldr/units/helpers.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index 6461b3a9204..c3c383b5df7 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -94,7 +94,6 @@ fn test_convert_scientific_notation_to_fraction() { assert!(actual.is_err()); } - /// Determines if a string contains any alphabetic characters. /// Returns true if the string contains at least one alphabetic character, false otherwise. /// Examples: @@ -129,7 +128,6 @@ fn test_contains_alphabetic_chars() { assert_eq!(expected, actual); } - /// Checks if a string is a valid scientific notation number. /// Returns true if the string is a valid scientific notation number, false otherwise. pub fn is_scientific_number(s: &str) -> bool { @@ -177,7 +175,7 @@ pub fn transform_fraction_to_constant_value( /// - ["1", "2"], ["3", "1E-2"] is converted to 1*2/(3*1E-2) --> 200/3 /// - ["1", "2"], ["3", "1E-2.5"] is an invalid scientific notation number /// - ["1E2"], ["2"] is converted to 1E2/2 --> 100/2 --> 50/1 -/// - ["1E2", "2"], ["3", "1E2"] is converted to 1E2*2/(3*1E2) --> 2/3 +/// - ["1E2", "2"], ["3", "1E2"] is converted to 1E2*2/(3*1E2) --> 2/3 pub fn convert_array_of_strings_to_fraction( numerator_strings: &[String], denominator_strings: &[String], @@ -197,7 +195,6 @@ pub fn convert_array_of_strings_to_fraction( Ok(fraction) } - // TODO: move some of these tests to the comment above. #[test] fn test_convert_array_of_strings_to_fraction() { @@ -268,7 +265,10 @@ pub fn split_constant_string( let denominator_values = if denominator_string.is_empty() { vec!["1".to_string()] } else { - denominator_string.split('*').map(|s| s.to_string()).collect() + denominator_string + .split('*') + .map(|s| s.to_string()) + .collect() }; numerator.extend(numerator_values); @@ -302,4 +302,3 @@ fn test_split_constant_string() { let actual = split_constant_string(input).unwrap(); assert_eq!(expected, actual); } - From 3f8503eed86f76b759db1967d206f09073b6b5dd Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 17:08:13 +0200 Subject: [PATCH 029/104] improve and add test cases --- .../datagen/src/transform/cldr/units/mod.rs | 76 ++++++++++++++----- 1 file changed, 57 insertions(+), 19 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index e4252b23ea8..a70cf5fcd3c 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -7,7 +7,6 @@ pub mod helpers; use std::collections::BTreeMap; use crate::transform::cldr::{cldr_serde, units::helpers::is_scientific_number}; - use icu_provider::{ datagen::IterableDataProvider, DataError, DataLocale, DataPayload, DataProvider, DataRequest, DataResponse, @@ -15,9 +14,7 @@ use icu_provider::{ use icu_unitsconversion::provider::{ ConstantType, ConstantValue, UnitsConstantsV1, UnitsConstantsV1Marker, }; - use zerovec::{ZeroMap, ZeroVec}; - use self::helpers::{ contains_alphabetic_chars, convert_array_of_strings_to_fraction, remove_whitespace, split_constant_string, transform_fraction_to_constant_value, @@ -162,13 +159,17 @@ impl IterableDataProvider for crate::DatagenProvider { #[test] fn test_basic() { + use fraction::GenericFraction; use icu_locid::locale; use icu_provider::prelude::*; use icu_unitsconversion::provider::*; + use num_bigint::BigUint; + use num_traits::ToBytes; + use std::ops::Mul; let provider = crate::DatagenProvider::latest_tested_offline_subset(); - let _und: DataPayload = provider + let und: DataPayload = provider .load(DataRequest { locale: &locale!("und").into(), metadata: Default::default(), @@ -177,19 +178,56 @@ fn test_basic() { .take_payload() .unwrap(); - // let constants = &und.get().to_owned().constants_map; - - // let ft_to_m : ConstantValue = constants.get("ft_to_m").unwrap(); - - // assert_eq!(ft_to_m , ConstantValue { - // numerator: 3048, - // denominator: 10000, - // constant_type: ConstantType::Actual, - // }); - - // assert!(constants.get("ft2_to_m2").eq( ConstantValue { - // numerator: 3048, - // denominator: 10000, - // constant_type: ConstantType::Actual, - // })); + let constants = &und.get().to_owned().constants_map; + let ft_to_m = constants.get("ft_to_m").unwrap(); + let expected_ft_to_m = + GenericFraction::::new(BigUint::from(3048u32), BigUint::from(10000u32)); + assert_eq!( + ft_to_m, + zerovec::ule::encode_varule_to_box(&ConstantValue { + numerator: expected_ft_to_m.numer().unwrap().to_le_bytes().into(), + denominator: expected_ft_to_m.denom().unwrap().to_le_bytes().into(), + sign: Sign::Positive, + constant_type: ConstantType::Actual, + }) + .as_ref() + ); + + let ft2_to_m2 = constants.get("ft2_to_m2").unwrap(); + let expected_ft2_to_m2 = GenericFraction::::new( + BigUint::from(3048u32).mul(&BigUint::from(3048u32)), + BigUint::from(10000u32).mul(&BigUint::from(10000u32)), + ); + + assert_eq!( + ft2_to_m2, + zerovec::ule::encode_varule_to_box(&ConstantValue { + numerator: expected_ft2_to_m2.numer().unwrap().to_le_bytes().into(), + denominator: expected_ft2_to_m2.denom().unwrap().to_le_bytes().into(), + sign: Sign::Positive, + constant_type: ConstantType::Actual, + }) + .as_ref() + ); + + let ft3_to_m3 = constants.get("ft3_to_m3").unwrap(); + let expected_ft3_to_m3 = GenericFraction::::new( + BigUint::from(3048u32) + .mul(&BigUint::from(3048u32)) + .mul(&BigUint::from(3048u32)), + BigUint::from(10000u32) + .mul(&BigUint::from(10000u32)) + .mul(&BigUint::from(10000u32)), + ); + + assert_eq!( + ft3_to_m3, + zerovec::ule::encode_varule_to_box(&ConstantValue { + numerator: expected_ft3_to_m3.numer().unwrap().to_le_bytes().into(), + denominator: expected_ft3_to_m3.denom().unwrap().to_le_bytes().into(), + sign: Sign::Positive, + constant_type: ConstantType::Actual, + }) + .as_ref() + ); } From a6248f3104fd73ebea9e1fabaf52648248e856ca Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 17:13:09 +0200 Subject: [PATCH 030/104] fix clippy --- provider/datagen/src/transform/cldr/units/helpers.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index c3c383b5df7..ecd7af8e5bf 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -43,7 +43,7 @@ pub fn convert_scientific_notation_to_fraction( "the number is not a scientific notation number", )); } - let base = parts.get(0).unwrap_or(&"0"); + let base = parts.first().unwrap_or(&"0"); let exponent = parts.get(1).unwrap_or(&"0"); let base: GenericFraction = GenericFraction::from_str(base) .map_err(|_| DataError::custom("the number is not a valid number"))?; @@ -136,7 +136,7 @@ pub fn is_scientific_number(s: &str) -> bool { return false; } - let base = parts.get(0).unwrap_or(&"0"); + let base = parts.first().unwrap_or(&"0"); let exponent = parts.get(1).unwrap_or(&"0"); !contains_alphabetic_chars(base) && !contains_alphabetic_chars(exponent) From eef1612f6aa96f4f4c959400e8c25ffdf89bc1fb Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 17:17:30 +0200 Subject: [PATCH 031/104] fix the comments --- experimental/unitsconversion/src/provider.rs | 12 +++++++++++- provider/datagen/src/transform/cldr/units/mod.rs | 8 ++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/experimental/unitsconversion/src/provider.rs b/experimental/unitsconversion/src/provider.rs index 60ecc592b4a..3523502ecfd 100644 --- a/experimental/unitsconversion/src/provider.rs +++ b/experimental/unitsconversion/src/provider.rs @@ -12,7 +12,7 @@ use icu_provider::prelude::*; use zerovec::{ZeroMap, ZeroVec}; -/// This type contains all of the constants data for units conversion. +/// This type encapsulates all the constant data required for unit conversions. /// ///
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, @@ -36,6 +36,9 @@ pub struct UnitsConstantsV1<'data> { pub constants_map: ZeroMap<'data, str, ConstantValueULE>, } +/// This enum is used to represent the type of a constant value. +/// It can be either `ConstantType::Actual` or `ConstantType::Approximate`. +/// If the constant type is `ConstantType::Approximate`, it indicates that the value is not definitively accurate. #[zerovec::make_ule(ConstantTypeULE)] #[cfg_attr( feature = "datagen", @@ -51,6 +54,7 @@ pub enum ConstantType { Approximate = 1, } +/// This enum is used to represent the sign of a constant value. #[zerovec::make_ule(SignULE)] #[cfg_attr( feature = "datagen", @@ -66,6 +70,7 @@ pub enum Sign { Negative = 1, } +/// This struct represents a constant value, which is composed of a numerator, denominator, sign, and type. #[zerovec::make_varule(ConstantValueULE)] #[cfg_attr( feature = "datagen", @@ -76,12 +81,17 @@ pub enum Sign { #[cfg_attr(feature = "serde", derive(serde::Deserialize))] #[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)] pub struct ConstantValue<'data> { + /// The numerator of the constant value. #[serde(borrow)] pub numerator: ZeroVec<'data, u8>, + + /// The denominator of the constant value. #[serde(borrow)] pub denominator: ZeroVec<'data, u8>, + /// The sign of the constant value. pub sign: Sign, + /// The type of the constant value. pub constant_type: ConstantType, } diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index a70cf5fcd3c..b8f22e0f91a 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -6,6 +6,10 @@ pub mod helpers; use std::collections::BTreeMap; +use self::helpers::{ + contains_alphabetic_chars, convert_array_of_strings_to_fraction, remove_whitespace, + split_constant_string, transform_fraction_to_constant_value, +}; use crate::transform::cldr::{cldr_serde, units::helpers::is_scientific_number}; use icu_provider::{ datagen::IterableDataProvider, DataError, DataLocale, DataPayload, DataProvider, DataRequest, @@ -15,10 +19,6 @@ use icu_unitsconversion::provider::{ ConstantType, ConstantValue, UnitsConstantsV1, UnitsConstantsV1Marker, }; use zerovec::{ZeroMap, ZeroVec}; -use self::helpers::{ - contains_alphabetic_chars, convert_array_of_strings_to_fraction, remove_whitespace, - split_constant_string, transform_fraction_to_constant_value, -}; impl DataProvider for crate::DatagenProvider { fn load(&self, _req: DataRequest) -> Result, DataError> { From 50c03b092abd738a761ea1bb1e4728ee05ff77a5 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 17:19:19 +0200 Subject: [PATCH 032/104] rename --- provider/datagen/src/transform/cldr/units/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index b8f22e0f91a..1b054ca45d0 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -24,13 +24,13 @@ impl DataProvider for crate::DatagenProvider { fn load(&self, _req: DataRequest) -> Result, DataError> { self.check_req::(_req)?; - let _units_data: &cldr_serde::units::units_constants::Resource = self + let units_data: &cldr_serde::units::units_constants::Resource = self .cldr()? .core() .read_and_parse("supplemental/units.json")?; let _constants_map = BTreeMap::<&str, ConstantValue>::new(); - let constants = &_units_data.supplemental.unit_constants.constants; + let constants = &units_data.supplemental.unit_constants.constants; // Constants that has a constants in their value. // For exmaple: "ft2_to_m2": "ft_to_m * ft_to_m", From 42a7817b6940e12ec20383d08bc27c59c052ff3b Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 17:23:16 +0200 Subject: [PATCH 033/104] fix duplication --- .../datagen/src/transform/cldr/units/mod.rs | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 1b054ca45d0..f5d49439e96 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -28,15 +28,10 @@ impl DataProvider for crate::DatagenProvider { .cldr()? .core() .read_and_parse("supplemental/units.json")?; - let _constants_map = BTreeMap::<&str, ConstantValue>::new(); - let constants = &units_data.supplemental.unit_constants.constants; - // Constants that has a constants in their value. - // For exmaple: "ft2_to_m2": "ft_to_m * ft_to_m", - let mut constants_with_constants_map = + let mut constants_map_in_str_form = BTreeMap::<&str, (Vec, Vec, ConstantType)>::new(); - for (cons_name, cons_value) in constants { let value = remove_whitespace(&cons_value.value); let (num, den) = match split_constant_string(&value) { @@ -51,7 +46,7 @@ impl DataProvider for crate::DatagenProvider { _ => ConstantType::Actual, }; - constants_with_constants_map.insert(cons_name, (num, den, constant_type)); + constants_map_in_str_form.insert(cons_name, (num, den, constant_type)); } // This loop will replace all the constants in the value of a constant with their values. @@ -60,7 +55,7 @@ impl DataProvider for crate::DatagenProvider { cons_with_text = 0; let mut constants_with_constants_map_replaceable = BTreeMap::<&str, (Vec, Vec, ConstantType)>::new(); - for (cons_name, (num, den, constant_type)) in constants_with_constants_map.iter() { + for (cons_name, (num, den, constant_type)) in constants_map_in_str_form.iter() { let mut temp_num = num.clone(); let mut temp_den = den.clone(); let mut temp_constant_type = *constant_type; @@ -74,7 +69,7 @@ impl DataProvider for crate::DatagenProvider { cons_with_text += 1; if let Some((rnum, rden, rconstant_type)) = - constants_with_constants_map.get(temp_num[i].as_str()) + constants_map_in_str_form.get(temp_num[i].as_str()) { temp_num.remove(i); // append the elements in rnum to num and rden to den @@ -96,7 +91,7 @@ impl DataProvider for crate::DatagenProvider { cons_with_text += 1; if let Some((rnum, rden, rconstant_type)) = - constants_with_constants_map.get(temp_den[i].as_str()) + constants_map_in_str_form.get(temp_den[i].as_str()) { temp_den.remove(i); // append the elements in rnum to den and rden to num @@ -113,8 +108,8 @@ impl DataProvider for crate::DatagenProvider { .insert(cons_name, (temp_num, temp_den, temp_constant_type)); } - constants_with_constants_map.clear(); - constants_with_constants_map = constants_with_constants_map_replaceable; + constants_map_in_str_form.clear(); + constants_map_in_str_form = constants_with_constants_map_replaceable; if cons_with_text == 0 { break; @@ -123,7 +118,7 @@ impl DataProvider for crate::DatagenProvider { let mut constants_map = BTreeMap::<&str, ConstantValue>::new(); - for (cons_name, (num, den, constant_type)) in constants_with_constants_map.iter() { + for (cons_name, (num, den, constant_type)) in constants_map_in_str_form.iter() { let value = convert_array_of_strings_to_fraction(num, den)?; let (num, den, sign, cons_type) = transform_fraction_to_constant_value(value, *constant_type)?; From 346bd9aa2a8d09afd83deb421dd96ec5b62f72cd Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 17:27:57 +0200 Subject: [PATCH 034/104] fix the code --- .../datagen/src/transform/cldr/units/mod.rs | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index f5d49439e96..6598d3e19b8 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -34,12 +34,8 @@ impl DataProvider for crate::DatagenProvider { BTreeMap::<&str, (Vec, Vec, ConstantType)>::new(); for (cons_name, cons_value) in constants { let value = remove_whitespace(&cons_value.value); - let (num, den) = match split_constant_string(&value) { - Ok((num, den)) => (num, den), - Err(e) => { - return Err(e); - } - }; + let (num, den) = split_constant_string(&value) + .map_err(|e| e)?; let constant_type = match cons_value.status.as_deref() { Some("approximate") => ConstantType::Approximate, @@ -49,10 +45,10 @@ impl DataProvider for crate::DatagenProvider { constants_map_in_str_form.insert(cons_name, (num, den, constant_type)); } - // This loop will replace all the constants in the value of a constant with their values. - let mut cons_with_text; + // This loop iterates over the constants, replacing any string values with their corresponding constant values. + let mut num_of_const_with_text; loop { - cons_with_text = 0; + num_of_const_with_text = 0; let mut constants_with_constants_map_replaceable = BTreeMap::<&str, (Vec, Vec, ConstantType)>::new(); for (cons_name, (num, den, constant_type)) in constants_map_in_str_form.iter() { @@ -67,7 +63,7 @@ impl DataProvider for crate::DatagenProvider { continue; } - cons_with_text += 1; + num_of_const_with_text += 1; if let Some((rnum, rden, rconstant_type)) = constants_map_in_str_form.get(temp_num[i].as_str()) { @@ -89,7 +85,7 @@ impl DataProvider for crate::DatagenProvider { continue; } - cons_with_text += 1; + num_of_const_with_text += 1; if let Some((rnum, rden, rconstant_type)) = constants_map_in_str_form.get(temp_den[i].as_str()) { @@ -111,7 +107,7 @@ impl DataProvider for crate::DatagenProvider { constants_map_in_str_form.clear(); constants_map_in_str_form = constants_with_constants_map_replaceable; - if cons_with_text == 0 { + if num_of_const_with_text == 0 { break; } } From b5790d6d0797e1b32e22bd31ee2179bd0e2a4570 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 17:32:07 +0200 Subject: [PATCH 035/104] fix data bake --- .../unitsconversion/data/data/macros/units_constants_v1.data.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/unitsconversion/data/data/macros/units_constants_v1.data.rs b/experimental/unitsconversion/data/data/macros/units_constants_v1.data.rs index 092682bdc79..59ee2249c42 100644 --- a/experimental/unitsconversion/data/data/macros/units_constants_v1.data.rs +++ b/experimental/unitsconversion/data/data/macros/units_constants_v1.data.rs @@ -12,7 +12,7 @@ macro_rules! __impl_units_constants_v1 { pub const SINGLETON_UNITS_CONSTANTS_V1: &'static ::Yokeable = &icu::unitsconversion::provider::UnitsConstantsV1 { constants_map: unsafe { #[allow(unused_unsafe)] - zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::VarZeroVec::from_bytes_unchecked(b"\x0F\0\0\0\0\0\x01\0\x03\0\x0C\0\x15\0\x1C\0)\x002\0D\0K\0T\0a\0i\0v\0\x89\0GPIft2_to_m2ft3_to_m3ft_to_mgal_imp_to_m3gal_to_m3glucose_molar_massgravityin3_to_m3item_per_molelb_to_kgmeters_per_AUsec_per_julian_yearspeed_of_light_meters_per_second") }, unsafe { zerovec::VarZeroVec::from_bytes_unchecked(b"\x0F\0\0\0\0\0\x0B\0 \0/\0F\0L\0V\0c\0k\0r\0\x84\0\x92\0\x9C\0\xA8\0\xB0\x006.67408E-11411557987 / 131002976ft_to_m*ft_to_mft_to_m*ft_to_m*ft_to_m0.30480.00454609231*in3_to_m3180.15579.80665ft3_to_m3/12*12*126.02214076E+230.4535923714959787070031557600299792458") }) + zerovec::ZeroMap::from_parts_unchecked(unsafe { zerovec::VarZeroVec::from_bytes_unchecked(b"\x0F\0\0\0\0\0\x01\0\x03\0\x0C\0\x15\0\x1C\0)\x002\0D\0K\0T\0a\0i\0v\0\x89\0GPIft2_to_m2ft3_to_m3ft_to_mgal_imp_to_m3gal_to_m3glucose_molar_massgravityin3_to_m3item_per_molelb_to_kgmeters_per_AUsec_per_julian_yearspeed_of_light_meters_per_second") }, unsafe { zerovec::VarZeroVec::from_bytes_unchecked(b"\x0F\0\0\0\0\0\x17\0-\0A\0W\0i\0~\0\x95\0\xA8\0\xBB\0\xD1\0\xEA\0\0\x01\x14\x01'\x01\0\x01\x02\0\0\0\0\0\0\0\x02\0\0\0\xF1\xA2\0\x10\xFC&o8\x02\0\x01\x02\0\0\0\0\0\0\0\x04\0\0\0c\xE0\x87\x18`\xF2\xCE\x07\0\0\x02\0\0\0\0\0\0\0\x03\0\0\0\t7\x02\x84\xD7\x17\0\0\x02\0\0\0\0\0\0\0\x04\0\0\0e\xE8K\x03\x88Rjt\0\0\x02\0\0\0\0\0\0\0\x02\0\0\0}\x01\xE2\x04\0\0\x02\0\0\0\0\0\0\0\x03\0\0\0\xD1\xEF\x06\0\xE1\xF5\x05\0\0\x02\0\0\0\0\0\0\0\x04\0\0\0\x99\x194\x1C\0\xA2\x94\x1A\x1D\0\0\x02\0\0\0\0\0\0\0\x03\0\0\0U}\x1B\x10'\0\0\x02\0\0\0\0\0\0\0\x03\0\0\0%\xFE\x02 N\0\0\x02\0\0\0\0\0\0\0\x03\0\0\0\x7FA\x1F\0\xA2\x94\x1A\x1D\0\0\x02\0\0\0\0\0\0\0\n\0\0\0\0\0\xC6\\\x14_)\x17\x86\x7F\x01\0\0\x02\0\0\0\0\0\0\0\x04\0\0\0\x85 \xB4\x02\0\xE1\xF5\x05\0\0\x02\0\0\0\0\0\0\0\x05\0\0\0lZ\xBA\xD4\"\x01\0\0\x02\0\0\0\0\0\0\0\x04\0\0\0\xE0\x87\xE1\x01\x01\0\0\x02\0\0\0\0\0\0\0\x04\0\0\0Jx\xDE\x11\x01") }) }, }; } From 8efed21c7fa252fe13bc837bf5dd8b281b4c7814 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 17:36:22 +0200 Subject: [PATCH 036/104] fix --- provider/datagen/src/transform/cldr/units/mod.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 6598d3e19b8..84177814a01 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -34,8 +34,7 @@ impl DataProvider for crate::DatagenProvider { BTreeMap::<&str, (Vec, Vec, ConstantType)>::new(); for (cons_name, cons_value) in constants { let value = remove_whitespace(&cons_value.value); - let (num, den) = split_constant_string(&value) - .map_err(|e| e)?; + let (num, den) = split_constant_string(&value).map_err(|e| e)?; let constant_type = match cons_value.status.as_deref() { Some("approximate") => ConstantType::Approximate, @@ -221,4 +220,8 @@ fn test_basic() { }) .as_ref() ); + + // TODO: Test the cases where the constant value has another constant in the denominator. + // For example, "12/ft2_to_m2" + // This is not existing in CLDR data yet. but we should test this case anyway. } From c2ff6851f7551eb8019b7ecf1dacc8d787609278 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 17:37:01 +0200 Subject: [PATCH 037/104] fix --- provider/datagen/src/transform/cldr/units/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 84177814a01..b59c696786b 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -221,7 +221,7 @@ fn test_basic() { .as_ref() ); - // TODO: Test the cases where the constant value has another constant in the denominator. - // For example, "12/ft2_to_m2" - // This is not existing in CLDR data yet. but we should test this case anyway. + // TODO: Implement tests for cases where the constant value includes another constant in the denominator. + // Example: "12/ft2_to_m2" + // Although this case is not currently present in CLDR data, it's important to test for it. } From c7fb8da60f69514c7b2df69c0c118d0614473a55 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 20 Sep 2023 17:42:30 +0200 Subject: [PATCH 038/104] fix clippy --- provider/datagen/src/transform/cldr/units/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index b59c696786b..2128401b2ff 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -34,7 +34,7 @@ impl DataProvider for crate::DatagenProvider { BTreeMap::<&str, (Vec, Vec, ConstantType)>::new(); for (cons_name, cons_value) in constants { let value = remove_whitespace(&cons_value.value); - let (num, den) = split_constant_string(&value).map_err(|e| e)?; + let (num, den) = split_constant_string(&value)?; let constant_type = match cons_value.status.as_deref() { Some("approximate") => ConstantType::Approximate, From a52ca57b3a24d4fb80b64f6c175be9259c9fcfb9 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Mon, 25 Sep 2023 16:16:38 +0200 Subject: [PATCH 039/104] fix after merge --- Cargo.lock | 1 - .../tests/data/postcard/fingerprints.csv | 19 +------------------ 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2f0409de823..439d4afcd57 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1977,7 +1977,6 @@ version = "0.0.0" dependencies = [ "databake", "displaydoc", - "fixed_decimal", "fraction", "icu_locid", "icu_provider", diff --git a/provider/datagen/tests/data/postcard/fingerprints.csv b/provider/datagen/tests/data/postcard/fingerprints.csv index 9647bbb8dcb..be3cdc4f842 100644 --- a/provider/datagen/tests/data/postcard/fingerprints.csv +++ b/provider/datagen/tests/data/postcard/fingerprints.csv @@ -2048,23 +2048,6 @@ time_zone/specific_short@1, sr-Latn, 119B, dcdb9855b7df4f90 time_zone/specific_short@1, th, 31B, 4b7af6a019fab889 time_zone/specific_short@1, tr, 31B, 4b7af6a019fab889 time_zone/specific_short@1, und, 31B, 4b7af6a019fab889 -transliterator/rules@1, und+de-t-de-d0-ascii, 16784B, cbb0f7f795d1c6dc -transliterator/rules@1, und+el-Latn-t-el-m0-bgn, 13818B, 2293dcb5f5e7fc0b -transliterator/rules@1, und+und-Arab-t-s0-intindic, 24110B, 9f9bbc5a58d2831 -transliterator/rules@1, und+und-Arab-t-und-Beng, 399B, 46e4893392ef2e5f -transliterator/rules@1, und+und-Latn-t-s0-ascii, 126B, bd845d3dc35e3b57 -transliterator/rules@1, und+und-t-d0-publish, 3477B, c9558c0f7daee292 -transliterator/rules@1, und+und-t-s0-publish, 1343B, 6084ebbdd93523c2 -transliterator/rules@1, und+und-t-und-Beng-d0-intindic, 2621B, 24a04df29d08559d -transliterator/rules@1, und+und-t-und-Latn-d0-ascii, 27110B, c66743617e3238ff -transliterator/rules@1, und+und-t-und-d0-test-m0-cursfilt-s0-test, 93B, ac67e05bc986cd23 -transliterator/rules@1, und+und-t-und-d0-test-m0-emtymach-s0-test, 105B, 12b65cade4ce4468 -transliterator/rules@1, und+und-t-und-d0-test-m0-hexrust-s0-test, 98B, b8802989a6bfec0f -transliterator/rules@1, und+und-t-und-d0-test-m0-hexuni-s0-test, 104B, 4335c71013bd81d -transliterator/rules@1, und+und-t-und-d0-test-m0-niels-s0-test, 1800B, 6a560a4143a4b60c -transliterator/rules@1, und+und-t-und-d0-test-m0-rectesta-s0-test, 370B, af652bcb33e1038b -transliterator/rules@1, und+und-t-und-d0-test-m0-rectestr-s0-test, 281B, 51be7571fd233bd6 -units/constants@1, und, 555B, b463e4109a02b639 transliterator/rules@1, und+de-t-de-d0-ascii, 16754B, 373dc989a6f2feb0 transliterator/rules@1, und+el-Latn-t-el-m0-bgn, 13802B, 676fd7d03e5f65ba transliterator/rules@1, und+und-Arab-t-s0-intindic, 24093B, bb464298570e790f @@ -2081,4 +2064,4 @@ transliterator/rules@1, und+und-t-und-d0-test-m0-hexuni-s0-test, 80B, 55d96425b7 transliterator/rules@1, und+und-t-und-d0-test-m0-niels-s0-test, 1769B, 45400449cf43ecf6 transliterator/rules@1, und+und-t-und-d0-test-m0-rectesta-s0-test, 369B, 69c41d4b5c828833 transliterator/rules@1, und+und-t-und-d0-test-m0-rectestr-s0-test, 237B, 3345ed066cbb729f -units/constants@1, und, 426B, e0c7eeb9e702371c +units/constants@1, und, 555B, b463e4109a02b639 From 754f33e9b8160dce9d6027072172f2ce1a965a47 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Mon, 25 Sep 2023 16:22:50 +0200 Subject: [PATCH 040/104] Update provider/datagen/src/transform/cldr/units/helpers.rs Co-authored-by: Shane F. Carr --- provider/datagen/src/transform/cldr/units/helpers.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index ecd7af8e5bf..68351f68070 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -170,7 +170,7 @@ pub fn transform_fraction_to_constant_value( /// Converts vectors of numerator and denominator strings to a fraction. /// Examples: -/// - ["1"], ["2"] is converted to 1*2 --> 2 +/// - ["1"], ["2"] is converted to 1/2 /// - ["1", "2"], ["3", "1E2"] is converted to 1*2/(3*1E2) --> 2/300 /// - ["1", "2"], ["3", "1E-2"] is converted to 1*2/(3*1E-2) --> 200/3 /// - ["1", "2"], ["3", "1E-2.5"] is an invalid scientific notation number From 8cba11b20f3203cc59e4c7f7900d0f560ec8fb8c Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Mon, 25 Sep 2023 17:23:04 +0200 Subject: [PATCH 041/104] Add TODOs --- experimental/unitsconversion/src/provider.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/experimental/unitsconversion/src/provider.rs b/experimental/unitsconversion/src/provider.rs index 3523502ecfd..6230010460b 100644 --- a/experimental/unitsconversion/src/provider.rs +++ b/experimental/unitsconversion/src/provider.rs @@ -81,10 +81,12 @@ pub enum Sign { #[cfg_attr(feature = "serde", derive(serde::Deserialize))] #[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)] pub struct ConstantValue<'data> { + // TODO(https://github.com/unicode-org/icu4x/issues/4092). /// The numerator of the constant value. #[serde(borrow)] pub numerator: ZeroVec<'data, u8>, + // TODO(https://github.com/unicode-org/icu4x/issues/4092). /// The denominator of the constant value. #[serde(borrow)] pub denominator: ZeroVec<'data, u8>, From c04611014ef62cacb06c224e740c88a45f661a86 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Mon, 25 Sep 2023 17:26:28 +0200 Subject: [PATCH 042/104] fix the places of dependencies --- provider/datagen/Cargo.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/provider/datagen/Cargo.toml b/provider/datagen/Cargo.toml index 0c58f7e4a58..297dd245777 100644 --- a/provider/datagen/Cargo.toml +++ b/provider/datagen/Cargo.toml @@ -32,10 +32,6 @@ rust-version.workspace = true all-features = true [dependencies] -fraction = { version = "0.13.1", default-features = false } -num-bigint = { version = "0.4.4", default-features = false } -num-traits = { version = "0.2.14", default-features = false } - # ICU components icu_calendar = { workspace = true, features = ["datagen"] } icu_casemap = { workspace = true, features = ["datagen"] } @@ -93,6 +89,10 @@ zip = { version = ">=0.5, <0.7", default-features = false, features = ["deflate" rayon = { version = "1.5", optional = true } ureq = { version = "2", optional = true } +fraction = { version = "0.13.1", default-features = false } +num-bigint = { version = "0.4.4", default-features = false } +num-traits = { version = "0.2.14", default-features = false } + # Dependencies for "bin" feature clap = { version = "4", optional = true, features = ["derive"] } eyre = { version = "0.6", optional = true } From f7467e692024b1478e8e7756e7f14cff8ad90fe0 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Mon, 25 Sep 2023 18:53:59 +0200 Subject: [PATCH 043/104] fix uses --- Cargo.lock | 4 ---- experimental/unitsconversion/Cargo.toml | 6 +----- experimental/unitsconversion/src/provider.rs | 2 +- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 439d4afcd57..50f5a61335a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1977,14 +1977,10 @@ version = "0.0.0" dependencies = [ "databake", "displaydoc", - "fraction", "icu_locid", "icu_provider", "icu_unitsconversion_data", - "num-bigint", - "num-traits", "serde", - "tinystr", "zerovec", ] diff --git a/experimental/unitsconversion/Cargo.toml b/experimental/unitsconversion/Cargo.toml index 27c664e340b..e260f043882 100644 --- a/experimental/unitsconversion/Cargo.toml +++ b/experimental/unitsconversion/Cargo.toml @@ -25,15 +25,11 @@ displaydoc = { version = "0.2.3", default-features = false } icu_locid = { workspace = true } icu_provider = { workspace = true, features = ["macros"] } serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true } -tinystr = { workspace = true, features = ["zerovec"], default-features = false } zerovec = { workspace = true, features = ["yoke"] } -num-bigint = { version = "0.4.4", default-features = false } -num-traits = { version = "0.2.14", default-features = false } -fraction = { version = "0.13.1", default-features = false } icu_unitsconversion_data = { workspace = true, optional = true } [features] default = ["compiled_data"] serde = ["dep:serde", "zerovec/serde", "icu_locid/serde", "icu_provider/serde"] datagen = ["serde", "zerovec/databake", "databake"] -compiled_data = ["dep:icu_unitsconversion_data"] +compiled_data = ["dep:icu_unitsconversion_data"] \ No newline at end of file diff --git a/experimental/unitsconversion/src/provider.rs b/experimental/unitsconversion/src/provider.rs index 6230010460b..3a39e499fa1 100644 --- a/experimental/unitsconversion/src/provider.rs +++ b/experimental/unitsconversion/src/provider.rs @@ -9,7 +9,7 @@ //! //! Read more about data providers: [`icu_provider`] -use icu_provider::prelude::*; +use icu_provider::{yoke, zerofrom}; use zerovec::{ZeroMap, ZeroVec}; /// This type encapsulates all the constant data required for unit conversions. From d098dfe2bae95a2d6150b427afdf3eab9415f943 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Mon, 25 Sep 2023 20:03:12 +0200 Subject: [PATCH 044/104] fix --- experimental/unitsconversion/src/provider.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/experimental/unitsconversion/src/provider.rs b/experimental/unitsconversion/src/provider.rs index 3a39e499fa1..40c2e64c21a 100644 --- a/experimental/unitsconversion/src/provider.rs +++ b/experimental/unitsconversion/src/provider.rs @@ -72,23 +72,23 @@ pub enum Sign { /// This struct represents a constant value, which is composed of a numerator, denominator, sign, and type. #[zerovec::make_varule(ConstantValueULE)] +#[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)] +#[cfg_attr(feature = "serde", derive(serde::Deserialize))] #[cfg_attr( feature = "datagen", derive(serde::Serialize, databake::Bake), databake(path = icu_unitsconversion::provider), )] #[zerovec::derive(Serialize, Deserialize, Debug)] -#[cfg_attr(feature = "serde", derive(serde::Deserialize))] -#[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)] pub struct ConstantValue<'data> { // TODO(https://github.com/unicode-org/icu4x/issues/4092). /// The numerator of the constant value. - #[serde(borrow)] + #[cfg_attr(feature = "serde", serde(borrow))] pub numerator: ZeroVec<'data, u8>, // TODO(https://github.com/unicode-org/icu4x/issues/4092). /// The denominator of the constant value. - #[serde(borrow)] + #[cfg_attr(feature = "serde", serde(borrow))] pub denominator: ZeroVec<'data, u8>, /// The sign of the constant value. From 6c7cda60aa989d5d77c11ff0017801566ac1f534 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Mon, 25 Sep 2023 20:16:53 +0200 Subject: [PATCH 045/104] fix tidy --- provider/testdata/Cargo.lock | 68 ++++++++++++++++----------------- tools/depcheck/src/allowlist.rs | 5 +++ 2 files changed, 39 insertions(+), 34 deletions(-) diff --git a/provider/testdata/Cargo.lock b/provider/testdata/Cargo.lock index c3ad4470305..2919829e163 100644 --- a/provider/testdata/Cargo.lock +++ b/provider/testdata/Cargo.lock @@ -227,7 +227,7 @@ checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" [[package]] name = "fixed_decimal" -version = "0.5.3" +version = "0.5.4" dependencies = [ "displaydoc", "smallvec", @@ -263,7 +263,7 @@ checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" [[package]] name = "icu" -version = "1.2.0" +version = "1.3.0" dependencies = [ "icu_calendar", "icu_casemap", @@ -284,7 +284,7 @@ dependencies = [ [[package]] name = "icu_calendar" -version = "1.2.0" +version = "1.3.0" dependencies = [ "calendrical_calculations", "displaydoc", @@ -297,7 +297,7 @@ dependencies = [ [[package]] name = "icu_casemap" -version = "1.2.0" +version = "1.3.0" dependencies = [ "displaydoc", "icu_collections", @@ -310,7 +310,7 @@ dependencies = [ [[package]] name = "icu_collator" -version = "1.2.0" +version = "1.3.0" dependencies = [ "displaydoc", "icu_collections", @@ -326,7 +326,7 @@ dependencies = [ [[package]] name = "icu_collections" -version = "1.2.0" +version = "1.3.0" dependencies = [ "displaydoc", "yoke", @@ -336,7 +336,7 @@ dependencies = [ [[package]] name = "icu_compactdecimal" -version = "0.2.0" +version = "0.2.1" dependencies = [ "displaydoc", "fixed_decimal", @@ -349,7 +349,7 @@ dependencies = [ [[package]] name = "icu_datetime" -version = "1.2.1" +version = "1.3.0" dependencies = [ "displaydoc", "either", @@ -369,7 +369,7 @@ dependencies = [ [[package]] name = "icu_decimal" -version = "1.2.0" +version = "1.3.0" dependencies = [ "displaydoc", "fixed_decimal", @@ -380,7 +380,7 @@ dependencies = [ [[package]] name = "icu_displaynames" -version = "0.10.0" +version = "0.11.0" dependencies = [ "icu_locid", "icu_provider", @@ -390,7 +390,7 @@ dependencies = [ [[package]] name = "icu_list" -version = "1.2.0" +version = "1.3.0" dependencies = [ "displaydoc", "icu_provider", @@ -400,7 +400,7 @@ dependencies = [ [[package]] name = "icu_locid" -version = "1.2.0" +version = "1.3.0" dependencies = [ "displaydoc", "litemap", @@ -412,7 +412,7 @@ dependencies = [ [[package]] name = "icu_locid_transform" -version = "1.2.1" +version = "1.3.0" dependencies = [ "displaydoc", "icu_locid", @@ -424,7 +424,7 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "1.2.0" +version = "1.3.0" dependencies = [ "displaydoc", "icu_collections", @@ -439,7 +439,7 @@ dependencies = [ [[package]] name = "icu_plurals" -version = "1.2.0" +version = "1.3.0" dependencies = [ "displaydoc", "fixed_decimal", @@ -450,7 +450,7 @@ dependencies = [ [[package]] name = "icu_properties" -version = "1.2.0" +version = "1.3.0" dependencies = [ "displaydoc", "icu_collections", @@ -461,7 +461,7 @@ dependencies = [ [[package]] name = "icu_provider" -version = "1.2.0" +version = "1.3.0" dependencies = [ "displaydoc", "icu_locid", @@ -478,7 +478,7 @@ dependencies = [ [[package]] name = "icu_provider_adapters" -version = "1.2.0" +version = "1.3.0" dependencies = [ "icu_locid", "icu_locid_transform", @@ -490,7 +490,7 @@ dependencies = [ [[package]] name = "icu_provider_blob" -version = "1.2.0" +version = "1.3.0" dependencies = [ "icu_provider", "postcard", @@ -501,7 +501,7 @@ dependencies = [ [[package]] name = "icu_provider_macros" -version = "1.2.0" +version = "1.3.0" dependencies = [ "proc-macro2", "quote", @@ -510,7 +510,7 @@ dependencies = [ [[package]] name = "icu_relativetime" -version = "0.1.1" +version = "0.1.2" dependencies = [ "displaydoc", "fixed_decimal", @@ -523,7 +523,7 @@ dependencies = [ [[package]] name = "icu_segmenter" -version = "1.2.1" +version = "1.3.0" dependencies = [ "displaydoc", "icu_collections", @@ -535,7 +535,7 @@ dependencies = [ [[package]] name = "icu_testdata" -version = "1.2.0" +version = "1.3.0" dependencies = [ "criterion", "icu", @@ -565,7 +565,7 @@ dependencies = [ [[package]] name = "icu_timezone" -version = "1.2.0" +version = "1.3.0" dependencies = [ "displaydoc", "icu_calendar", @@ -630,7 +630,7 @@ checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" [[package]] name = "litemap" -version = "0.7.0" +version = "0.7.1" [[package]] name = "log" @@ -901,7 +901,7 @@ checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "tinystr" -version = "0.7.1" +version = "0.7.3" dependencies = [ "displaydoc", "serde", @@ -1055,11 +1055,11 @@ checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" [[package]] name = "writeable" -version = "0.5.2" +version = "0.5.3" [[package]] name = "yoke" -version = "0.7.1" +version = "0.7.2" dependencies = [ "serde", "stable_deref_trait", @@ -1069,7 +1069,7 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.7.1" +version = "0.7.2" dependencies = [ "proc-macro2", "quote", @@ -1079,14 +1079,14 @@ dependencies = [ [[package]] name = "zerofrom" -version = "0.1.2" +version = "0.1.3" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.2" +version = "0.1.3" dependencies = [ "proc-macro2", "quote", @@ -1096,7 +1096,7 @@ dependencies = [ [[package]] name = "zerotrie" -version = "0.0.0" +version = "0.1.0" dependencies = [ "displaydoc", "yoke", @@ -1105,7 +1105,7 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.9.4" +version = "0.10.0" dependencies = [ "serde", "yoke", @@ -1115,7 +1115,7 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.9.4" +version = "0.10.0" dependencies = [ "proc-macro2", "quote", diff --git a/tools/depcheck/src/allowlist.rs b/tools/depcheck/src/allowlist.rs index 08d3a4dda6b..906a4472d10 100644 --- a/tools/depcheck/src/allowlist.rs +++ b/tools/depcheck/src/allowlist.rs @@ -140,6 +140,7 @@ pub const EXTRA_DATAGEN_DEPS: &[&str] = &[ "crlify", "databake", "databake-derive", + "fraction", "elsa", "erased-serde", "icu_codepointtrie_builder", @@ -147,8 +148,12 @@ pub const EXTRA_DATAGEN_DEPS: &[&str] = &[ "itoa", "matrixmultiply", "ndarray", + "num", + "num-bigint", "num-complex", "num-integer", + "num-iter", + "num-rational", "num-traits", "once_cell", "rawpointer", From edd6035041914d69c507c293dcec705a162f1ed1 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Mon, 25 Sep 2023 21:09:28 +0200 Subject: [PATCH 046/104] fix order --- tools/depcheck/src/allowlist.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/depcheck/src/allowlist.rs b/tools/depcheck/src/allowlist.rs index 906a4472d10..6717b7b9841 100644 --- a/tools/depcheck/src/allowlist.rs +++ b/tools/depcheck/src/allowlist.rs @@ -140,12 +140,12 @@ pub const EXTRA_DATAGEN_DEPS: &[&str] = &[ "crlify", "databake", "databake-derive", - "fraction", "elsa", "erased-serde", + "fraction", "icu_codepointtrie_builder", - "itertools", "itoa", + "itertools", "matrixmultiply", "ndarray", "num", From ea7e1bfb45167f3effeebaa36156c0253a54e82e Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Mon, 25 Sep 2023 23:59:15 +0200 Subject: [PATCH 047/104] fix : cargo make ci-job-msrv-features-3 --- experimental/unitsconversion/Cargo.toml | 2 +- experimental/unitsconversion/src/provider.rs | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/experimental/unitsconversion/Cargo.toml b/experimental/unitsconversion/Cargo.toml index e260f043882..ebb0f4f7b0e 100644 --- a/experimental/unitsconversion/Cargo.toml +++ b/experimental/unitsconversion/Cargo.toml @@ -25,7 +25,7 @@ displaydoc = { version = "0.2.3", default-features = false } icu_locid = { workspace = true } icu_provider = { workspace = true, features = ["macros"] } serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true } -zerovec = { workspace = true, features = ["yoke"] } +zerovec = { workspace = true, features = ["yoke", "serde"] } icu_unitsconversion_data = { workspace = true, optional = true } [features] diff --git a/experimental/unitsconversion/src/provider.rs b/experimental/unitsconversion/src/provider.rs index 40c2e64c21a..ea04ad6c938 100644 --- a/experimental/unitsconversion/src/provider.rs +++ b/experimental/unitsconversion/src/provider.rs @@ -73,13 +73,22 @@ pub enum Sign { /// This struct represents a constant value, which is composed of a numerator, denominator, sign, and type. #[zerovec::make_varule(ConstantValueULE)] #[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)] -#[cfg_attr(feature = "serde", derive(serde::Deserialize))] #[cfg_attr( feature = "datagen", - derive(serde::Serialize, databake::Bake), + derive(databake::Bake), databake(path = icu_unitsconversion::provider), )] -#[zerovec::derive(Serialize, Deserialize, Debug)] +#[cfg_attr( + feature = "datagen", + derive(serde::Serialize), + zerovec::derive(Serialize) +)] +#[cfg_attr( + feature = "serde", + derive(serde::Deserialize), + zerovec::derive(Deserialize) +)] +#[zerovec::derive(Debug)] pub struct ConstantValue<'data> { // TODO(https://github.com/unicode-org/icu4x/issues/4092). /// The numerator of the constant value. From 408435dbee7c332b85eb15077b3ad417e6b47e43 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 26 Sep 2023 09:48:24 +0200 Subject: [PATCH 048/104] Update experimental/unitsconversion/src/provider.rs Co-authored-by: Robert Bastian <4706271+robertbastian@users.noreply.github.com> --- experimental/unitsconversion/src/provider.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/unitsconversion/src/provider.rs b/experimental/unitsconversion/src/provider.rs index ea04ad6c938..b794ca8aad4 100644 --- a/experimental/unitsconversion/src/provider.rs +++ b/experimental/unitsconversion/src/provider.rs @@ -38,7 +38,7 @@ pub struct UnitsConstantsV1<'data> { /// This enum is used to represent the type of a constant value. /// It can be either `ConstantType::Actual` or `ConstantType::Approximate`. -/// If the constant type is `ConstantType::Approximate`, it indicates that the value is not definitively accurate. +/// If the constant type is `ConstantType::Approximate`, it indicates that the value is not numerically accurate. #[zerovec::make_ule(ConstantTypeULE)] #[cfg_attr( feature = "datagen", From bcf11b1ae39cd13d0c65e31a0f6e47266ee3dd8e Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 26 Sep 2023 10:22:33 +0200 Subject: [PATCH 049/104] make it more concise. --- .../src/transform/cldr/units/helpers.rs | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index 68351f68070..3232fc1a1be 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -50,18 +50,17 @@ pub fn convert_scientific_notation_to_fraction( let exponent = i64::from_str(exponent) .map_err(|_| DataError::custom("the exponent is not a valid number"))?; - let mut result = base; - let generic_ten: GenericFraction = - GenericFraction::new(BigUint::from(10u32), BigUint::from(1u32)); - if exponent > 0 { - for _ in 0..exponent as u32 { - result = result.mul(generic_ten.clone()); - } + let result = if exponent >= 0 { + base.mul(GenericFraction::new( + BigUint::from(10u32).pow(exponent as u32), + BigUint::from(1u32), + )) } else { - for _ in 0..(-exponent) as u32 { - result = result.div(generic_ten.clone()); - } - } + base.div(GenericFraction::new( + BigUint::from(10u32).pow((-exponent) as u32), + BigUint::from(1u32), + )) + }; Ok(result) } From 9b2926a6045897c4d4eba24d1246388a845e7ff1 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 26 Sep 2023 10:26:59 +0200 Subject: [PATCH 050/104] rename a fn --- provider/datagen/src/transform/cldr/units/helpers.rs | 10 +++++----- provider/datagen/src/transform/cldr/units/mod.rs | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index 3232fc1a1be..3abfaea97f4 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -240,7 +240,7 @@ fn test_convert_array_of_strings_to_fraction() { /// - "1 * 2 / 3 * ft_to_m" is split into (["1", "2"], ["3" , "ft_to_m"]) /// - "/2" is split into (["1"], ["2"]) /// - "2" is split into (["2"], ["1"]) -pub fn split_constant_string( +pub fn convert_constant_to_num_denom_strings( constant_string: &str, ) -> Result<(Vec, Vec), DataError> { let cleaned_string = remove_whitespace(constant_string); @@ -280,7 +280,7 @@ pub fn split_constant_string( fn test_split_constant_string() { let input = "1/2"; let expected = (vec!["1".to_string()], vec!["2".to_string()]); - let actual = split_constant_string(input).unwrap(); + let actual = convert_constant_to_num_denom_strings(input).unwrap(); assert_eq!(expected, actual); let input = "1 * 2 / 3 * ft_to_m"; @@ -288,16 +288,16 @@ fn test_split_constant_string() { vec!["1".to_string(), "2".to_string()], vec!["3".to_string(), "ft_to_m".to_string()], ); - let actual = split_constant_string(input).unwrap(); + let actual = convert_constant_to_num_denom_strings(input).unwrap(); assert_eq!(expected, actual); let input = "/2"; let expected = (vec!["1".to_string()], vec!["2".to_string()]); - let actual = split_constant_string(input).unwrap(); + let actual = convert_constant_to_num_denom_strings(input).unwrap(); assert_eq!(expected, actual); let input = "2"; let expected = (vec!["2".to_string()], vec!["1".to_string()]); - let actual = split_constant_string(input).unwrap(); + let actual = convert_constant_to_num_denom_strings(input).unwrap(); assert_eq!(expected, actual); } diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index a44e66dbcfd..ae36cab02e2 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -7,8 +7,8 @@ pub mod helpers; use std::collections::BTreeMap; use self::helpers::{ - contains_alphabetic_chars, convert_array_of_strings_to_fraction, remove_whitespace, - split_constant_string, transform_fraction_to_constant_value, + contains_alphabetic_chars, convert_array_of_strings_to_fraction, + convert_constant_to_num_denom_strings, remove_whitespace, transform_fraction_to_constant_value, }; use crate::transform::cldr::{cldr_serde, units::helpers::is_scientific_number}; use icu_provider::{ @@ -34,7 +34,7 @@ impl DataProvider for crate::DatagenProvider { BTreeMap::<&str, (Vec, Vec, ConstantType)>::new(); for (cons_name, cons_value) in constants { let value = remove_whitespace(&cons_value.value); - let (num, den) = split_constant_string(&value)?; + let (num, den) = convert_constant_to_num_denom_strings(&value)?; let constant_type = match cons_value.status.as_deref() { Some("approximate") => ConstantType::Approximate, From eea4da088937d61781a4597c96e088af30557a77 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 26 Sep 2023 10:30:28 +0200 Subject: [PATCH 051/104] make the removal more clear --- provider/datagen/src/transform/cldr/units/helpers.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index 3abfaea97f4..1bac9a77019 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -243,11 +243,11 @@ fn test_convert_array_of_strings_to_fraction() { pub fn convert_constant_to_num_denom_strings( constant_string: &str, ) -> Result<(Vec, Vec), DataError> { - let cleaned_string = remove_whitespace(constant_string); + let constant_string = remove_whitespace(constant_string); let mut numerator = Vec::::new(); let mut denominator = Vec::::new(); - let mut split = cleaned_string.split('/'); + let mut split = constant_string.split('/'); if split.clone().count() > 2 { return Err(DataError::custom("Invalid constant string")); } From 242fbf0b4cbf562bc6c66735113efefb3c6169c3 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 26 Sep 2023 10:41:46 +0200 Subject: [PATCH 052/104] fix name --- experimental/unitsconversion/src/provider.rs | 4 ++-- .../src/transform/cldr/units/helpers.rs | 6 ++--- .../datagen/src/transform/cldr/units/mod.rs | 24 +++++++++---------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/experimental/unitsconversion/src/provider.rs b/experimental/unitsconversion/src/provider.rs index b794ca8aad4..3456b5032df 100644 --- a/experimental/unitsconversion/src/provider.rs +++ b/experimental/unitsconversion/src/provider.rs @@ -48,7 +48,7 @@ pub struct UnitsConstantsV1<'data> { #[cfg_attr(feature = "serde", derive(serde::Deserialize))] #[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)] #[repr(u8)] -pub enum ConstantType { +pub enum ConstantExactness { #[default] Actual = 0, Approximate = 1, @@ -104,5 +104,5 @@ pub struct ConstantValue<'data> { pub sign: Sign, /// The type of the constant value. - pub constant_type: ConstantType, + pub constant_type: ConstantExactness, } diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index 1bac9a77019..6620ad1a856 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -9,7 +9,7 @@ use std::{ use fraction::GenericFraction; use icu_provider::DataError; -use icu_unitsconversion::provider::{ConstantType, Sign}; +use icu_unitsconversion::provider::{ConstantExactness, Sign}; use num_bigint::BigUint; /// Removes all whitespace from a string. @@ -144,8 +144,8 @@ pub fn is_scientific_number(s: &str) -> bool { /// Transforms a fractional number into a constant value. pub fn transform_fraction_to_constant_value( fraction: GenericFraction, - constant_type: ConstantType, -) -> Result<(Vec, Vec, Sign, ConstantType), DataError> { + constant_type: ConstantExactness, +) -> Result<(Vec, Vec, Sign, ConstantExactness), DataError> { let numerator = match fraction.numer() { Some(numerator) => numerator.to_bytes_le(), None => return Err(DataError::custom("the numerator is too large")), diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index ae36cab02e2..3629a2f9b29 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -16,7 +16,7 @@ use icu_provider::{ DataResponse, }; use icu_unitsconversion::provider::{ - ConstantType, ConstantValue, UnitsConstantsV1, UnitsConstantsV1Marker, + ConstantExactness, ConstantValue, UnitsConstantsV1, UnitsConstantsV1Marker, }; use zerovec::{ZeroMap, ZeroVec}; @@ -31,14 +31,14 @@ impl DataProvider for crate::DatagenProvider { let constants = &units_data.supplemental.unit_constants.constants; let mut constants_map_in_str_form = - BTreeMap::<&str, (Vec, Vec, ConstantType)>::new(); + BTreeMap::<&str, (Vec, Vec, ConstantExactness)>::new(); for (cons_name, cons_value) in constants { let value = remove_whitespace(&cons_value.value); let (num, den) = convert_constant_to_num_denom_strings(&value)?; let constant_type = match cons_value.status.as_deref() { - Some("approximate") => ConstantType::Approximate, - _ => ConstantType::Actual, + Some("approximate") => ConstantExactness::Approximate, + _ => ConstantExactness::Actual, }; constants_map_in_str_form.insert(cons_name, (num, den, constant_type)); @@ -49,7 +49,7 @@ impl DataProvider for crate::DatagenProvider { loop { num_of_const_with_text = 0; let mut constants_with_constants_map_replaceable = - BTreeMap::<&str, (Vec, Vec, ConstantType)>::new(); + BTreeMap::<&str, (Vec, Vec, ConstantExactness)>::new(); for (cons_name, (num, den, constant_type)) in constants_map_in_str_form.iter() { let mut temp_num = num.clone(); let mut temp_den = den.clone(); @@ -71,8 +71,8 @@ impl DataProvider for crate::DatagenProvider { temp_num.append(&mut rnum.clone()); temp_den.append(&mut rden.clone()); - if *rconstant_type == ConstantType::Approximate { - temp_constant_type = ConstantType::Approximate; + if *rconstant_type == ConstantExactness::Approximate { + temp_constant_type = ConstantExactness::Approximate; } } } @@ -93,8 +93,8 @@ impl DataProvider for crate::DatagenProvider { temp_num.append(&mut rden.clone()); temp_den.append(&mut rnum.clone()); - if *rconstant_type == ConstantType::Approximate { - temp_constant_type = ConstantType::Approximate; + if *rconstant_type == ConstantExactness::Approximate { + temp_constant_type = ConstantExactness::Approximate; } } } @@ -178,7 +178,7 @@ fn test_basic() { numerator: expected_ft_to_m.numer().unwrap().to_le_bytes().into(), denominator: expected_ft_to_m.denom().unwrap().to_le_bytes().into(), sign: Sign::Positive, - constant_type: ConstantType::Actual, + constant_type: ConstantExactness::Actual, }) .as_ref() ); @@ -195,7 +195,7 @@ fn test_basic() { numerator: expected_ft2_to_m2.numer().unwrap().to_le_bytes().into(), denominator: expected_ft2_to_m2.denom().unwrap().to_le_bytes().into(), sign: Sign::Positive, - constant_type: ConstantType::Actual, + constant_type: ConstantExactness::Actual, }) .as_ref() ); @@ -216,7 +216,7 @@ fn test_basic() { numerator: expected_ft3_to_m3.numer().unwrap().to_le_bytes().into(), denominator: expected_ft3_to_m3.denom().unwrap().to_le_bytes().into(), sign: Sign::Positive, - constant_type: ConstantType::Actual, + constant_type: ConstantExactness::Actual, }) .as_ref() ); From 9c279424f42c2f8b2e3141e2b01175604dc334e8 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 26 Sep 2023 10:59:00 +0200 Subject: [PATCH 053/104] fix comments --- experimental/unitsconversion/src/provider.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/experimental/unitsconversion/src/provider.rs b/experimental/unitsconversion/src/provider.rs index 3456b5032df..fcae7a6a435 100644 --- a/experimental/unitsconversion/src/provider.rs +++ b/experimental/unitsconversion/src/provider.rs @@ -39,7 +39,7 @@ pub struct UnitsConstantsV1<'data> { /// This enum is used to represent the type of a constant value. /// It can be either `ConstantType::Actual` or `ConstantType::Approximate`. /// If the constant type is `ConstantType::Approximate`, it indicates that the value is not numerically accurate. -#[zerovec::make_ule(ConstantTypeULE)] +#[zerovec::make_ule(ConstantExactnessULE)] #[cfg_attr( feature = "datagen", derive(serde::Serialize, databake::Bake), @@ -91,18 +91,18 @@ pub enum Sign { #[zerovec::derive(Debug)] pub struct ConstantValue<'data> { // TODO(https://github.com/unicode-org/icu4x/issues/4092). - /// The numerator of the constant value. + /// The numerator of the constant value in bytes starting with the least significant byte. #[cfg_attr(feature = "serde", serde(borrow))] pub numerator: ZeroVec<'data, u8>, // TODO(https://github.com/unicode-org/icu4x/issues/4092). - /// The denominator of the constant value. + /// The denominator of the constant value in bytes starting with the least significant byte. #[cfg_attr(feature = "serde", serde(borrow))] pub denominator: ZeroVec<'data, u8>, - /// The sign of the constant value. + /// Determines whether the constant value is positive or negative. pub sign: Sign, - /// The type of the constant value. + /// Determines whether the constant value is actual or approximate. pub constant_type: ConstantExactness, } From a45cd8ca342d5479f60f27cc1fe5a2bac5079e64 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 26 Sep 2023 11:04:50 +0200 Subject: [PATCH 054/104] adjust function --- provider/datagen/src/transform/cldr/units/helpers.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index 6620ad1a856..d40496836c9 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -244,8 +244,6 @@ pub fn convert_constant_to_num_denom_strings( constant_string: &str, ) -> Result<(Vec, Vec), DataError> { let constant_string = remove_whitespace(constant_string); - let mut numerator = Vec::::new(); - let mut denominator = Vec::::new(); let mut split = constant_string.split('/'); if split.clone().count() > 2 { @@ -270,10 +268,7 @@ pub fn convert_constant_to_num_denom_strings( .collect() }; - numerator.extend(numerator_values); - denominator.extend(denominator_values); - - Ok((numerator, denominator)) + Ok((numerator_values, denominator_values)) } // TODO: move this to the comment above. #[test] From e339d6341107a245c550a0050c776d9462fcadbf Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 26 Sep 2023 11:05:06 +0200 Subject: [PATCH 055/104] fix loop breaker --- provider/datagen/src/transform/cldr/units/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 3629a2f9b29..af2692cbd74 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -47,7 +47,7 @@ impl DataProvider for crate::DatagenProvider { // This loop iterates over the constants, replacing any string values with their corresponding constant values. let mut num_of_const_with_text; loop { - num_of_const_with_text = 0; + num_of_const_with_text = false; let mut constants_with_constants_map_replaceable = BTreeMap::<&str, (Vec, Vec, ConstantExactness)>::new(); for (cons_name, (num, den, constant_type)) in constants_map_in_str_form.iter() { @@ -62,7 +62,7 @@ impl DataProvider for crate::DatagenProvider { continue; } - num_of_const_with_text += 1; + num_of_const_with_text = true; if let Some((rnum, rden, rconstant_type)) = constants_map_in_str_form.get(temp_num[i].as_str()) { @@ -84,7 +84,7 @@ impl DataProvider for crate::DatagenProvider { continue; } - num_of_const_with_text += 1; + num_of_const_with_text = true; if let Some((rnum, rden, rconstant_type)) = constants_map_in_str_form.get(temp_den[i].as_str()) { @@ -106,7 +106,7 @@ impl DataProvider for crate::DatagenProvider { constants_map_in_str_form.clear(); constants_map_in_str_form = constants_with_constants_map_replaceable; - if num_of_const_with_text == 0 { + if num_of_const_with_text == false { break; } } From 20fac57d641f85109ba08b14f505e66fcde193d4 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 26 Sep 2023 11:13:32 +0200 Subject: [PATCH 056/104] make the code concise --- provider/datagen/src/transform/cldr/units/mod.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index af2692cbd74..f47f858bffa 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -68,8 +68,8 @@ impl DataProvider for crate::DatagenProvider { { temp_num.remove(i); // append the elements in rnum to num and rden to den - temp_num.append(&mut rnum.clone()); - temp_den.append(&mut rden.clone()); + temp_num.extend(rnum.clone().into_iter()); + temp_den.extend(rden.clone().into_iter()); if *rconstant_type == ConstantExactness::Approximate { temp_constant_type = ConstantExactness::Approximate; @@ -90,8 +90,8 @@ impl DataProvider for crate::DatagenProvider { { temp_den.remove(i); // append the elements in rnum to den and rden to num - temp_num.append(&mut rden.clone()); - temp_den.append(&mut rnum.clone()); + temp_num.extend(rden.clone().into_iter()); + temp_den.extend(rnum.clone().into_iter()); if *rconstant_type == ConstantExactness::Approximate { temp_constant_type = ConstantExactness::Approximate; @@ -103,7 +103,6 @@ impl DataProvider for crate::DatagenProvider { .insert(cons_name, (temp_num, temp_den, temp_constant_type)); } - constants_map_in_str_form.clear(); constants_map_in_str_form = constants_with_constants_map_replaceable; if num_of_const_with_text == false { From 1860cbd57f59c6a7c09d95bd3ed324c02e618b99 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 26 Sep 2023 11:20:52 +0200 Subject: [PATCH 057/104] make test concise --- provider/datagen/src/transform/cldr/units/mod.rs | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index f47f858bffa..743f55d2de2 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -154,7 +154,6 @@ fn test_basic() { use icu_unitsconversion::provider::*; use num_bigint::BigUint; use num_traits::ToBytes; - use std::ops::Mul; let provider = crate::DatagenProvider::new_testing(); @@ -184,8 +183,8 @@ fn test_basic() { let ft2_to_m2 = constants.get("ft2_to_m2").unwrap(); let expected_ft2_to_m2 = GenericFraction::::new( - BigUint::from(3048u32).mul(&BigUint::from(3048u32)), - BigUint::from(10000u32).mul(&BigUint::from(10000u32)), + BigUint::from(3048u32).pow(2), + BigUint::from(10000u32).pow(2), ); assert_eq!( @@ -201,12 +200,8 @@ fn test_basic() { let ft3_to_m3 = constants.get("ft3_to_m3").unwrap(); let expected_ft3_to_m3 = GenericFraction::::new( - BigUint::from(3048u32) - .mul(&BigUint::from(3048u32)) - .mul(&BigUint::from(3048u32)), - BigUint::from(10000u32) - .mul(&BigUint::from(10000u32)) - .mul(&BigUint::from(10000u32)), + BigUint::from(3048u32).pow(3), + BigUint::from(10000u32).pow(3), ); assert_eq!( From 4a1096743b0f70c09c40e3d3808ea96b6bb5730c Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 26 Sep 2023 11:25:33 +0200 Subject: [PATCH 058/104] fix clippy --- provider/datagen/src/transform/cldr/units/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 743f55d2de2..968f3088f4c 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -105,7 +105,7 @@ impl DataProvider for crate::DatagenProvider { constants_map_in_str_form = constants_with_constants_map_replaceable; - if num_of_const_with_text == false { + if !num_of_const_with_text { break; } } From 940909e0adfae2cb467a00c10781e8681d51d7b4 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 26 Sep 2023 11:27:23 +0200 Subject: [PATCH 059/104] fix naming --- provider/datagen/src/transform/cldr/units/mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 968f3088f4c..1efe9083a2b 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -45,9 +45,9 @@ impl DataProvider for crate::DatagenProvider { } // This loop iterates over the constants, replacing any string values with their corresponding constant values. - let mut num_of_const_with_text; + let mut has_internal_constants; loop { - num_of_const_with_text = false; + has_internal_constants = false; let mut constants_with_constants_map_replaceable = BTreeMap::<&str, (Vec, Vec, ConstantExactness)>::new(); for (cons_name, (num, den, constant_type)) in constants_map_in_str_form.iter() { @@ -62,7 +62,7 @@ impl DataProvider for crate::DatagenProvider { continue; } - num_of_const_with_text = true; + has_internal_constants = true; if let Some((rnum, rden, rconstant_type)) = constants_map_in_str_form.get(temp_num[i].as_str()) { @@ -84,7 +84,7 @@ impl DataProvider for crate::DatagenProvider { continue; } - num_of_const_with_text = true; + has_internal_constants = true; if let Some((rnum, rden, rconstant_type)) = constants_map_in_str_form.get(temp_den[i].as_str()) { @@ -105,7 +105,7 @@ impl DataProvider for crate::DatagenProvider { constants_map_in_str_form = constants_with_constants_map_replaceable; - if !num_of_const_with_text { + if !has_internal_constants { break; } } From 8248b96a313794884b760108710edbcd948816fc Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 26 Sep 2023 21:25:08 +0200 Subject: [PATCH 060/104] fix convert_constant_to_num_denom_strings --- .../src/transform/cldr/units/helpers.rs | 41 ++++++++++++++++--- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index d40496836c9..a27250658e3 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -240,11 +240,11 @@ fn test_convert_array_of_strings_to_fraction() { /// - "1 * 2 / 3 * ft_to_m" is split into (["1", "2"], ["3" , "ft_to_m"]) /// - "/2" is split into (["1"], ["2"]) /// - "2" is split into (["2"], ["1"]) +/// - "1E2" is split into (["1E2"], ["1"]) +/// - "1 2 * 3" is an invalid constant string pub fn convert_constant_to_num_denom_strings( constant_string: &str, ) -> Result<(Vec, Vec), DataError> { - let constant_string = remove_whitespace(constant_string); - let mut split = constant_string.split('/'); if split.clone().count() > 2 { return Err(DataError::custom("Invalid constant string")); @@ -253,10 +253,20 @@ pub fn convert_constant_to_num_denom_strings( let numerator_string = split.next().unwrap_or("1"); let denominator_string = split.next().unwrap_or("1"); + let mut has_whitespace_within = false; let numerator_values = if numerator_string.is_empty() { vec!["1".to_string()] } else { - numerator_string.split('*').map(|s| s.to_string()).collect() + numerator_string + .split('*') + .map(|s| { + let s = s.trim(); + if s.chars().any(char::is_whitespace) { + has_whitespace_within = true; + } + s.to_string() + }) + .collect() }; let denominator_values = if denominator_string.is_empty() { @@ -264,10 +274,22 @@ pub fn convert_constant_to_num_denom_strings( } else { denominator_string .split('*') - .map(|s| s.to_string()) - .collect() + .map(|s| { + let s = s.trim(); + if s.chars().any(char::is_whitespace) { + has_whitespace_within = true; + } + s.to_string() + }) + .collect::>() }; + if has_whitespace_within { + return Err(DataError::custom( + "The constant string contains internal white spaces", + )); + } + Ok((numerator_values, denominator_values)) } // TODO: move this to the comment above. @@ -295,4 +317,13 @@ fn test_split_constant_string() { let expected = (vec!["2".to_string()], vec!["1".to_string()]); let actual = convert_constant_to_num_denom_strings(input).unwrap(); assert_eq!(expected, actual); + + let input = "1E2"; + let expected = (vec!["1E2".to_string()], vec!["1".to_string()]); + let actual = convert_constant_to_num_denom_strings(input).unwrap(); + assert_eq!(expected, actual); + + let input = "1 2 * 3"; + let actual = convert_constant_to_num_denom_strings(input); + assert!(actual.is_err()); } From e965ce53a2ab01afa8a722392a111276824084b0 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 26 Sep 2023 22:17:18 +0200 Subject: [PATCH 061/104] use BigRational --- Cargo.lock | 2 + provider/datagen/Cargo.toml | 1 + .../src/transform/cldr/units/helpers.rs | 97 ++++++++++--------- 3 files changed, 52 insertions(+), 48 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 50f5a61335a..6000129680e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1457,6 +1457,7 @@ dependencies = [ "memchr", "ndarray", "num-bigint", + "num-rational", "num-traits", "once_cell", "postcard", @@ -2323,6 +2324,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" dependencies = [ "autocfg", + "num-bigint", "num-integer", "num-traits", ] diff --git a/provider/datagen/Cargo.toml b/provider/datagen/Cargo.toml index 297dd245777..2e56782b02e 100644 --- a/provider/datagen/Cargo.toml +++ b/provider/datagen/Cargo.toml @@ -92,6 +92,7 @@ ureq = { version = "2", optional = true } fraction = { version = "0.13.1", default-features = false } num-bigint = { version = "0.4.4", default-features = false } num-traits = { version = "0.2.14", default-features = false } +num-rational = { version = "0.4.1", features = ["num-bigint"], default-features = false } # Dependencies for "bin" feature clap = { version = "4", optional = true, features = ["derive"] } diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index a27250658e3..4177919172e 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -2,15 +2,12 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use std::{ - ops::{Div, Mul}, - str::FromStr, -}; +use core::str::FromStr; -use fraction::GenericFraction; use icu_provider::DataError; use icu_unitsconversion::provider::{ConstantExactness, Sign}; -use num_bigint::BigUint; +use num_bigint::BigInt; +use num_rational::BigRational; /// Removes all whitespace from a string. pub fn remove_whitespace(s: &str) -> String { @@ -26,6 +23,22 @@ fn test_remove_whitespace() { assert_eq!(expected, actual); } +fn convert_decimal_to_bigrational(decimal: &str) -> Result { + let parts: Vec<&str> = decimal.split('.').collect(); + match parts.len() { + 1 => BigRational::from_str(parts[0]) + .map_err(|_| DataError::custom("the integer-part is not a valid number")), + 2 => { + let numerator = BigInt::from_str(parts.join("").as_str()).map_err(|_| { + DataError::custom("the integer-part and fractional-part are not a valid number") + })?; + let denominator = BigInt::from(10u32).pow(parts[1].len() as u32); + Ok(BigRational::new(numerator, denominator)) + } + _ => Err(DataError::custom("the base is not a valid number")), + } +} + /// Converts a scientific notation number represented as a string into a GenericFraction. /// Examples: /// - "1E2" is converted to 100 @@ -33,9 +46,7 @@ fn test_remove_whitespace() { /// - "1.5E2" is converted to 150 /// - "1.5E-2" is converted to 15/1000 /// - "1.5E-2.5" is an invalid scientific notation number -pub fn convert_scientific_notation_to_fraction( - number: &str, -) -> Result, DataError> { +pub fn convert_scientific_notation_to_fraction(number: &str) -> Result { let number = remove_whitespace(number); let parts: Vec<&str> = number.split('E').collect(); if parts.len() > 2 { @@ -43,48 +54,38 @@ pub fn convert_scientific_notation_to_fraction( "the number is not a scientific notation number", )); } - let base = parts.first().unwrap_or(&"0"); + let base = parts.first().unwrap_or(&"1"); let exponent = parts.get(1).unwrap_or(&"0"); - let base: GenericFraction = GenericFraction::from_str(base) - .map_err(|_| DataError::custom("the number is not a valid number"))?; - let exponent = i64::from_str(exponent) - .map_err(|_| DataError::custom("the exponent is not a valid number"))?; - let result = if exponent >= 0 { - base.mul(GenericFraction::new( - BigUint::from(10u32).pow(exponent as u32), - BigUint::from(1u32), - )) - } else { - base.div(GenericFraction::new( - BigUint::from(10u32).pow((-exponent) as u32), - BigUint::from(1u32), - )) - }; + let ten = BigRational::from(BigInt::from(10u32)); + let base = convert_decimal_to_bigrational(base) + .map_err(|_| DataError::custom("the base is not a valid number"))?; + let exponent = i32::from_str(exponent) + .map_err(|_| DataError::custom("the exponent is not a valid number"))?; - Ok(result) + Ok(base * ten.pow(exponent)) } // TODO: move this to the comment above. #[test] fn test_convert_scientific_notation_to_fraction() { let input = "1E2"; - let expected = GenericFraction::new(BigUint::from(100u32), BigUint::from(1u32)); + let expected = BigRational::new(BigInt::from(100u32), BigInt::from(1u32)); let actual = convert_scientific_notation_to_fraction(input).unwrap(); assert_eq!(expected, actual); let input = "1E-2"; - let expected = GenericFraction::new(BigUint::from(1u32), BigUint::from(100u32)); + let expected = BigRational::new(BigInt::from(1u32), BigInt::from(100u32)); let actual = convert_scientific_notation_to_fraction(input).unwrap(); assert_eq!(expected, actual); let input = "1.5E2"; - let expected = GenericFraction::new(BigUint::from(150u32), BigUint::from(1u32)); + let expected = BigRational::new(BigInt::from(150u32), BigInt::from(1u32)); let actual = convert_scientific_notation_to_fraction(input).unwrap(); assert_eq!(expected, actual); let input = "1.5E-2"; - let expected = GenericFraction::new(BigUint::from(15u32), BigUint::from(1000u32)); + let expected = BigRational::new(BigInt::from(15u32), BigInt::from(1000u32)); let actual = convert_scientific_notation_to_fraction(input).unwrap(); assert_eq!(expected, actual); @@ -143,25 +144,25 @@ pub fn is_scientific_number(s: &str) -> bool { /// Transforms a fractional number into a constant value. pub fn transform_fraction_to_constant_value( - fraction: GenericFraction, + fraction: BigRational, constant_type: ConstantExactness, ) -> Result<(Vec, Vec, Sign, ConstantExactness), DataError> { - let numerator = match fraction.numer() { + let numerator = match fraction.numer().to_biguint() { Some(numerator) => numerator.to_bytes_le(), None => return Err(DataError::custom("the numerator is too large")), }; - let denominator = match fraction.denom() { + let denominator = match fraction.denom().to_biguint() { Some(denominator) => denominator.to_bytes_le(), None => return Err(DataError::custom("the denominator is too large")), }; - let sign = match fraction.sign() { - Some(sign) => match sign { - fraction::Sign::Plus => Sign::Positive, - fraction::Sign::Minus => Sign::Negative, - }, - None => return Err(DataError::custom("the sign is not valid")), + let sign = match fraction.numer().sign() { + num_bigint::Sign::Plus => Sign::Positive, + num_bigint::Sign::Minus => Sign::Negative, + num_bigint::Sign::NoSign => { + return Err(DataError::custom("the numerator is zero")); + } }; Ok((numerator, denominator, sign, constant_type)) @@ -178,17 +179,17 @@ pub fn transform_fraction_to_constant_value( pub fn convert_array_of_strings_to_fraction( numerator_strings: &[String], denominator_strings: &[String], -) -> Result, DataError> { - let mut fraction = GenericFraction::new(BigUint::from(1u32), BigUint::from(1u32)); +) -> Result { + let mut fraction = BigRational::new(BigInt::from(1u32), BigInt::from(1u32)); for numerator in numerator_strings { let num_fraction = convert_scientific_notation_to_fraction(numerator)?; - fraction = fraction.mul(num_fraction); + fraction = fraction * num_fraction; } for denominator in denominator_strings { let den_fraction = convert_scientific_notation_to_fraction(denominator)?; - fraction = fraction.div(den_fraction); + fraction = fraction / den_fraction; } Ok(fraction) @@ -199,19 +200,19 @@ pub fn convert_array_of_strings_to_fraction( fn test_convert_array_of_strings_to_fraction() { let numerator = vec!["1".to_string()]; let denominator = vec!["2".to_string()]; - let expected = GenericFraction::new(BigUint::from(1u32), BigUint::from(2u32)); + let expected = BigRational::new(BigInt::from(1u32), BigInt::from(2u32)); let actual = convert_array_of_strings_to_fraction(&numerator, &denominator).unwrap(); assert_eq!(expected, actual); let numerator = vec!["1".to_string(), "2".to_string()]; let denominator = vec!["3".to_string(), "1E2".to_string()]; - let expected = GenericFraction::new(BigUint::from(2u32), BigUint::from(300u32)); + let expected = BigRational::new(BigInt::from(2u32), BigInt::from(300u32)); let actual = convert_array_of_strings_to_fraction(&numerator, &denominator).unwrap(); assert_eq!(expected, actual); let numerator = vec!["1".to_string(), "2".to_string()]; let denominator = vec!["3".to_string(), "1E-2".to_string()]; - let expected = GenericFraction::new(BigUint::from(200u32), BigUint::from(3u32)); + let expected = BigRational::new(BigInt::from(200u32), BigInt::from(3u32)); let actual = convert_array_of_strings_to_fraction(&numerator, &denominator).unwrap(); assert_eq!(expected, actual); @@ -222,13 +223,13 @@ fn test_convert_array_of_strings_to_fraction() { let numerator = vec!["1E2".to_string()]; let denominator = vec!["2".to_string()]; - let expected = GenericFraction::new(BigUint::from(50u32), BigUint::from(1u32)); + let expected = BigRational::new(BigInt::from(50u32), BigInt::from(1u32)); let actual = convert_array_of_strings_to_fraction(&numerator, &denominator).unwrap(); assert_eq!(expected, actual); let numerator = vec!["1E2".to_string(), "2".to_string()]; let denominator = vec!["3".to_string(), "1E2".to_string()]; - let expected = GenericFraction::new(BigUint::from(2u32), BigUint::from(3u32)); + let expected = BigRational::new(BigInt::from(2u32), BigInt::from(3u32)); let actual = convert_array_of_strings_to_fraction(&numerator, &denominator).unwrap(); assert_eq!(expected, actual); } From ac9daf5cdbd2d2d32b4b69b4a4c6642b83951e9f Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 26 Sep 2023 22:19:41 +0200 Subject: [PATCH 062/104] fix fn name --- .../datagen/src/transform/cldr/units/helpers.rs | 16 ++++++++-------- provider/datagen/src/transform/cldr/units/mod.rs | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index 4177919172e..595713b290a 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -168,7 +168,7 @@ pub fn transform_fraction_to_constant_value( Ok((numerator, denominator, sign, constant_type)) } -/// Converts vectors of numerator and denominator strings to a fraction. +/// Converts slices of numerator and denominator strings to a fraction. /// Examples: /// - ["1"], ["2"] is converted to 1/2 /// - ["1", "2"], ["3", "1E2"] is converted to 1*2/(3*1E2) --> 2/300 @@ -176,7 +176,7 @@ pub fn transform_fraction_to_constant_value( /// - ["1", "2"], ["3", "1E-2.5"] is an invalid scientific notation number /// - ["1E2"], ["2"] is converted to 1E2/2 --> 100/2 --> 50/1 /// - ["1E2", "2"], ["3", "1E2"] is converted to 1E2*2/(3*1E2) --> 2/3 -pub fn convert_array_of_strings_to_fraction( +pub fn convert_slices_to_fraction( numerator_strings: &[String], denominator_strings: &[String], ) -> Result { @@ -201,36 +201,36 @@ fn test_convert_array_of_strings_to_fraction() { let numerator = vec!["1".to_string()]; let denominator = vec!["2".to_string()]; let expected = BigRational::new(BigInt::from(1u32), BigInt::from(2u32)); - let actual = convert_array_of_strings_to_fraction(&numerator, &denominator).unwrap(); + let actual = convert_slices_to_fraction(&numerator, &denominator).unwrap(); assert_eq!(expected, actual); let numerator = vec!["1".to_string(), "2".to_string()]; let denominator = vec!["3".to_string(), "1E2".to_string()]; let expected = BigRational::new(BigInt::from(2u32), BigInt::from(300u32)); - let actual = convert_array_of_strings_to_fraction(&numerator, &denominator).unwrap(); + let actual = convert_slices_to_fraction(&numerator, &denominator).unwrap(); assert_eq!(expected, actual); let numerator = vec!["1".to_string(), "2".to_string()]; let denominator = vec!["3".to_string(), "1E-2".to_string()]; let expected = BigRational::new(BigInt::from(200u32), BigInt::from(3u32)); - let actual = convert_array_of_strings_to_fraction(&numerator, &denominator).unwrap(); + let actual = convert_slices_to_fraction(&numerator, &denominator).unwrap(); assert_eq!(expected, actual); let numerator = vec!["1".to_string(), "2".to_string()]; let denominator = vec!["3".to_string(), "1E-2.5".to_string()]; - let actual = convert_array_of_strings_to_fraction(&numerator, &denominator); + let actual = convert_slices_to_fraction(&numerator, &denominator); assert!(actual.is_err()); let numerator = vec!["1E2".to_string()]; let denominator = vec!["2".to_string()]; let expected = BigRational::new(BigInt::from(50u32), BigInt::from(1u32)); - let actual = convert_array_of_strings_to_fraction(&numerator, &denominator).unwrap(); + let actual = convert_slices_to_fraction(&numerator, &denominator).unwrap(); assert_eq!(expected, actual); let numerator = vec!["1E2".to_string(), "2".to_string()]; let denominator = vec!["3".to_string(), "1E2".to_string()]; let expected = BigRational::new(BigInt::from(2u32), BigInt::from(3u32)); - let actual = convert_array_of_strings_to_fraction(&numerator, &denominator).unwrap(); + let actual = convert_slices_to_fraction(&numerator, &denominator).unwrap(); assert_eq!(expected, actual); } diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 1efe9083a2b..f685c58cabb 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -7,8 +7,8 @@ pub mod helpers; use std::collections::BTreeMap; use self::helpers::{ - contains_alphabetic_chars, convert_array_of_strings_to_fraction, - convert_constant_to_num_denom_strings, remove_whitespace, transform_fraction_to_constant_value, + contains_alphabetic_chars, convert_constant_to_num_denom_strings, convert_slices_to_fraction, + remove_whitespace, transform_fraction_to_constant_value, }; use crate::transform::cldr::{cldr_serde, units::helpers::is_scientific_number}; use icu_provider::{ @@ -113,7 +113,7 @@ impl DataProvider for crate::DatagenProvider { let mut constants_map = BTreeMap::<&str, ConstantValue>::new(); for (cons_name, (num, den, constant_type)) in constants_map_in_str_form.iter() { - let value = convert_array_of_strings_to_fraction(num, den)?; + let value = convert_slices_to_fraction(num, den)?; let (num, den, sign, cons_type) = transform_fraction_to_constant_value(value, *constant_type)?; constants_map.insert( From 9af5b98dff0ae79d406ce4f168645a406955e3ad Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 26 Sep 2023 22:20:23 +0200 Subject: [PATCH 063/104] rename --- provider/datagen/src/transform/cldr/units/helpers.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index 595713b290a..ce89b578fab 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -295,7 +295,7 @@ pub fn convert_constant_to_num_denom_strings( } // TODO: move this to the comment above. #[test] -fn test_split_constant_string() { +fn test_convert_constant_to_num_denom_strings() { let input = "1/2"; let expected = (vec!["1".to_string()], vec!["2".to_string()]); let actual = convert_constant_to_num_denom_strings(input).unwrap(); From 37bae977ff960f43fe36caa904cfc974b575203b Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 26 Sep 2023 22:22:46 +0200 Subject: [PATCH 064/104] fix --- provider/datagen/src/transform/cldr/units/mod.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index f685c58cabb..049b62334d7 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -8,7 +8,7 @@ use std::collections::BTreeMap; use self::helpers::{ contains_alphabetic_chars, convert_constant_to_num_denom_strings, convert_slices_to_fraction, - remove_whitespace, transform_fraction_to_constant_value, + transform_fraction_to_constant_value, }; use crate::transform::cldr::{cldr_serde, units::helpers::is_scientific_number}; use icu_provider::{ @@ -33,8 +33,7 @@ impl DataProvider for crate::DatagenProvider { let mut constants_map_in_str_form = BTreeMap::<&str, (Vec, Vec, ConstantExactness)>::new(); for (cons_name, cons_value) in constants { - let value = remove_whitespace(&cons_value.value); - let (num, den) = convert_constant_to_num_denom_strings(&value)?; + let (num, den) = convert_constant_to_num_denom_strings(&cons_value.value)?; let constant_type = match cons_value.status.as_deref() { Some("approximate") => ConstantExactness::Approximate, From a107b75c0445b37696790dfa814fea0edf9765ad Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 26 Sep 2023 22:28:21 +0200 Subject: [PATCH 065/104] fix the maximum depth --- provider/datagen/src/transform/cldr/units/mod.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 049b62334d7..5535da43750 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -44,8 +44,12 @@ impl DataProvider for crate::DatagenProvider { } // This loop iterates over the constants, replacing any string values with their corresponding constant values. + // For example, if the constant "ft_to_m" has the value "0.3048", and the constant "ft2_to_m2" has the value "ft_to_m * ft_to_m", + // the maximum depth represents the maximum number of nested constants that can be replaced. + // If CLDR added more constants that are defined in terms of other constants, the maximum depth should be increased. + let maximum_depth = 10; let mut has_internal_constants; - loop { + for _ in 0..maximum_depth { has_internal_constants = false; let mut constants_with_constants_map_replaceable = BTreeMap::<&str, (Vec, Vec, ConstantExactness)>::new(); From 9483c8fb80924a1f13887d2179e0259156dd5464 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 26 Sep 2023 22:48:34 +0200 Subject: [PATCH 066/104] fix --- experimental/unitsconversion/src/provider.rs | 5 +- .../datagen/src/transform/cldr/units/mod.rs | 47 ++++++++++--------- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/experimental/unitsconversion/src/provider.rs b/experimental/unitsconversion/src/provider.rs index fcae7a6a435..d3cd1865cd8 100644 --- a/experimental/unitsconversion/src/provider.rs +++ b/experimental/unitsconversion/src/provider.rs @@ -70,7 +70,8 @@ pub enum Sign { Negative = 1, } -/// This struct represents a constant value, which is composed of a numerator, denominator, sign, and type. +// TODO(#4098): Improve the ULE representation. Consider using a single byte for sign and type representation. +/// This struct encapsulates a constant value, comprising a numerator, denominator, sign, and type. #[zerovec::make_varule(ConstantValueULE)] #[derive(Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Default)] #[cfg_attr( @@ -104,5 +105,5 @@ pub struct ConstantValue<'data> { pub sign: Sign, /// Determines whether the constant value is actual or approximate. - pub constant_type: ConstantExactness, + pub constant_exactness: ConstantExactness, } diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 5535da43750..917d98e97f2 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -113,27 +113,32 @@ impl DataProvider for crate::DatagenProvider { } } - let mut constants_map = BTreeMap::<&str, ConstantValue>::new(); - - for (cons_name, (num, den, constant_type)) in constants_map_in_str_form.iter() { - let value = convert_slices_to_fraction(num, den)?; - let (num, den, sign, cons_type) = - transform_fraction_to_constant_value(value, *constant_type)?; - constants_map.insert( - cons_name, - ConstantValue { - numerator: ZeroVec::from_iter(num), - denominator: ZeroVec::from_iter(den), - sign, - constant_type: cons_type, - }, - ); - } let constants_map = ZeroMap::from_iter( - constants_map + constants_map_in_str_form .into_iter() - .map(|(k, v)| (k, zerovec::ule::encode_varule_to_box(&v))), + .map(|(cons_name, (num, den, constant_type))| { + let value = match convert_slices_to_fraction(&num, &den) { + Ok(value) => value, + Err(e) => return Err(e), + }; + let (num, den, sign, cons_type) = + match transform_fraction_to_constant_value(value, constant_type) { + Ok(value) => value, + Err(e) => return Err(e), + }; + Ok(( + cons_name, + zerovec::ule::encode_varule_to_box(&ConstantValue { + numerator: ZeroVec::from_iter(num), + denominator: ZeroVec::from_iter(den), + sign, + constant_exactness: cons_type, + }), + )) + }) + .collect::, _>>()?, ); + let result = UnitsConstantsV1 { constants_map }; Ok(DataResponse { @@ -179,7 +184,7 @@ fn test_basic() { numerator: expected_ft_to_m.numer().unwrap().to_le_bytes().into(), denominator: expected_ft_to_m.denom().unwrap().to_le_bytes().into(), sign: Sign::Positive, - constant_type: ConstantExactness::Actual, + constant_exactness: ConstantExactness::Actual, }) .as_ref() ); @@ -196,7 +201,7 @@ fn test_basic() { numerator: expected_ft2_to_m2.numer().unwrap().to_le_bytes().into(), denominator: expected_ft2_to_m2.denom().unwrap().to_le_bytes().into(), sign: Sign::Positive, - constant_type: ConstantExactness::Actual, + constant_exactness: ConstantExactness::Actual, }) .as_ref() ); @@ -213,7 +218,7 @@ fn test_basic() { numerator: expected_ft3_to_m3.numer().unwrap().to_le_bytes().into(), denominator: expected_ft3_to_m3.denom().unwrap().to_le_bytes().into(), sign: Sign::Positive, - constant_type: ConstantExactness::Actual, + constant_exactness: ConstantExactness::Actual, }) .as_ref() ); From d1b022c692e986b7d123450edb4cefc158b226b9 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 26 Sep 2023 22:50:26 +0200 Subject: [PATCH 067/104] fix constant name --- .../src/transform/cldr/units/helpers.rs | 4 +-- .../datagen/src/transform/cldr/units/mod.rs | 26 ++++++++-------- .../data/json/units/constants@1/und.json | 30 +++++++++---------- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index ce89b578fab..f15e4bd0f78 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -145,7 +145,7 @@ pub fn is_scientific_number(s: &str) -> bool { /// Transforms a fractional number into a constant value. pub fn transform_fraction_to_constant_value( fraction: BigRational, - constant_type: ConstantExactness, + constant_exactness: ConstantExactness, ) -> Result<(Vec, Vec, Sign, ConstantExactness), DataError> { let numerator = match fraction.numer().to_biguint() { Some(numerator) => numerator.to_bytes_le(), @@ -165,7 +165,7 @@ pub fn transform_fraction_to_constant_value( } }; - Ok((numerator, denominator, sign, constant_type)) + Ok((numerator, denominator, sign, constant_exactness)) } /// Converts slices of numerator and denominator strings to a fraction. diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 917d98e97f2..d6c2796ab5a 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -35,12 +35,12 @@ impl DataProvider for crate::DatagenProvider { for (cons_name, cons_value) in constants { let (num, den) = convert_constant_to_num_denom_strings(&cons_value.value)?; - let constant_type = match cons_value.status.as_deref() { + let constant_exactness = match cons_value.status.as_deref() { Some("approximate") => ConstantExactness::Approximate, _ => ConstantExactness::Actual, }; - constants_map_in_str_form.insert(cons_name, (num, den, constant_type)); + constants_map_in_str_form.insert(cons_name, (num, den, constant_exactness)); } // This loop iterates over the constants, replacing any string values with their corresponding constant values. @@ -53,10 +53,10 @@ impl DataProvider for crate::DatagenProvider { has_internal_constants = false; let mut constants_with_constants_map_replaceable = BTreeMap::<&str, (Vec, Vec, ConstantExactness)>::new(); - for (cons_name, (num, den, constant_type)) in constants_map_in_str_form.iter() { + for (cons_name, (num, den, constant_exactness)) in constants_map_in_str_form.iter() { let mut temp_num = num.clone(); let mut temp_den = den.clone(); - let mut temp_constant_type = *constant_type; + let mut temp_constant_exactness = *constant_exactness; for i in 0..temp_num.len() { if !contains_alphabetic_chars(temp_num[i].as_str()) @@ -66,7 +66,7 @@ impl DataProvider for crate::DatagenProvider { } has_internal_constants = true; - if let Some((rnum, rden, rconstant_type)) = + if let Some((rnum, rden, rconstant_exactness)) = constants_map_in_str_form.get(temp_num[i].as_str()) { temp_num.remove(i); @@ -74,8 +74,8 @@ impl DataProvider for crate::DatagenProvider { temp_num.extend(rnum.clone().into_iter()); temp_den.extend(rden.clone().into_iter()); - if *rconstant_type == ConstantExactness::Approximate { - temp_constant_type = ConstantExactness::Approximate; + if *rconstant_exactness == ConstantExactness::Approximate { + temp_constant_exactness = ConstantExactness::Approximate; } } } @@ -88,7 +88,7 @@ impl DataProvider for crate::DatagenProvider { } has_internal_constants = true; - if let Some((rnum, rden, rconstant_type)) = + if let Some((rnum, rden, rconstant_exactness)) = constants_map_in_str_form.get(temp_den[i].as_str()) { temp_den.remove(i); @@ -96,14 +96,14 @@ impl DataProvider for crate::DatagenProvider { temp_num.extend(rden.clone().into_iter()); temp_den.extend(rnum.clone().into_iter()); - if *rconstant_type == ConstantExactness::Approximate { - temp_constant_type = ConstantExactness::Approximate; + if *rconstant_exactness == ConstantExactness::Approximate { + temp_constant_exactness = ConstantExactness::Approximate; } } } constants_with_constants_map_replaceable - .insert(cons_name, (temp_num, temp_den, temp_constant_type)); + .insert(cons_name, (temp_num, temp_den, temp_constant_exactness)); } constants_map_in_str_form = constants_with_constants_map_replaceable; @@ -116,13 +116,13 @@ impl DataProvider for crate::DatagenProvider { let constants_map = ZeroMap::from_iter( constants_map_in_str_form .into_iter() - .map(|(cons_name, (num, den, constant_type))| { + .map(|(cons_name, (num, den, constant_exactness))| { let value = match convert_slices_to_fraction(&num, &den) { Ok(value) => value, Err(e) => return Err(e), }; let (num, den, sign, cons_type) = - match transform_fraction_to_constant_value(value, constant_type) { + match transform_fraction_to_constant_value(value, constant_exactness) { Ok(value) => value, Err(e) => return Err(e), }; diff --git a/provider/datagen/tests/data/json/units/constants@1/und.json b/provider/datagen/tests/data/json/units/constants@1/und.json index acd36827e8f..510cfc36220 100644 --- a/provider/datagen/tests/data/json/units/constants@1/und.json +++ b/provider/datagen/tests/data/json/units/constants@1/und.json @@ -15,7 +15,7 @@ 2 ], "sign": "Positive", - "constant_type": "Approximate" + "constant_exactness": "Approximate" }, "PI": { "numerator": [ @@ -31,7 +31,7 @@ 7 ], "sign": "Positive", - "constant_type": "Approximate" + "constant_exactness": "Approximate" }, "ft2_to_m2": { "numerator": [ @@ -45,7 +45,7 @@ 23 ], "sign": "Positive", - "constant_type": "Actual" + "constant_exactness": "Actual" }, "ft3_to_m3": { "numerator": [ @@ -61,7 +61,7 @@ 116 ], "sign": "Positive", - "constant_type": "Actual" + "constant_exactness": "Actual" }, "ft_to_m": { "numerator": [ @@ -73,7 +73,7 @@ 4 ], "sign": "Positive", - "constant_type": "Actual" + "constant_exactness": "Actual" }, "gal_imp_to_m3": { "numerator": [ @@ -88,7 +88,7 @@ 5 ], "sign": "Positive", - "constant_type": "Actual" + "constant_exactness": "Actual" }, "gal_to_m3": { "numerator": [ @@ -105,7 +105,7 @@ 29 ], "sign": "Positive", - "constant_type": "Actual" + "constant_exactness": "Actual" }, "glucose_molar_mass": { "numerator": [ @@ -118,7 +118,7 @@ 39 ], "sign": "Positive", - "constant_type": "Actual" + "constant_exactness": "Actual" }, "gravity": { "numerator": [ @@ -131,7 +131,7 @@ 78 ], "sign": "Positive", - "constant_type": "Actual" + "constant_exactness": "Actual" }, "in3_to_m3": { "numerator": [ @@ -147,7 +147,7 @@ 29 ], "sign": "Positive", - "constant_type": "Actual" + "constant_exactness": "Actual" }, "item_per_mole": { "numerator": [ @@ -166,7 +166,7 @@ 1 ], "sign": "Positive", - "constant_type": "Actual" + "constant_exactness": "Actual" }, "lb_to_kg": { "numerator": [ @@ -182,7 +182,7 @@ 5 ], "sign": "Positive", - "constant_type": "Actual" + "constant_exactness": "Actual" }, "meters_per_AU": { "numerator": [ @@ -196,7 +196,7 @@ 1 ], "sign": "Positive", - "constant_type": "Actual" + "constant_exactness": "Actual" }, "sec_per_julian_year": { "numerator": [ @@ -209,7 +209,7 @@ 1 ], "sign": "Positive", - "constant_type": "Actual" + "constant_exactness": "Actual" }, "speed_of_light_meters_per_second": { "numerator": [ @@ -222,7 +222,7 @@ 1 ], "sign": "Positive", - "constant_type": "Actual" + "constant_exactness": "Actual" } } } From 8cc2a1128aa49911187c97b1c7bd55cf69af2d0c Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 26 Sep 2023 22:55:58 +0200 Subject: [PATCH 068/104] fix clibby --- provider/datagen/src/transform/cldr/units/helpers.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index f15e4bd0f78..3e1a633dfa0 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -184,12 +184,12 @@ pub fn convert_slices_to_fraction( for numerator in numerator_strings { let num_fraction = convert_scientific_notation_to_fraction(numerator)?; - fraction = fraction * num_fraction; + fraction *= num_fraction; } for denominator in denominator_strings { let den_fraction = convert_scientific_notation_to_fraction(denominator)?; - fraction = fraction / den_fraction; + fraction /= den_fraction; } Ok(fraction) From e781cf5e9f3f7f477cc9c552637d45d6e24db9dd Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 27 Sep 2023 16:49:39 +0200 Subject: [PATCH 069/104] make the code more concise --- .../datagen/src/transform/cldr/units/mod.rs | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index d6c2796ab5a..7487d2f67af 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -64,16 +64,14 @@ impl DataProvider for crate::DatagenProvider { { continue; } - has_internal_constants = true; + if let Some((rnum, rden, rconstant_exactness)) = constants_map_in_str_form.get(temp_num[i].as_str()) { temp_num.remove(i); - // append the elements in rnum to num and rden to den - temp_num.extend(rnum.clone().into_iter()); - temp_den.extend(rden.clone().into_iter()); - + temp_num.extend(rnum.clone()); + temp_den.extend(rden.clone()); if *rconstant_exactness == ConstantExactness::Approximate { temp_constant_exactness = ConstantExactness::Approximate; } @@ -86,15 +84,14 @@ impl DataProvider for crate::DatagenProvider { { continue; } - has_internal_constants = true; + if let Some((rnum, rden, rconstant_exactness)) = constants_map_in_str_form.get(temp_den[i].as_str()) { temp_den.remove(i); - // append the elements in rnum to den and rden to num - temp_num.extend(rden.clone().into_iter()); - temp_den.extend(rnum.clone().into_iter()); + temp_num.extend(rden.clone()); + temp_den.extend(rnum.clone()); if *rconstant_exactness == ConstantExactness::Approximate { temp_constant_exactness = ConstantExactness::Approximate; @@ -113,14 +110,20 @@ impl DataProvider for crate::DatagenProvider { } } + // Transforming the `constants_map_in_str_form` map into a ZeroMap of `ConstantValue`. + // This is done by converting the numerator and denominator slices into a fraction, + // and then transforming the fraction into a `ConstantValue`. let constants_map = ZeroMap::from_iter( constants_map_in_str_form .into_iter() .map(|(cons_name, (num, den, constant_exactness))| { + // Converting slices to fraction let value = match convert_slices_to_fraction(&num, &den) { Ok(value) => value, Err(e) => return Err(e), }; + + // Transforming the fraction to a constant value let (num, den, sign, cons_type) = match transform_fraction_to_constant_value(value, constant_exactness) { Ok(value) => value, From 35ef822ddb7c36673300b94cbd3d00c8d8555e14 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 27 Sep 2023 16:51:07 +0200 Subject: [PATCH 070/104] fix fmt --- provider/datagen/src/transform/cldr/units/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 7487d2f67af..b553e9e2844 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -122,7 +122,7 @@ impl DataProvider for crate::DatagenProvider { Ok(value) => value, Err(e) => return Err(e), }; - + // Transforming the fraction to a constant value let (num, den, sign, cons_type) = match transform_fraction_to_constant_value(value, constant_exactness) { From 21ce45d2d63741288280c81d94e2922c59c8cd5a Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 27 Sep 2023 18:38:55 +0200 Subject: [PATCH 071/104] fix --- experimental/unitsconversion/src/provider.rs | 2 +- .../src/transform/cldr/units/helpers.rs | 39 ++++++++++++++++++- .../datagen/src/transform/cldr/units/mod.rs | 39 ++++++++----------- .../data/json/units/constants@1/und.json | 26 ++++++------- 4 files changed, 69 insertions(+), 37 deletions(-) diff --git a/experimental/unitsconversion/src/provider.rs b/experimental/unitsconversion/src/provider.rs index d3cd1865cd8..c9f190ae128 100644 --- a/experimental/unitsconversion/src/provider.rs +++ b/experimental/unitsconversion/src/provider.rs @@ -50,7 +50,7 @@ pub struct UnitsConstantsV1<'data> { #[repr(u8)] pub enum ConstantExactness { #[default] - Actual = 0, + Exact = 0, Approximate = 1, } diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index 3e1a633dfa0..ebccd0e893b 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -23,7 +23,17 @@ fn test_remove_whitespace() { assert_eq!(expected, actual); } -fn convert_decimal_to_bigrational(decimal: &str) -> Result { +/// Converts a decimal number represented as a string into a BigRational. +/// Examples: +/// - "1" is converted to 1/1 +/// - "1.5" is converted to 15/10 +/// - "1.05" is converted to 105/100 +/// - "1.005" is converted to 1005/1000 +/// - "1.5.5" is an invalid decimal number +/// NOTE: +/// - "1." is not a valid decimal number +/// - BigRational represents a rational number in the simplest form. For example, 15/10 is converted to 3/2. +pub fn convert_decimal_to_bigrational(decimal: &str) -> Result { let parts: Vec<&str> = decimal.split('.').collect(); match parts.len() { 1 => BigRational::from_str(parts[0]) @@ -39,6 +49,33 @@ fn convert_decimal_to_bigrational(decimal: &str) -> Result for crate::DatagenProvider { let constant_exactness = match cons_value.status.as_deref() { Some("approximate") => ConstantExactness::Approximate, - _ => ConstantExactness::Actual, + _ => ConstantExactness::Exact, }; constants_map_in_str_form.insert(cons_name, (num, den, constant_exactness)); @@ -159,11 +159,11 @@ impl IterableDataProvider for crate::DatagenProvider { #[test] fn test_basic() { - use fraction::GenericFraction; use icu_locid::locale; use icu_provider::prelude::*; use icu_unitsconversion::provider::*; - use num_bigint::BigUint; + use num_bigint::BigInt; + use num_rational::BigRational; use num_traits::ToBytes; let provider = crate::DatagenProvider::new_testing(); @@ -179,49 +179,44 @@ fn test_basic() { let constants = &und.get().to_owned().constants_map; let ft_to_m = constants.get("ft_to_m").unwrap(); - let expected_ft_to_m = - GenericFraction::::new(BigUint::from(3048u32), BigUint::from(10000u32)); + let expected_ft_to_m = BigRational::new(BigInt::from(3048u32), BigInt::from(10000u32)); assert_eq!( ft_to_m, zerovec::ule::encode_varule_to_box(&ConstantValue { - numerator: expected_ft_to_m.numer().unwrap().to_le_bytes().into(), - denominator: expected_ft_to_m.denom().unwrap().to_le_bytes().into(), + numerator: expected_ft_to_m.numer().to_le_bytes().into(), + denominator: expected_ft_to_m.denom().to_le_bytes().into(), sign: Sign::Positive, - constant_exactness: ConstantExactness::Actual, + constant_exactness: ConstantExactness::Exact, }) .as_ref() ); let ft2_to_m2 = constants.get("ft2_to_m2").unwrap(); - let expected_ft2_to_m2 = GenericFraction::::new( - BigUint::from(3048u32).pow(2), - BigUint::from(10000u32).pow(2), - ); + let expected_ft2_to_m2 = + BigRational::new(BigInt::from(3048u32).pow(2), BigInt::from(10000u32).pow(2)); assert_eq!( ft2_to_m2, zerovec::ule::encode_varule_to_box(&ConstantValue { - numerator: expected_ft2_to_m2.numer().unwrap().to_le_bytes().into(), - denominator: expected_ft2_to_m2.denom().unwrap().to_le_bytes().into(), + numerator: expected_ft2_to_m2.numer().to_le_bytes().into(), + denominator: expected_ft2_to_m2.denom().to_le_bytes().into(), sign: Sign::Positive, - constant_exactness: ConstantExactness::Actual, + constant_exactness: ConstantExactness::Exact, }) .as_ref() ); let ft3_to_m3 = constants.get("ft3_to_m3").unwrap(); - let expected_ft3_to_m3 = GenericFraction::::new( - BigUint::from(3048u32).pow(3), - BigUint::from(10000u32).pow(3), - ); + let expected_ft3_to_m3 = + BigRational::new(BigInt::from(3048u32).pow(3), BigInt::from(10000u32).pow(3)); assert_eq!( ft3_to_m3, zerovec::ule::encode_varule_to_box(&ConstantValue { - numerator: expected_ft3_to_m3.numer().unwrap().to_le_bytes().into(), - denominator: expected_ft3_to_m3.denom().unwrap().to_le_bytes().into(), + numerator: expected_ft3_to_m3.numer().to_le_bytes().into(), + denominator: expected_ft3_to_m3.denom().to_le_bytes().into(), sign: Sign::Positive, - constant_exactness: ConstantExactness::Actual, + constant_exactness: ConstantExactness::Exact, }) .as_ref() ); diff --git a/provider/datagen/tests/data/json/units/constants@1/und.json b/provider/datagen/tests/data/json/units/constants@1/und.json index 510cfc36220..b754a9cde1a 100644 --- a/provider/datagen/tests/data/json/units/constants@1/und.json +++ b/provider/datagen/tests/data/json/units/constants@1/und.json @@ -45,7 +45,7 @@ 23 ], "sign": "Positive", - "constant_exactness": "Actual" + "constant_exactness": "Exact" }, "ft3_to_m3": { "numerator": [ @@ -61,7 +61,7 @@ 116 ], "sign": "Positive", - "constant_exactness": "Actual" + "constant_exactness": "Exact" }, "ft_to_m": { "numerator": [ @@ -73,7 +73,7 @@ 4 ], "sign": "Positive", - "constant_exactness": "Actual" + "constant_exactness": "Exact" }, "gal_imp_to_m3": { "numerator": [ @@ -88,7 +88,7 @@ 5 ], "sign": "Positive", - "constant_exactness": "Actual" + "constant_exactness": "Exact" }, "gal_to_m3": { "numerator": [ @@ -105,7 +105,7 @@ 29 ], "sign": "Positive", - "constant_exactness": "Actual" + "constant_exactness": "Exact" }, "glucose_molar_mass": { "numerator": [ @@ -118,7 +118,7 @@ 39 ], "sign": "Positive", - "constant_exactness": "Actual" + "constant_exactness": "Exact" }, "gravity": { "numerator": [ @@ -131,7 +131,7 @@ 78 ], "sign": "Positive", - "constant_exactness": "Actual" + "constant_exactness": "Exact" }, "in3_to_m3": { "numerator": [ @@ -147,7 +147,7 @@ 29 ], "sign": "Positive", - "constant_exactness": "Actual" + "constant_exactness": "Exact" }, "item_per_mole": { "numerator": [ @@ -166,7 +166,7 @@ 1 ], "sign": "Positive", - "constant_exactness": "Actual" + "constant_exactness": "Exact" }, "lb_to_kg": { "numerator": [ @@ -182,7 +182,7 @@ 5 ], "sign": "Positive", - "constant_exactness": "Actual" + "constant_exactness": "Exact" }, "meters_per_AU": { "numerator": [ @@ -196,7 +196,7 @@ 1 ], "sign": "Positive", - "constant_exactness": "Actual" + "constant_exactness": "Exact" }, "sec_per_julian_year": { "numerator": [ @@ -209,7 +209,7 @@ 1 ], "sign": "Positive", - "constant_exactness": "Actual" + "constant_exactness": "Exact" }, "speed_of_light_meters_per_second": { "numerator": [ @@ -222,7 +222,7 @@ 1 ], "sign": "Positive", - "constant_exactness": "Actual" + "constant_exactness": "Exact" } } } From e557ed2f6f46f93c7dc3d48856c52f55fbf07c11 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 27 Sep 2023 18:43:53 +0200 Subject: [PATCH 072/104] remove the remove white space fn --- .../src/transform/cldr/units/helpers.rs | 35 +++++++++---------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index ebccd0e893b..9c8066c8572 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -9,20 +9,6 @@ use icu_unitsconversion::provider::{ConstantExactness, Sign}; use num_bigint::BigInt; use num_rational::BigRational; -/// Removes all whitespace from a string. -pub fn remove_whitespace(s: &str) -> String { - s.chars().filter(|c| !c.is_whitespace()).collect() -} - -// TODO: move this to the comment above. -#[test] -fn test_remove_whitespace() { - let input = "He llo Wo rld!"; - let expected = "HelloWorld!"; - let actual = remove_whitespace(input); - assert_eq!(expected, actual); -} - /// Converts a decimal number represented as a string into a BigRational. /// Examples: /// - "1" is converted to 1/1 @@ -78,21 +64,22 @@ fn test_convert_decimal_to_bigrational() { /// Converts a scientific notation number represented as a string into a GenericFraction. /// Examples: -/// - "1E2" is converted to 100 +/// - "1E2" is converted to 100/1 /// - "1E-2" is converted to 1/100 -/// - "1.5E2" is converted to 150 +/// - "1.5E2" is converted to 150/1 /// - "1.5E-2" is converted to 15/1000 +/// - " 1.5 E -2 " is converted to 15/1000 +/// - " 1.5 E - 2" is an invalid scientific notation number /// - "1.5E-2.5" is an invalid scientific notation number pub fn convert_scientific_notation_to_fraction(number: &str) -> Result { - let number = remove_whitespace(number); let parts: Vec<&str> = number.split('E').collect(); if parts.len() > 2 { return Err(DataError::custom( "the number is not a scientific notation number", )); } - let base = parts.first().unwrap_or(&"1"); - let exponent = parts.get(1).unwrap_or(&"0"); + let base = parts.first().unwrap_or(&"1").trim(); + let exponent = parts.get(1).unwrap_or(&"0").trim(); let ten = BigRational::from(BigInt::from(10u32)); let base = convert_decimal_to_bigrational(base) @@ -126,11 +113,21 @@ fn test_convert_scientific_notation_to_fraction() { let actual = convert_scientific_notation_to_fraction(input).unwrap(); assert_eq!(expected, actual); + let input = " 1.5 E -2 "; + let expected = BigRational::new(BigInt::from(15u32), BigInt::from(1000u32)); + let actual = convert_scientific_notation_to_fraction(input).unwrap(); + assert_eq!(expected, actual); + + let input = " 1.5 E - 2"; + let actual = convert_scientific_notation_to_fraction(input); + assert!(actual.is_err()); + let input = "1.5E-2.5"; let actual = convert_scientific_notation_to_fraction(input); assert!(actual.is_err()); } + /// Determines if a string contains any alphabetic characters. /// Returns true if the string contains at least one alphabetic character, false otherwise. /// Examples: From 7e34f1f32a7fa3be28216d62bc8738cbeaf9c0ae Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 27 Sep 2023 19:14:05 +0200 Subject: [PATCH 073/104] improve --- .../datagen/src/transform/cldr/units/mod.rs | 60 ++++++++++--------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 47ccbabb5ab..30201e16815 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -49,53 +49,55 @@ impl DataProvider for crate::DatagenProvider { // If CLDR added more constants that are defined in terms of other constants, the maximum depth should be increased. let maximum_depth = 10; let mut has_internal_constants; - for _ in 0..maximum_depth { + let mut max_depth_reached = 0; + while max_depth_reached < maximum_depth { has_internal_constants = false; + max_depth_reached += 1; let mut constants_with_constants_map_replaceable = BTreeMap::<&str, (Vec, Vec, ConstantExactness)>::new(); for (cons_name, (num, den, constant_exactness)) in constants_map_in_str_form.iter() { - let mut temp_num = num.clone(); - let mut temp_den = den.clone(); - let mut temp_constant_exactness = *constant_exactness; + let mut temp_num = Vec::::new(); + let mut temp_den = Vec::::new(); + let mut temp_constant_exactness = constant_exactness.clone(); - for i in 0..temp_num.len() { - if !contains_alphabetic_chars(temp_num[i].as_str()) - || is_scientific_number(temp_num[i].as_str()) - { + for num_str in num.iter().cloned() { + if !contains_alphabetic_chars(&num_str) || is_scientific_number(&num_str) { + temp_num.push(num_str.clone()); continue; } has_internal_constants = true; if let Some((rnum, rden, rconstant_exactness)) = - constants_map_in_str_form.get(temp_num[i].as_str()) + constants_map_in_str_form.get(num_str.as_str()) { - temp_num.remove(i); temp_num.extend(rnum.clone()); temp_den.extend(rden.clone()); - if *rconstant_exactness == ConstantExactness::Approximate { + if rconstant_exactness == &ConstantExactness::Approximate { temp_constant_exactness = ConstantExactness::Approximate; } + } else { + temp_num.push(num_str.clone()); } } - for i in 0..temp_den.len() { - if !contains_alphabetic_chars(temp_den[i].as_str()) - || is_scientific_number(temp_den[i].as_str()) - { + for den_str in den.iter().cloned() { + if !contains_alphabetic_chars(&den_str) || is_scientific_number(&den_str) { + temp_den.push(den_str.clone()); continue; } has_internal_constants = true; if let Some((rnum, rden, rconstant_exactness)) = - constants_map_in_str_form.get(temp_den[i].as_str()) + constants_map_in_str_form.get(den_str.as_str()) { - temp_den.remove(i); temp_num.extend(rden.clone()); temp_den.extend(rnum.clone()); - if *rconstant_exactness == ConstantExactness::Approximate { - temp_constant_exactness = ConstantExactness::Approximate; + if rconstant_exactness == &ConstantExactness::Approximate { + temp_constant_exactness = rconstant_exactness.clone(); } + } else { + temp_den.push(den_str.clone()); } } @@ -110,6 +112,14 @@ impl DataProvider for crate::DatagenProvider { } } + if max_depth_reached >= maximum_depth { + return Err(DataError::custom( + "Maximum depth reached while parsing constants. \ + This is likely due to a circular dependency in the constants. \ + Note: If the depth was increased, you may need to increase the maximum depth in the code.", + )); + } + // Transforming the `constants_map_in_str_form` map into a ZeroMap of `ConstantValue`. // This is done by converting the numerator and denominator slices into a fraction, // and then transforming the fraction into a `ConstantValue`. @@ -118,17 +128,11 @@ impl DataProvider for crate::DatagenProvider { .into_iter() .map(|(cons_name, (num, den, constant_exactness))| { // Converting slices to fraction - let value = match convert_slices_to_fraction(&num, &den) { - Ok(value) => value, - Err(e) => return Err(e), - }; + let value = convert_slices_to_fraction(&num, &den)?; // Transforming the fraction to a constant value let (num, den, sign, cons_type) = - match transform_fraction_to_constant_value(value, constant_exactness) { - Ok(value) => value, - Err(e) => return Err(e), - }; + transform_fraction_to_constant_value(value, constant_exactness)?; Ok(( cons_name, zerovec::ule::encode_varule_to_box(&ConstantValue { @@ -139,7 +143,7 @@ impl DataProvider for crate::DatagenProvider { }), )) }) - .collect::, _>>()?, + .collect::, DataError>>()?, ); let result = UnitsConstantsV1 { constants_map }; From 9a60a86fc3f5d702a88d8cbb3e73336865e0ee7f Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 27 Sep 2023 19:30:30 +0200 Subject: [PATCH 074/104] fix --- provider/datagen/src/transform/cldr/units/helpers.rs | 1 - provider/datagen/src/transform/cldr/units/mod.rs | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index 9c8066c8572..ea9e02e699e 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -127,7 +127,6 @@ fn test_convert_scientific_notation_to_fraction() { assert!(actual.is_err()); } - /// Determines if a string contains any alphabetic characters. /// Returns true if the string contains at least one alphabetic character, false otherwise. /// Examples: diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 30201e16815..a9b272c211d 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -132,7 +132,7 @@ impl DataProvider for crate::DatagenProvider { // Transforming the fraction to a constant value let (num, den, sign, cons_type) = - transform_fraction_to_constant_value(value, constant_exactness)?; + transform_fraction_to_constant_value(value, constant_exactness)?; Ok(( cons_name, zerovec::ule::encode_varule_to_box(&ConstantValue { @@ -184,6 +184,7 @@ fn test_basic() { let constants = &und.get().to_owned().constants_map; let ft_to_m = constants.get("ft_to_m").unwrap(); let expected_ft_to_m = BigRational::new(BigInt::from(3048u32), BigInt::from(10000u32)); + assert_eq!( ft_to_m, zerovec::ule::encode_varule_to_box(&ConstantValue { From 364da65b42f41e8426585541e3400130d331987a Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 27 Sep 2023 19:34:51 +0200 Subject: [PATCH 075/104] fix clibby --- provider/datagen/src/transform/cldr/units/mod.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index a9b272c211d..54ad60a5af8 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -58,10 +58,10 @@ impl DataProvider for crate::DatagenProvider { for (cons_name, (num, den, constant_exactness)) in constants_map_in_str_form.iter() { let mut temp_num = Vec::::new(); let mut temp_den = Vec::::new(); - let mut temp_constant_exactness = constant_exactness.clone(); + let mut temp_constant_exactness = *constant_exactness; - for num_str in num.iter().cloned() { - if !contains_alphabetic_chars(&num_str) || is_scientific_number(&num_str) { + for num_str in num { + if !contains_alphabetic_chars(num_str) || is_scientific_number(num_str) { temp_num.push(num_str.clone()); continue; } @@ -80,8 +80,8 @@ impl DataProvider for crate::DatagenProvider { } } - for den_str in den.iter().cloned() { - if !contains_alphabetic_chars(&den_str) || is_scientific_number(&den_str) { + for den_str in den { + if !contains_alphabetic_chars(den_str) || is_scientific_number(den_str) { temp_den.push(den_str.clone()); continue; } @@ -94,7 +94,7 @@ impl DataProvider for crate::DatagenProvider { temp_den.extend(rnum.clone()); if rconstant_exactness == &ConstantExactness::Approximate { - temp_constant_exactness = rconstant_exactness.clone(); + temp_constant_exactness = *rconstant_exactness; } } else { temp_den.push(den_str.clone()); From bbed484e51cd4fe0496d73e6b5e7a53703e3cbad Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Thu, 28 Sep 2023 14:24:09 +0200 Subject: [PATCH 076/104] fix --- Cargo.lock | 34 ---------------------------------- provider/datagen/Cargo.toml | 1 - 2 files changed, 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6000129680e..8fee08af487 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -978,15 +978,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "fraction" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3027ae1df8d41b4bed2241c8fdad4acc1e7af60c8e17743534b545e77182d678" -dependencies = [ - "num", -] - [[package]] name = "freertos-rust" version = "0.1.2" @@ -1425,7 +1416,6 @@ dependencies = [ "displaydoc", "elsa", "eyre", - "fraction", "icu", "icu_calendar", "icu_casemap", @@ -2263,19 +2253,6 @@ dependencies = [ "rawpointer", ] -[[package]] -name = "num" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" -dependencies = [ - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - [[package]] name = "num-bigint" version = "0.4.4" @@ -2306,17 +2283,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-iter" -version = "0.1.43" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - [[package]] name = "num-rational" version = "0.4.1" diff --git a/provider/datagen/Cargo.toml b/provider/datagen/Cargo.toml index 2e56782b02e..bd4121e48e2 100644 --- a/provider/datagen/Cargo.toml +++ b/provider/datagen/Cargo.toml @@ -89,7 +89,6 @@ zip = { version = ">=0.5, <0.7", default-features = false, features = ["deflate" rayon = { version = "1.5", optional = true } ureq = { version = "2", optional = true } -fraction = { version = "0.13.1", default-features = false } num-bigint = { version = "0.4.4", default-features = false } num-traits = { version = "0.2.14", default-features = false } num-rational = { version = "0.4.1", features = ["num-bigint"], default-features = false } From 9553ef2d41a1973809828e58504f9036b3c87675 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Thu, 28 Sep 2023 14:25:06 +0200 Subject: [PATCH 077/104] Update provider/datagen/src/transform/cldr/units/helpers.rs Co-authored-by: Robert Bastian <4706271+robertbastian@users.noreply.github.com> --- provider/datagen/src/transform/cldr/units/helpers.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index ea9e02e699e..b4a9978367f 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -62,7 +62,7 @@ fn test_convert_decimal_to_bigrational() { assert!(actual.is_err()); } -/// Converts a scientific notation number represented as a string into a GenericFraction. +/// Converts a scientific notation number represented as a string into a BigRational. /// Examples: /// - "1E2" is converted to 100/1 /// - "1E-2" is converted to 1/100 From c7365e4cb297777e14f31662f195b1196a2c9905 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Thu, 28 Sep 2023 15:04:31 +0200 Subject: [PATCH 078/104] fix --- .../src/transform/cldr/units/helpers.rs | 45 ++++++++++++++++--- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index b4a9978367f..11ad050bbd3 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -15,20 +15,26 @@ use num_rational::BigRational; /// - "1.5" is converted to 15/10 /// - "1.05" is converted to 105/100 /// - "1.005" is converted to 1005/1000 +/// - "10000.0005" is converted to 100000005/10000 +/// - ".5" is converted to 5/10 +/// - ".505" is converted to 505/1000 +/// - "5." is converted to 5/1 /// - "1.5.5" is an invalid decimal number /// NOTE: /// - "1." is not a valid decimal number /// - BigRational represents a rational number in the simplest form. For example, 15/10 is converted to 3/2. pub fn convert_decimal_to_bigrational(decimal: &str) -> Result { let parts: Vec<&str> = decimal.split('.').collect(); + let integral_part = parts.first().unwrap_or(&""); + let fractional_part = parts.last().unwrap_or(&""); match parts.len() { - 1 => BigRational::from_str(parts[0]) + 1 => BigRational::from_str(&integral_part) .map_err(|_| DataError::custom("the integer-part is not a valid number")), 2 => { let numerator = BigInt::from_str(parts.join("").as_str()).map_err(|_| { DataError::custom("the integer-part and fractional-part are not a valid number") })?; - let denominator = BigInt::from(10u32).pow(parts[1].len() as u32); + let denominator = BigInt::from(10u32).pow(fractional_part.len() as u32); Ok(BigRational::new(numerator, denominator)) } _ => Err(DataError::custom("the base is not a valid number")), @@ -57,6 +63,26 @@ fn test_convert_decimal_to_bigrational() { let actual = convert_decimal_to_bigrational(input).unwrap(); assert_eq!(expected, actual); + let input = "10000.0005"; + let expected = BigRational::new(BigInt::from(100000005u32), BigInt::from(10000u32)); + let actual = convert_decimal_to_bigrational(input).unwrap(); + assert_eq!(expected, actual); + + let input = ".5"; + let expected = BigRational::new(BigInt::from(5u32), BigInt::from(10u32)); + let actual = convert_decimal_to_bigrational(input).unwrap(); + assert_eq!(expected, actual); + + let input = ".505"; + let expected = BigRational::new(BigInt::from(505u32), BigInt::from(1000u32)); + let actual = convert_decimal_to_bigrational(input).unwrap(); + assert_eq!(expected, actual); + + let input = "5."; + let expected = BigRational::new(BigInt::from(5u32), BigInt::from(1u32)); + let actual = convert_decimal_to_bigrational(input).unwrap(); + assert_eq!(expected, actual); + let input = "1.5.5"; let actual = convert_decimal_to_bigrational(input); assert!(actual.is_err()); @@ -274,18 +300,18 @@ fn test_convert_array_of_strings_to_fraction() { /// - "1 * 2 / 3 * ft_to_m" is split into (["1", "2"], ["3" , "ft_to_m"]) /// - "/2" is split into (["1"], ["2"]) /// - "2" is split into (["2"], ["1"]) +/// - "2/" is split into (["2"], ["1"]) /// - "1E2" is split into (["1E2"], ["1"]) /// - "1 2 * 3" is an invalid constant string pub fn convert_constant_to_num_denom_strings( constant_string: &str, ) -> Result<(Vec, Vec), DataError> { - let mut split = constant_string.split('/'); - if split.clone().count() > 2 { + let split: Vec<&str> = constant_string.split('/').collect(); + if split.len() > 2 { return Err(DataError::custom("Invalid constant string")); } - - let numerator_string = split.next().unwrap_or("1"); - let denominator_string = split.next().unwrap_or("1"); + let numerator_string = split.get(0).unwrap_or(&"1"); + let denominator_string = split.get(1).unwrap_or(&"1"); let mut has_whitespace_within = false; let numerator_values = if numerator_string.is_empty() { @@ -352,6 +378,11 @@ fn test_convert_constant_to_num_denom_strings() { let actual = convert_constant_to_num_denom_strings(input).unwrap(); assert_eq!(expected, actual); + let input = "2/"; + let expected = (vec!["2".to_string()], vec!["1".to_string()]); + let actual = convert_constant_to_num_denom_strings(input).unwrap(); + assert_eq!(expected, actual); + let input = "1E2"; let expected = (vec!["1E2".to_string()], vec!["1".to_string()]); let actual = convert_constant_to_num_denom_strings(input).unwrap(); From eea8a3e6026dbf1726b2cf79d2cb9c423145b94f Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Thu, 28 Sep 2023 15:12:13 +0200 Subject: [PATCH 079/104] make the function more concise --- .../src/transform/cldr/units/helpers.rs | 48 +++++++------------ 1 file changed, 16 insertions(+), 32 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index 11ad050bbd3..1c749ca696d 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -310,41 +310,25 @@ pub fn convert_constant_to_num_denom_strings( if split.len() > 2 { return Err(DataError::custom("Invalid constant string")); } - let numerator_string = split.get(0).unwrap_or(&"1"); - let denominator_string = split.get(1).unwrap_or(&"1"); - - let mut has_whitespace_within = false; - let numerator_values = if numerator_string.is_empty() { - vec!["1".to_string()] - } else { - numerator_string - .split('*') - .map(|s| { - let s = s.trim(); - if s.chars().any(char::is_whitespace) { - has_whitespace_within = true; - } - s.to_string() - }) - .collect() - }; - let denominator_values = if denominator_string.is_empty() { - vec!["1".to_string()] - } else { - denominator_string - .split('*') - .map(|s| { - let s = s.trim(); - if s.chars().any(char::is_whitespace) { - has_whitespace_within = true; - } - s.to_string() - }) - .collect::>() + let process_string = |s: &str| -> Vec { + if s.is_empty() { + vec!["1".to_string()] + } else { + s.split('*').map(|s| s.trim().to_string()).collect() + } }; - if has_whitespace_within { + let numerator_values = process_string(split.get(0).unwrap_or(&"1")); + let denominator_values = process_string(split.get(1).unwrap_or(&"1")); + + if numerator_values + .iter() + .any(|s| s.chars().any(char::is_whitespace)) + || denominator_values + .iter() + .any(|s| s.chars().any(char::is_whitespace)) + { return Err(DataError::custom( "The constant string contains internal white spaces", )); From fe40b0e0bb46dac2bd53bfdc991d7f39af1711b1 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Thu, 28 Sep 2023 15:13:45 +0200 Subject: [PATCH 080/104] add comments --- provider/datagen/src/transform/cldr/units/helpers.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index 1c749ca696d..8cea76bda2f 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -311,6 +311,7 @@ pub fn convert_constant_to_num_denom_strings( return Err(DataError::custom("Invalid constant string")); } + // Define a closure to process each part of the split string let process_string = |s: &str| -> Vec { if s.is_empty() { vec!["1".to_string()] @@ -319,9 +320,11 @@ pub fn convert_constant_to_num_denom_strings( } }; + // Process the numerator and denominator parts let numerator_values = process_string(split.get(0).unwrap_or(&"1")); let denominator_values = process_string(split.get(1).unwrap_or(&"1")); + // If any part contains internal white spaces, return an error if numerator_values .iter() .any(|s| s.chars().any(char::is_whitespace)) From 84fa8123895ab3be88dddd1d2fc89c5171cc7ab4 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Thu, 28 Sep 2023 15:25:06 +0200 Subject: [PATCH 081/104] fix --- provider/datagen/src/transform/cldr/units/mod.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 54ad60a5af8..134b7242d52 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -127,12 +127,10 @@ impl DataProvider for crate::DatagenProvider { constants_map_in_str_form .into_iter() .map(|(cons_name, (num, den, constant_exactness))| { - // Converting slices to fraction let value = convert_slices_to_fraction(&num, &den)?; - - // Transforming the fraction to a constant value let (num, den, sign, cons_type) = transform_fraction_to_constant_value(value, constant_exactness)?; + Ok(( cons_name, zerovec::ule::encode_varule_to_box(&ConstantValue { From 010afc7c02c126b4355a4c69cbb498c06b2e2492 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Thu, 28 Sep 2023 15:34:52 +0200 Subject: [PATCH 082/104] fix --- provider/datagen/src/transform/cldr/units/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 134b7242d52..b999e2b5331 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -60,6 +60,8 @@ impl DataProvider for crate::DatagenProvider { let mut temp_den = Vec::::new(); let mut temp_constant_exactness = *constant_exactness; + // Iterate over the numerator strings, + // and replace any strings that are keys in the constants map with their corresponding values. for num_str in num { if !contains_alphabetic_chars(num_str) || is_scientific_number(num_str) { temp_num.push(num_str.clone()); @@ -80,6 +82,8 @@ impl DataProvider for crate::DatagenProvider { } } + // Iterate over the denominator strings, + // and replace any strings that are keys in the constants map with their corresponding values. for den_str in den { if !contains_alphabetic_chars(den_str) || is_scientific_number(den_str) { temp_den.push(den_str.clone()); From 33fea6ebca9d9dca1b01172da24476065bff22a1 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Thu, 28 Sep 2023 15:49:07 +0200 Subject: [PATCH 083/104] done fixing --- tools/depcheck/src/allowlist.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tools/depcheck/src/allowlist.rs b/tools/depcheck/src/allowlist.rs index 6717b7b9841..bbff8307bc7 100644 --- a/tools/depcheck/src/allowlist.rs +++ b/tools/depcheck/src/allowlist.rs @@ -142,17 +142,14 @@ pub const EXTRA_DATAGEN_DEPS: &[&str] = &[ "databake-derive", "elsa", "erased-serde", - "fraction", "icu_codepointtrie_builder", - "itoa", "itertools", + "itoa", "matrixmultiply", "ndarray", - "num", "num-bigint", "num-complex", "num-integer", - "num-iter", "num-rational", "num-traits", "once_cell", From 0fb142f4603647b669566b2280b4cff01e6f0ab6 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Thu, 28 Sep 2023 16:13:28 +0200 Subject: [PATCH 084/104] fix clibby --- provider/datagen/src/transform/cldr/units/helpers.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index 8cea76bda2f..b757c5800f7 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -28,7 +28,7 @@ pub fn convert_decimal_to_bigrational(decimal: &str) -> Result BigRational::from_str(&integral_part) + 1 => BigRational::from_str(integral_part) .map_err(|_| DataError::custom("the integer-part is not a valid number")), 2 => { let numerator = BigInt::from_str(parts.join("").as_str()).map_err(|_| { @@ -321,7 +321,7 @@ pub fn convert_constant_to_num_denom_strings( }; // Process the numerator and denominator parts - let numerator_values = process_string(split.get(0).unwrap_or(&"1")); + let numerator_values = process_string(split.first().unwrap_or(&"1")); let denominator_values = process_string(split.get(1).unwrap_or(&"1")); // If any part contains internal white spaces, return an error From 63647c94a488eee5f48ddc5ab2d3fb5ddf9fc5d9 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Thu, 28 Sep 2023 16:29:36 +0200 Subject: [PATCH 085/104] add issue --- provider/datagen/src/transform/cldr/units/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index b999e2b5331..dc76aa80aab 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -43,7 +43,8 @@ impl DataProvider for crate::DatagenProvider { constants_map_in_str_form.insert(cons_name, (num, den, constant_exactness)); } - // This loop iterates over the constants, replacing any string values with their corresponding constant values. + // TODO(#4100): Implement a more efficient algorithm for replacing constants with their values. + // This loop iterates over the constants and replaces any string values with their corresponding constant values. // For example, if the constant "ft_to_m" has the value "0.3048", and the constant "ft2_to_m2" has the value "ft_to_m * ft_to_m", // the maximum depth represents the maximum number of nested constants that can be replaced. // If CLDR added more constants that are defined in terms of other constants, the maximum depth should be increased. From e734e14a332a17fe9c538575f940b0b410094ef1 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Mon, 2 Oct 2023 10:11:26 +0200 Subject: [PATCH 086/104] remove dependency of "serde" --- experimental/unitsconversion/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/unitsconversion/Cargo.toml b/experimental/unitsconversion/Cargo.toml index ebb0f4f7b0e..e260f043882 100644 --- a/experimental/unitsconversion/Cargo.toml +++ b/experimental/unitsconversion/Cargo.toml @@ -25,7 +25,7 @@ displaydoc = { version = "0.2.3", default-features = false } icu_locid = { workspace = true } icu_provider = { workspace = true, features = ["macros"] } serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true } -zerovec = { workspace = true, features = ["yoke", "serde"] } +zerovec = { workspace = true, features = ["yoke"] } icu_unitsconversion_data = { workspace = true, optional = true } [features] From e9c1a7c61dec0d348ae4ada5af9aec7548db4b87 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Mon, 2 Oct 2023 10:20:15 +0200 Subject: [PATCH 087/104] fix after merge. --- experimental/unitsconversion/src/provider.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/unitsconversion/src/provider.rs b/experimental/unitsconversion/src/provider.rs index 7532f911732..c2ff9f59650 100644 --- a/experimental/unitsconversion/src/provider.rs +++ b/experimental/unitsconversion/src/provider.rs @@ -9,7 +9,7 @@ //! //! Read more about data providers: [`icu_provider`] -use icu_provider::{yoke, zerofrom}; +use icu_provider::prelude::*; use zerovec::{ZeroMap, ZeroVec}; #[cfg(feature = "compiled_data")] From 2398f8d138a3dd95f238c0c3ab4ed50a3236c66d Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 10 Oct 2023 15:54:02 +0200 Subject: [PATCH 088/104] Use GenericFraction --- Cargo.lock | 37 +++- provider/datagen/Cargo.toml | 4 +- .../src/transform/cldr/units/helpers.rs | 159 ++++++------------ .../datagen/src/transform/cldr/units/mod.rs | 32 ++-- tools/depcheck/src/allowlist.rs | 3 +- 5 files changed, 102 insertions(+), 133 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b2c81cc971b..00914c71f11 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -990,6 +990,15 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fraction" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59a78dd758a47a7305478e0e054f9fde4e983b9f9eccda162bf7ca03b79e9d40" +dependencies = [ + "num", +] + [[package]] name = "freertos-rust" version = "0.1.2" @@ -1428,6 +1437,7 @@ dependencies = [ "displaydoc", "elsa", "eyre", + "fraction", "icu", "icu_calendar", "icu_casemap", @@ -1459,8 +1469,6 @@ dependencies = [ "memchr", "ndarray", "num-bigint", - "num-rational", - "num-traits", "once_cell", "postcard", "proc-macro2", @@ -2265,6 +2273,19 @@ dependencies = [ "rawpointer", ] +[[package]] +name = "num" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" +dependencies = [ + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + [[package]] name = "num-bigint" version = "0.4.4" @@ -2295,6 +2316,17 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-iter" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + [[package]] name = "num-rational" version = "0.4.1" @@ -2302,7 +2334,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" dependencies = [ "autocfg", - "num-bigint", "num-integer", "num-traits", ] diff --git a/provider/datagen/Cargo.toml b/provider/datagen/Cargo.toml index 110c39809a4..1d1964e99ca 100644 --- a/provider/datagen/Cargo.toml +++ b/provider/datagen/Cargo.toml @@ -89,9 +89,9 @@ zip = { version = ">=0.5, <0.7", default-features = false, features = ["deflate" rayon = { version = "1.5", optional = true } ureq = { version = "2", optional = true } +fraction = {version = "0.14.0", default-features = false } num-bigint = { version = "0.4.4", default-features = false } -num-traits = { version = "0.2.14", default-features = false } -num-rational = { version = "0.4.1", features = ["num-bigint"], default-features = false } + # Dependencies for "bin" feature clap = { version = "4", optional = true, features = ["derive"] } diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index b757c5800f7..4044b83b72e 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -2,93 +2,15 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). +use core::ops::{Div, Mul}; use core::str::FromStr; +use fraction::GenericFraction; use icu_provider::DataError; use icu_unitsconversion::provider::{ConstantExactness, Sign}; -use num_bigint::BigInt; -use num_rational::BigRational; +use num_bigint::BigUint; -/// Converts a decimal number represented as a string into a BigRational. -/// Examples: -/// - "1" is converted to 1/1 -/// - "1.5" is converted to 15/10 -/// - "1.05" is converted to 105/100 -/// - "1.005" is converted to 1005/1000 -/// - "10000.0005" is converted to 100000005/10000 -/// - ".5" is converted to 5/10 -/// - ".505" is converted to 505/1000 -/// - "5." is converted to 5/1 -/// - "1.5.5" is an invalid decimal number -/// NOTE: -/// - "1." is not a valid decimal number -/// - BigRational represents a rational number in the simplest form. For example, 15/10 is converted to 3/2. -pub fn convert_decimal_to_bigrational(decimal: &str) -> Result { - let parts: Vec<&str> = decimal.split('.').collect(); - let integral_part = parts.first().unwrap_or(&""); - let fractional_part = parts.last().unwrap_or(&""); - match parts.len() { - 1 => BigRational::from_str(integral_part) - .map_err(|_| DataError::custom("the integer-part is not a valid number")), - 2 => { - let numerator = BigInt::from_str(parts.join("").as_str()).map_err(|_| { - DataError::custom("the integer-part and fractional-part are not a valid number") - })?; - let denominator = BigInt::from(10u32).pow(fractional_part.len() as u32); - Ok(BigRational::new(numerator, denominator)) - } - _ => Err(DataError::custom("the base is not a valid number")), - } -} - -#[test] -fn test_convert_decimal_to_bigrational() { - let input = "1"; - let expected = BigRational::new(BigInt::from(1u32), BigInt::from(1u32)); - let actual = convert_decimal_to_bigrational(input).unwrap(); - assert_eq!(expected, actual); - - let input = "1.5"; - let expected = BigRational::new(BigInt::from(15u32), BigInt::from(10u32)); - let actual = convert_decimal_to_bigrational(input).unwrap(); - assert_eq!(expected, actual); - - let input = "1.05"; - let expected = BigRational::new(BigInt::from(105u32), BigInt::from(100u32)); - let actual = convert_decimal_to_bigrational(input).unwrap(); - assert_eq!(expected, actual); - - let input = "1.005"; - let expected = BigRational::new(BigInt::from(1005u32), BigInt::from(1000u32)); - let actual = convert_decimal_to_bigrational(input).unwrap(); - assert_eq!(expected, actual); - - let input = "10000.0005"; - let expected = BigRational::new(BigInt::from(100000005u32), BigInt::from(10000u32)); - let actual = convert_decimal_to_bigrational(input).unwrap(); - assert_eq!(expected, actual); - - let input = ".5"; - let expected = BigRational::new(BigInt::from(5u32), BigInt::from(10u32)); - let actual = convert_decimal_to_bigrational(input).unwrap(); - assert_eq!(expected, actual); - - let input = ".505"; - let expected = BigRational::new(BigInt::from(505u32), BigInt::from(1000u32)); - let actual = convert_decimal_to_bigrational(input).unwrap(); - assert_eq!(expected, actual); - - let input = "5."; - let expected = BigRational::new(BigInt::from(5u32), BigInt::from(1u32)); - let actual = convert_decimal_to_bigrational(input).unwrap(); - assert_eq!(expected, actual); - - let input = "1.5.5"; - let actual = convert_decimal_to_bigrational(input); - assert!(actual.is_err()); -} - -/// Converts a scientific notation number represented as a string into a BigRational. +/// Converts a scientific notation number represented as a string to a fraction. /// Examples: /// - "1E2" is converted to 100/1 /// - "1E-2" is converted to 1/100 @@ -97,50 +19,63 @@ fn test_convert_decimal_to_bigrational() { /// - " 1.5 E -2 " is converted to 15/1000 /// - " 1.5 E - 2" is an invalid scientific notation number /// - "1.5E-2.5" is an invalid scientific notation number -pub fn convert_scientific_notation_to_fraction(number: &str) -> Result { +pub fn convert_scientific_notation_to_fraction( + number: &str, +) -> Result, DataError> { let parts: Vec<&str> = number.split('E').collect(); if parts.len() > 2 { return Err(DataError::custom( - "the number is not a scientific notation number", + "the number is not a valid scientific notation number", )); } - let base = parts.first().unwrap_or(&"1").trim(); + let base = parts.get(0).unwrap_or(&"1").trim(); let exponent = parts.get(1).unwrap_or(&"0").trim(); - let ten = BigRational::from(BigInt::from(10u32)); - let base = convert_decimal_to_bigrational(base) - .map_err(|_| DataError::custom("the base is not a valid number"))?; + let base = GenericFraction::::from_str(base) + .map_err(|_| DataError::custom("the base is not a valid decimal number"))?; let exponent = i32::from_str(exponent) - .map_err(|_| DataError::custom("the exponent is not a valid number"))?; + .map_err(|_| DataError::custom("the exponent is not a valid integer"))?; + + let result = if exponent >= 0 { + base.mul(GenericFraction::new( + BigUint::from(10u32).pow(exponent as u32), + BigUint::from(1u32), + )) + } else { + base.div(GenericFraction::new( + BigUint::from(10u32).pow((-exponent) as u32), + BigUint::from(1u32), + )) + }; - Ok(base * ten.pow(exponent)) + Ok(result) } // TODO: move this to the comment above. #[test] fn test_convert_scientific_notation_to_fraction() { let input = "1E2"; - let expected = BigRational::new(BigInt::from(100u32), BigInt::from(1u32)); + let expected = GenericFraction::::new(BigUint::from(100u32), BigUint::from(1u32)); let actual = convert_scientific_notation_to_fraction(input).unwrap(); assert_eq!(expected, actual); let input = "1E-2"; - let expected = BigRational::new(BigInt::from(1u32), BigInt::from(100u32)); + let expected = GenericFraction::::new(BigUint::from(1u32), BigUint::from(100u32)); let actual = convert_scientific_notation_to_fraction(input).unwrap(); assert_eq!(expected, actual); let input = "1.5E2"; - let expected = BigRational::new(BigInt::from(150u32), BigInt::from(1u32)); + let expected = GenericFraction::::new(BigUint::from(150u32), BigUint::from(1u32)); let actual = convert_scientific_notation_to_fraction(input).unwrap(); assert_eq!(expected, actual); let input = "1.5E-2"; - let expected = BigRational::new(BigInt::from(15u32), BigInt::from(1000u32)); + let expected = GenericFraction::::new(BigUint::from(15u32), BigUint::from(1000u32)); let actual = convert_scientific_notation_to_fraction(input).unwrap(); assert_eq!(expected, actual); let input = " 1.5 E -2 "; - let expected = BigRational::new(BigInt::from(15u32), BigInt::from(1000u32)); + let expected = GenericFraction::::new(BigUint::from(15u32), BigUint::from(1000u32)); let actual = convert_scientific_notation_to_fraction(input).unwrap(); assert_eq!(expected, actual); @@ -203,24 +138,24 @@ pub fn is_scientific_number(s: &str) -> bool { /// Transforms a fractional number into a constant value. pub fn transform_fraction_to_constant_value( - fraction: BigRational, + fraction: GenericFraction, constant_exactness: ConstantExactness, ) -> Result<(Vec, Vec, Sign, ConstantExactness), DataError> { - let numerator = match fraction.numer().to_biguint() { + let numerator = match fraction.numer() { Some(numerator) => numerator.to_bytes_le(), None => return Err(DataError::custom("the numerator is too large")), }; - let denominator = match fraction.denom().to_biguint() { + let denominator = match fraction.denom() { Some(denominator) => denominator.to_bytes_le(), None => return Err(DataError::custom("the denominator is too large")), }; - let sign = match fraction.numer().sign() { - num_bigint::Sign::Plus => Sign::Positive, - num_bigint::Sign::Minus => Sign::Negative, - num_bigint::Sign::NoSign => { - return Err(DataError::custom("the numerator is zero")); + let sign = match fraction.sign() { + Some(fraction::Sign::Plus) => Sign::Positive, + Some(fraction::Sign::Minus) => Sign::Negative, + None => { + return Err(DataError::custom("the sign is not defined")); } }; @@ -238,17 +173,17 @@ pub fn transform_fraction_to_constant_value( pub fn convert_slices_to_fraction( numerator_strings: &[String], denominator_strings: &[String], -) -> Result { - let mut fraction = BigRational::new(BigInt::from(1u32), BigInt::from(1u32)); +) -> Result, DataError> { + let mut fraction = GenericFraction::new(BigUint::from(1u32), BigUint::from(1u32)); for numerator in numerator_strings { let num_fraction = convert_scientific_notation_to_fraction(numerator)?; - fraction *= num_fraction; + fraction = fraction * num_fraction; } for denominator in denominator_strings { let den_fraction = convert_scientific_notation_to_fraction(denominator)?; - fraction /= den_fraction; + fraction = fraction / den_fraction; } Ok(fraction) @@ -259,19 +194,19 @@ pub fn convert_slices_to_fraction( fn test_convert_array_of_strings_to_fraction() { let numerator = vec!["1".to_string()]; let denominator = vec!["2".to_string()]; - let expected = BigRational::new(BigInt::from(1u32), BigInt::from(2u32)); + let expected = GenericFraction::new(BigUint::from(1u32), BigUint::from(2u32)); let actual = convert_slices_to_fraction(&numerator, &denominator).unwrap(); assert_eq!(expected, actual); let numerator = vec!["1".to_string(), "2".to_string()]; let denominator = vec!["3".to_string(), "1E2".to_string()]; - let expected = BigRational::new(BigInt::from(2u32), BigInt::from(300u32)); + let expected = GenericFraction::new(BigUint::from(2u32), BigUint::from(300u32)); let actual = convert_slices_to_fraction(&numerator, &denominator).unwrap(); assert_eq!(expected, actual); let numerator = vec!["1".to_string(), "2".to_string()]; let denominator = vec!["3".to_string(), "1E-2".to_string()]; - let expected = BigRational::new(BigInt::from(200u32), BigInt::from(3u32)); + let expected = GenericFraction::new(BigUint::from(200u32), BigUint::from(3u32)); let actual = convert_slices_to_fraction(&numerator, &denominator).unwrap(); assert_eq!(expected, actual); @@ -282,13 +217,13 @@ fn test_convert_array_of_strings_to_fraction() { let numerator = vec!["1E2".to_string()]; let denominator = vec!["2".to_string()]; - let expected = BigRational::new(BigInt::from(50u32), BigInt::from(1u32)); + let expected = GenericFraction::new(BigUint::from(50u32), BigUint::from(1u32)); let actual = convert_slices_to_fraction(&numerator, &denominator).unwrap(); assert_eq!(expected, actual); let numerator = vec!["1E2".to_string(), "2".to_string()]; let denominator = vec!["3".to_string(), "1E2".to_string()]; - let expected = BigRational::new(BigInt::from(2u32), BigInt::from(3u32)); + let expected = GenericFraction::new(BigUint::from(2u32), BigUint::from(3u32)); let actual = convert_slices_to_fraction(&numerator, &denominator).unwrap(); assert_eq!(expected, actual); } diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index dc76aa80aab..f005d569e08 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -166,12 +166,11 @@ impl IterableDataProvider for crate::DatagenProvider { #[test] fn test_basic() { + use fraction::GenericFraction; use icu_locid::locale; use icu_provider::prelude::*; use icu_unitsconversion::provider::*; - use num_bigint::BigInt; - use num_rational::BigRational; - use num_traits::ToBytes; + use num_bigint::BigUint; let provider = crate::DatagenProvider::new_testing(); @@ -186,13 +185,14 @@ fn test_basic() { let constants = &und.get().to_owned().constants_map; let ft_to_m = constants.get("ft_to_m").unwrap(); - let expected_ft_to_m = BigRational::new(BigInt::from(3048u32), BigInt::from(10000u32)); + let expected_ft_to_m = + GenericFraction::::new(BigUint::from(3048u32), BigUint::from(10000u32)); assert_eq!( ft_to_m, zerovec::ule::encode_varule_to_box(&ConstantValue { - numerator: expected_ft_to_m.numer().to_le_bytes().into(), - denominator: expected_ft_to_m.denom().to_le_bytes().into(), + numerator: expected_ft_to_m.numer().unwrap().to_bytes_le().into(), + denominator: expected_ft_to_m.denom().unwrap().to_bytes_le().into(), sign: Sign::Positive, constant_exactness: ConstantExactness::Exact, }) @@ -200,14 +200,16 @@ fn test_basic() { ); let ft2_to_m2 = constants.get("ft2_to_m2").unwrap(); - let expected_ft2_to_m2 = - BigRational::new(BigInt::from(3048u32).pow(2), BigInt::from(10000u32).pow(2)); + let expected_ft2_to_m2 = GenericFraction::::new( + BigUint::from(3048u32).pow(2), + BigUint::from(10000u32).pow(2), + ); assert_eq!( ft2_to_m2, zerovec::ule::encode_varule_to_box(&ConstantValue { - numerator: expected_ft2_to_m2.numer().to_le_bytes().into(), - denominator: expected_ft2_to_m2.denom().to_le_bytes().into(), + numerator: expected_ft2_to_m2.numer().unwrap().to_bytes_le().into(), + denominator: expected_ft2_to_m2.denom().unwrap().to_bytes_le().into(), sign: Sign::Positive, constant_exactness: ConstantExactness::Exact, }) @@ -215,14 +217,16 @@ fn test_basic() { ); let ft3_to_m3 = constants.get("ft3_to_m3").unwrap(); - let expected_ft3_to_m3 = - BigRational::new(BigInt::from(3048u32).pow(3), BigInt::from(10000u32).pow(3)); + let expected_ft3_to_m3 = GenericFraction::::new( + BigUint::from(3048u32).pow(3), + BigUint::from(10000u32).pow(3), + ); assert_eq!( ft3_to_m3, zerovec::ule::encode_varule_to_box(&ConstantValue { - numerator: expected_ft3_to_m3.numer().to_le_bytes().into(), - denominator: expected_ft3_to_m3.denom().to_le_bytes().into(), + numerator: expected_ft3_to_m3.numer().unwrap().to_bytes_le().into(), + denominator: expected_ft3_to_m3.denom().unwrap().to_bytes_le().into(), sign: Sign::Positive, constant_exactness: ConstantExactness::Exact, }) diff --git a/tools/depcheck/src/allowlist.rs b/tools/depcheck/src/allowlist.rs index bbff8307bc7..77d083e9bda 100644 --- a/tools/depcheck/src/allowlist.rs +++ b/tools/depcheck/src/allowlist.rs @@ -142,6 +142,7 @@ pub const EXTRA_DATAGEN_DEPS: &[&str] = &[ "databake-derive", "elsa", "erased-serde", + "fraction", "icu_codepointtrie_builder", "itertools", "itoa", @@ -150,8 +151,6 @@ pub const EXTRA_DATAGEN_DEPS: &[&str] = &[ "num-bigint", "num-complex", "num-integer", - "num-rational", - "num-traits", "once_cell", "rawpointer", "regex-syntax", From 7324663ed9b5f88ae67a749371405597f0f0b720 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 10 Oct 2023 16:15:22 +0200 Subject: [PATCH 089/104] remove using vecs. --- .../datagen/src/transform/cldr/units/helpers.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index 4044b83b72e..0175ce37408 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -22,14 +22,14 @@ use num_bigint::BigUint; pub fn convert_scientific_notation_to_fraction( number: &str, ) -> Result, DataError> { - let parts: Vec<&str> = number.split('E').collect(); - if parts.len() > 2 { + let mut parts = number.split('E'); + let base = parts.next().unwrap_or(&"1").trim(); + let exponent = parts.next().unwrap_or(&"0").trim(); + if parts.next().is_some() { return Err(DataError::custom( "the number is not a valid scientific notation number", )); } - let base = parts.get(0).unwrap_or(&"1").trim(); - let exponent = parts.get(1).unwrap_or(&"0").trim(); let base = GenericFraction::::from_str(base) .map_err(|_| DataError::custom("the base is not a valid decimal number"))?; @@ -125,14 +125,13 @@ fn test_contains_alphabetic_chars() { /// Checks if a string is a valid scientific notation number. /// Returns true if the string is a valid scientific notation number, false otherwise. pub fn is_scientific_number(s: &str) -> bool { - let parts: Vec<&str> = s.split('E').collect(); - if parts.len() > 2 { + let mut parts = s.split('E'); + let base = parts.next().unwrap_or("0"); + let exponent = parts.next().unwrap_or("0"); + if parts.next().is_some() { return false; } - let base = parts.first().unwrap_or(&"0"); - let exponent = parts.get(1).unwrap_or(&"0"); - !contains_alphabetic_chars(base) && !contains_alphabetic_chars(exponent) } From 78f03d5c16fd5c51ffd9f02931a457eb99494f9b Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 10 Oct 2023 16:29:12 +0200 Subject: [PATCH 090/104] fix clippy --- provider/datagen/src/transform/cldr/units/helpers.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index 0175ce37408..b7b3fbaa6f4 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -23,8 +23,8 @@ pub fn convert_scientific_notation_to_fraction( number: &str, ) -> Result, DataError> { let mut parts = number.split('E'); - let base = parts.next().unwrap_or(&"1").trim(); - let exponent = parts.next().unwrap_or(&"0").trim(); + let base = parts.next().unwrap_or("1").trim(); + let exponent = parts.next().unwrap_or("0").trim(); if parts.next().is_some() { return Err(DataError::custom( "the number is not a valid scientific notation number", @@ -177,12 +177,12 @@ pub fn convert_slices_to_fraction( for numerator in numerator_strings { let num_fraction = convert_scientific_notation_to_fraction(numerator)?; - fraction = fraction * num_fraction; + fraction *= num_fraction; } for denominator in denominator_strings { let den_fraction = convert_scientific_notation_to_fraction(denominator)?; - fraction = fraction / den_fraction; + fraction /= den_fraction; } Ok(fraction) From e18ba1208eb9f9f530bc18611ee6c1cade3f2265 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Tue, 10 Oct 2023 18:31:27 +0200 Subject: [PATCH 091/104] detect the loop --- .../datagen/src/transform/cldr/units/mod.rs | 158 ++++++++++-------- 1 file changed, 89 insertions(+), 69 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index f005d569e08..1d6cdb86ed2 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -7,7 +7,7 @@ pub mod helpers; use std::collections::BTreeMap; use self::helpers::{ - contains_alphabetic_chars, convert_constant_to_num_denom_strings, convert_slices_to_fraction, + convert_constant_to_num_denom_strings, convert_slices_to_fraction, transform_fraction_to_constant_value, }; use crate::transform::cldr::{cldr_serde, units::helpers::is_scientific_number}; @@ -32,6 +32,9 @@ impl DataProvider for crate::DatagenProvider { let mut constants_map_in_str_form = BTreeMap::<&str, (Vec, Vec, ConstantExactness)>::new(); + // Contains all the constants that are defined in terms of scientific numbers. + let mut clean_constants_map = + BTreeMap::<&str, (Vec, Vec, ConstantExactness)>::new(); for (cons_name, cons_value) in constants { let (num, den) = convert_constant_to_num_denom_strings(&cons_value.value)?; @@ -40,96 +43,113 @@ impl DataProvider for crate::DatagenProvider { _ => ConstantExactness::Exact, }; - constants_map_in_str_form.insert(cons_name, (num, den, constant_exactness)); - } + let mut clean_num = Vec::::new(); + let mut clean_den = Vec::::new(); + let mut replaceable_num = Vec::::new(); + let mut replaceable_den = Vec::::new(); - // TODO(#4100): Implement a more efficient algorithm for replacing constants with their values. - // This loop iterates over the constants and replaces any string values with their corresponding constant values. - // For example, if the constant "ft_to_m" has the value "0.3048", and the constant "ft2_to_m2" has the value "ft_to_m * ft_to_m", - // the maximum depth represents the maximum number of nested constants that can be replaced. - // If CLDR added more constants that are defined in terms of other constants, the maximum depth should be increased. - let maximum_depth = 10; - let mut has_internal_constants; - let mut max_depth_reached = 0; - while max_depth_reached < maximum_depth { - has_internal_constants = false; - max_depth_reached += 1; - let mut constants_with_constants_map_replaceable = - BTreeMap::<&str, (Vec, Vec, ConstantExactness)>::new(); - for (cons_name, (num, den, constant_exactness)) in constants_map_in_str_form.iter() { - let mut temp_num = Vec::::new(); - let mut temp_den = Vec::::new(); - let mut temp_constant_exactness = *constant_exactness; - - // Iterate over the numerator strings, - // and replace any strings that are keys in the constants map with their corresponding values. - for num_str in num { - if !contains_alphabetic_chars(num_str) || is_scientific_number(num_str) { - temp_num.push(num_str.clone()); - continue; - } - has_internal_constants = true; + for num_elem in num.iter() { + if is_scientific_number(num_elem) { + clean_num.push(num_elem.clone()); + continue; + } + replaceable_num.push(num_elem.clone()); + } + + for den_elem in den.iter() { + if is_scientific_number(den_elem) { + clean_den.push(den_elem.clone()); + continue; + } + replaceable_den.push(den_elem.clone()); + } + + constants_map_in_str_form.insert( + cons_name.as_str(), + (replaceable_num, replaceable_den, constant_exactness), + ); + + clean_constants_map.insert( + cons_name.as_str(), + (clean_num, clean_den, constant_exactness), + ); + } - if let Some((rnum, rden, rconstant_exactness)) = - constants_map_in_str_form.get(num_str.as_str()) + let mut updated = false; + loop { + for (key, elem) in constants_map_in_str_form.iter_mut() { + let (num_vec, den_vec, _) = elem; + for i in (0..num_vec.len()).rev() { + if let Some((clean_num, clean_den, clean_constant_exactness)) = + clean_constants_map.get(num_vec[i].as_str()).cloned() { - temp_num.extend(rnum.clone()); - temp_den.extend(rden.clone()); - if rconstant_exactness == &ConstantExactness::Approximate { - temp_constant_exactness = ConstantExactness::Approximate; + if clean_num.is_empty() || clean_den.is_empty() { + continue; } - } else { - temp_num.push(num_str.clone()); + let (add_to_num, add_to_den, add_to_exactness) = + clean_constants_map.get_mut(key).unwrap(); + num_vec.remove(i); + add_to_num.extend(clean_num); + add_to_den.extend(clean_den); + if clean_constant_exactness == ConstantExactness::Approximate { + *add_to_exactness = ConstantExactness::Approximate; + } + updated = true; } } - // Iterate over the denominator strings, - // and replace any strings that are keys in the constants map with their corresponding values. - for den_str in den { - if !contains_alphabetic_chars(den_str) || is_scientific_number(den_str) { - temp_den.push(den_str.clone()); - continue; - } - has_internal_constants = true; - - if let Some((rnum, rden, rconstant_exactness)) = - constants_map_in_str_form.get(den_str.as_str()) + for i in (0..den_vec.len()).rev() { + if let Some((clean_num, clean_den, clean_constant_exactness)) = + clean_constants_map.get(den_vec[i].as_str()).cloned() { - temp_num.extend(rden.clone()); - temp_den.extend(rnum.clone()); - - if rconstant_exactness == &ConstantExactness::Approximate { - temp_constant_exactness = *rconstant_exactness; + if clean_num.is_empty() || clean_den.is_empty() { + continue; } - } else { - temp_den.push(den_str.clone()); + let (add_to_num, add_to_den, add_to_exactness) = + clean_constants_map.get_mut(key).unwrap(); + den_vec.remove(i); + add_to_num.extend(clean_den); + add_to_den.extend(clean_num); + if clean_constant_exactness == ConstantExactness::Approximate { + *add_to_exactness = ConstantExactness::Approximate; + } + + updated = true; } } - - constants_with_constants_map_replaceable - .insert(cons_name, (temp_num, temp_den, temp_constant_exactness)); } - constants_map_in_str_form = constants_with_constants_map_replaceable; + if updated { + updated = false; + println!( + "Updated constants_map_in_str_form: {:?}", + constants_map_in_str_form + ); + continue; + } - if !has_internal_constants { - break; + // Verify that all vectors in constants_map_in_str_form are empty. + // If they are, we have successfully replaced all constants with their values. + // If not, return an error due to an infinite loop. + for (_, (num_vec, den_vec, _)) in constants_map_in_str_form.iter() { + if !num_vec.is_empty() || !den_vec.is_empty() { + return Err(DataError::custom( + "Infinite loop detected while replacing constants with their values.", + )); + } } - } - if max_depth_reached >= maximum_depth { - return Err(DataError::custom( - "Maximum depth reached while parsing constants. \ - This is likely due to a circular dependency in the constants. \ - Note: If the depth was increased, you may need to increase the maximum depth in the code.", - )); + break; } + // TODO(#4100): Implement a more efficient algorithm for replacing constants with their values. + // This loop iterates over the constants and replaces any string values with their corresponding constant values. + // Transforming the `constants_map_in_str_form` map into a ZeroMap of `ConstantValue`. // This is done by converting the numerator and denominator slices into a fraction, // and then transforming the fraction into a `ConstantValue`. let constants_map = ZeroMap::from_iter( - constants_map_in_str_form + clean_constants_map .into_iter() .map(|(cons_name, (num, den, constant_exactness))| { let value = convert_slices_to_fraction(&num, &den)?; From ebfe084eca433729348e6edf8374fa012ca222f7 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 11 Oct 2023 12:58:27 +0200 Subject: [PATCH 092/104] detect the loop 2 --- .../datagen/src/transform/cldr/units/mod.rs | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 1d6cdb86ed2..b12490a914a 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -75,8 +75,12 @@ impl DataProvider for crate::DatagenProvider { ); } + // TODO(#4100): Implement a more efficient algorithm for replacing constants with their values. + // This loop iterates over the constants and replaces any string values with their corresponding constant values. let mut updated = false; loop { + // TODO(#4100): remove this copy. + let constants_map_in_str_form_copy = constants_map_in_str_form.clone(); for (key, elem) in constants_map_in_str_form.iter_mut() { let (num_vec, den_vec, _) = elem; for i in (0..num_vec.len()).rev() { @@ -86,6 +90,13 @@ impl DataProvider for crate::DatagenProvider { if clean_num.is_empty() || clean_den.is_empty() { continue; } + if let Some((clean_num_from_str, clean_den_from_str, _)) = + constants_map_in_str_form_copy.get(num_vec[i].as_str()) + { + if !clean_num_from_str.is_empty() || !clean_den_from_str.is_empty() { + continue; + } + } let (add_to_num, add_to_den, add_to_exactness) = clean_constants_map.get_mut(key).unwrap(); num_vec.remove(i); @@ -105,6 +116,13 @@ impl DataProvider for crate::DatagenProvider { if clean_num.is_empty() || clean_den.is_empty() { continue; } + if let Some((clean_num_from_str, clean_den_from_str, _)) = + constants_map_in_str_form_copy.get(den_vec[i].as_str()) + { + if !clean_num_from_str.is_empty() || !clean_den_from_str.is_empty() { + continue; + } + } let (add_to_num, add_to_den, add_to_exactness) = clean_constants_map.get_mut(key).unwrap(); den_vec.remove(i); @@ -121,10 +139,6 @@ impl DataProvider for crate::DatagenProvider { if updated { updated = false; - println!( - "Updated constants_map_in_str_form: {:?}", - constants_map_in_str_form - ); continue; } @@ -142,9 +156,6 @@ impl DataProvider for crate::DatagenProvider { break; } - // TODO(#4100): Implement a more efficient algorithm for replacing constants with their values. - // This loop iterates over the constants and replaces any string values with their corresponding constant values. - // Transforming the `constants_map_in_str_form` map into a ZeroMap of `ConstantValue`. // This is done by converting the numerator and denominator slices into a fraction, // and then transforming the fraction into a `ConstantValue`. From a64515ffc6e5b698c1f3b35e59529bf5ce75bc5d Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 11 Oct 2023 15:15:51 +0200 Subject: [PATCH 093/104] fix the loop --- .../datagen/src/transform/cldr/units/mod.rs | 219 +++++++++--------- 1 file changed, 108 insertions(+), 111 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index b12490a914a..809cca67fdd 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -4,7 +4,7 @@ pub mod helpers; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, VecDeque}; use self::helpers::{ convert_constant_to_num_denom_strings, convert_slices_to_fraction, @@ -20,6 +20,49 @@ use icu_unitsconversion::provider::{ }; use zerovec::{ZeroMap, ZeroVec}; +#[derive(Debug)] +struct CleanAndDirtyConstant { + clean_num: Vec, + clean_den: Vec, + dirty_num: VecDeque, + dirty_den: VecDeque, + constant_exactness: ConstantExactness, +} + +impl CleanAndDirtyConstant { + fn new(num: &[String], den: &[String], exactness: ConstantExactness) -> Self { + let mut constant = CleanAndDirtyConstant { + clean_num: Vec::new(), + clean_den: Vec::new(), + dirty_num: VecDeque::new(), + dirty_den: VecDeque::new(), + constant_exactness: exactness, + }; + + for n in num { + if is_scientific_number(n) { + constant.clean_num.push(n.clone()); + } else { + constant.dirty_num.push_back(n.clone()); + } + } + + for d in den { + if is_scientific_number(d) { + constant.clean_den.push(d.clone()); + } else { + constant.dirty_den.push_back(d.clone()); + } + } + constant + } + + /// Determines if the constant is free of any dirty elements. + fn is_clean(&self) -> bool { + self.dirty_num.is_empty() && self.dirty_den.is_empty() + } +} + impl DataProvider for crate::DatagenProvider { fn load(&self, _req: DataRequest) -> Result, DataError> { self.check_req::(_req)?; @@ -30,11 +73,10 @@ impl DataProvider for crate::DatagenProvider { .read_and_parse("supplemental/units.json")?; let constants = &units_data.supplemental.unit_constants.constants; - let mut constants_map_in_str_form = - BTreeMap::<&str, (Vec, Vec, ConstantExactness)>::new(); - // Contains all the constants that are defined in terms of scientific numbers. - let mut clean_constants_map = - BTreeMap::<&str, (Vec, Vec, ConstantExactness)>::new(); + let mut dirty_constants_queue = VecDeque::<(&str, CleanAndDirtyConstant)>::new(); + + // Contains all the constants that do not have any dirty data. I.E. dirty_num and dirty_den are empty. + let mut clean_constants_map = BTreeMap::<&str, CleanAndDirtyConstant>::new(); for (cons_name, cons_value) in constants { let (num, den) = convert_constant_to_num_denom_strings(&cons_value.value)?; @@ -43,129 +85,84 @@ impl DataProvider for crate::DatagenProvider { _ => ConstantExactness::Exact, }; - let mut clean_num = Vec::::new(); - let mut clean_den = Vec::::new(); - let mut replaceable_num = Vec::::new(); - let mut replaceable_den = Vec::::new(); + let constant = CleanAndDirtyConstant::new(&num, &den, constant_exactness); - for num_elem in num.iter() { - if is_scientific_number(num_elem) { - clean_num.push(num_elem.clone()); - continue; - } - replaceable_num.push(num_elem.clone()); - } - - for den_elem in den.iter() { - if is_scientific_number(den_elem) { - clean_den.push(den_elem.clone()); - continue; - } - replaceable_den.push(den_elem.clone()); + if constant.is_clean() { + clean_constants_map.insert(&cons_name, constant); + } else { + dirty_constants_queue.push_back((&cons_name, constant)); } - - constants_map_in_str_form.insert( - cons_name.as_str(), - (replaceable_num, replaceable_den, constant_exactness), - ); - - clean_constants_map.insert( - cons_name.as_str(), - (clean_num, clean_den, constant_exactness), - ); } - // TODO(#4100): Implement a more efficient algorithm for replacing constants with their values. - // This loop iterates over the constants and replaces any string values with their corresponding constant values. - let mut updated = false; - loop { - // TODO(#4100): remove this copy. - let constants_map_in_str_form_copy = constants_map_in_str_form.clone(); - for (key, elem) in constants_map_in_str_form.iter_mut() { - let (num_vec, den_vec, _) = elem; - for i in (0..num_vec.len()).rev() { - if let Some((clean_num, clean_den, clean_constant_exactness)) = - clean_constants_map.get(num_vec[i].as_str()).cloned() - { - if clean_num.is_empty() || clean_den.is_empty() { - continue; - } - if let Some((clean_num_from_str, clean_den_from_str, _)) = - constants_map_in_str_form_copy.get(num_vec[i].as_str()) - { - if !clean_num_from_str.is_empty() || !clean_den_from_str.is_empty() { - continue; - } - } - let (add_to_num, add_to_den, add_to_exactness) = - clean_constants_map.get_mut(key).unwrap(); - num_vec.remove(i); - add_to_num.extend(clean_num); - add_to_den.extend(clean_den); - if clean_constant_exactness == ConstantExactness::Approximate { - *add_to_exactness = ConstantExactness::Approximate; - } - updated = true; - } + // Replacing dirty constants with their corresponding clean value. + while !dirty_constants_queue.is_empty() { + let (constant_key, mut dirty_constant) = dirty_constants_queue + .pop_front() + .ok_or(DataError::custom("dirty queue defect"))?; + + for _ in 0..dirty_constant.dirty_num.len() { + let num = dirty_constant + .dirty_num + .pop_front() + .ok_or(DataError::custom("dirty queue defect"))?; + + if let Some(clean_constant) = clean_constants_map.get(num.as_str()) { + dirty_constant + .clean_num + .extend(clean_constant.clean_num.clone().into_iter()); + dirty_constant + .clean_den + .extend(clean_constant.clean_den.clone().into_iter()); + } else { + dirty_constant.dirty_num.push_back(num); } - - for i in (0..den_vec.len()).rev() { - if let Some((clean_num, clean_den, clean_constant_exactness)) = - clean_constants_map.get(den_vec[i].as_str()).cloned() - { - if clean_num.is_empty() || clean_den.is_empty() { - continue; - } - if let Some((clean_num_from_str, clean_den_from_str, _)) = - constants_map_in_str_form_copy.get(den_vec[i].as_str()) - { - if !clean_num_from_str.is_empty() || !clean_den_from_str.is_empty() { - continue; - } - } - let (add_to_num, add_to_den, add_to_exactness) = - clean_constants_map.get_mut(key).unwrap(); - den_vec.remove(i); - add_to_num.extend(clean_den); - add_to_den.extend(clean_num); - if clean_constant_exactness == ConstantExactness::Approximate { - *add_to_exactness = ConstantExactness::Approximate; - } - - updated = true; - } - } - } - - if updated { - updated = false; - continue; } - // Verify that all vectors in constants_map_in_str_form are empty. - // If they are, we have successfully replaced all constants with their values. - // If not, return an error due to an infinite loop. - for (_, (num_vec, den_vec, _)) in constants_map_in_str_form.iter() { - if !num_vec.is_empty() || !den_vec.is_empty() { - return Err(DataError::custom( - "Infinite loop detected while replacing constants with their values.", - )); + for _ in 0..dirty_constant.dirty_den.len() { + let den = dirty_constant + .dirty_den + .pop_front() + .ok_or(DataError::custom("dirty queue defect"))?; + + if let Some(clean_constant) = clean_constants_map.get(den.as_str()) { + dirty_constant + .clean_num + .extend(clean_constant.clean_den.clone().into_iter()); + dirty_constant + .clean_den + .extend(clean_constant.clean_num.clone().into_iter()); + } else { + dirty_constant.dirty_den.push_back(den); } } - break; + if dirty_constant.is_clean() { + clean_constants_map.insert(constant_key, dirty_constant); + } else { + dirty_constants_queue.push_back((constant_key, dirty_constant)); + } } - // Transforming the `constants_map_in_str_form` map into a ZeroMap of `ConstantValue`. // This is done by converting the numerator and denominator slices into a fraction, // and then transforming the fraction into a `ConstantValue`. let constants_map = ZeroMap::from_iter( clean_constants_map .into_iter() - .map(|(cons_name, (num, den, constant_exactness))| { - let value = convert_slices_to_fraction(&num, &den)?; + .map(|(cons_name, constant)| { + let value = convert_slices_to_fraction( + &constant + .clean_num + .iter() + .map(|s| s.to_string()) + .collect::>(), + &constant + .clean_den + .iter() + .map(|s| s.to_string()) + .collect::>(), + )?; let (num, den, sign, cons_type) = - transform_fraction_to_constant_value(value, constant_exactness)?; + transform_fraction_to_constant_value(value, constant.constant_exactness)?; Ok(( cons_name, From 1ca05fef5fbaafe137c09a5992046a48f6610e74 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 11 Oct 2023 15:58:49 +0200 Subject: [PATCH 094/104] detect the loop 3 --- .../datagen/src/transform/cldr/units/mod.rs | 66 +++++++++++-------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 809cca67fdd..5d23024a66c 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -95,44 +95,44 @@ impl DataProvider for crate::DatagenProvider { } // Replacing dirty constants with their corresponding clean value. + let mut count = 0; while !dirty_constants_queue.is_empty() { + let mut updated = false; let (constant_key, mut dirty_constant) = dirty_constants_queue .pop_front() .ok_or(DataError::custom("dirty queue defect"))?; for _ in 0..dirty_constant.dirty_num.len() { - let num = dirty_constant - .dirty_num - .pop_front() - .ok_or(DataError::custom("dirty queue defect"))?; - - if let Some(clean_constant) = clean_constants_map.get(num.as_str()) { - dirty_constant - .clean_num - .extend(clean_constant.clean_num.clone().into_iter()); - dirty_constant - .clean_den - .extend(clean_constant.clean_den.clone().into_iter()); - } else { - dirty_constant.dirty_num.push_back(num); + if let Some(num) = dirty_constant.dirty_num.pop_front() { + if let Some(clean_constant) = clean_constants_map.get(num.as_str()) { + dirty_constant + .clean_num + .extend(clean_constant.clean_num.clone()); + dirty_constant + .clean_den + .extend(clean_constant.clean_den.clone()); + + updated = true; + } else { + dirty_constant.dirty_num.push_back(num); + } } } for _ in 0..dirty_constant.dirty_den.len() { - let den = dirty_constant - .dirty_den - .pop_front() - .ok_or(DataError::custom("dirty queue defect"))?; - - if let Some(clean_constant) = clean_constants_map.get(den.as_str()) { - dirty_constant - .clean_num - .extend(clean_constant.clean_den.clone().into_iter()); - dirty_constant - .clean_den - .extend(clean_constant.clean_num.clone().into_iter()); - } else { - dirty_constant.dirty_den.push_back(den); + if let Some(den) = dirty_constant.dirty_den.pop_front() { + if let Some(clean_constant) = clean_constants_map.get(den.as_str()) { + dirty_constant + .clean_num + .extend(clean_constant.clean_den.clone()); + dirty_constant + .clean_den + .extend(clean_constant.clean_num.clone()); + + updated = true; + } else { + dirty_constant.dirty_den.push_back(den); + } } } @@ -141,8 +141,16 @@ impl DataProvider for crate::DatagenProvider { } else { dirty_constants_queue.push_back((constant_key, dirty_constant)); } + + count = if !updated { count + 1 } else { 0 }; + if count > dirty_constants_queue.len() { + return Err(DataError::custom( + "An Infinite loop was detected in the CLDR constants data!", + )); + } } - // Transforming the `constants_map_in_str_form` map into a ZeroMap of `ConstantValue`. + + // Transforming the `clean_constants_map` map into a ZeroMap of `ConstantValue`. // This is done by converting the numerator and denominator slices into a fraction, // and then transforming the fraction into a `ConstantValue`. let constants_map = ZeroMap::from_iter( From 36380c35338f95dade88f5544c889faa20a6571b Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 11 Oct 2023 16:14:11 +0200 Subject: [PATCH 095/104] fix naming --- .../datagen/src/transform/cldr/units/mod.rs | 85 ++++++++++--------- 1 file changed, 46 insertions(+), 39 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 5d23024a66c..bb58e850765 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -20,22 +20,27 @@ use icu_unitsconversion::provider::{ }; use zerovec::{ZeroMap, ZeroVec}; +/// Represents a general constant which contains scientific and non scientific numbers. #[derive(Debug)] -struct CleanAndDirtyConstant { +struct GeneralConstant { + /// Contains numerator terms that are represented as scientific numbers clean_num: Vec, + /// Contains denominator terms that are represented as scientific numbers clean_den: Vec, - dirty_num: VecDeque, - dirty_den: VecDeque, + /// Contains numerator terms that are not represented as scientific numbers + non_scientific_num: VecDeque, + /// Contains denominator terms that are not represented as scientific numbers + non_scientific_den: VecDeque, constant_exactness: ConstantExactness, } -impl CleanAndDirtyConstant { +impl GeneralConstant { fn new(num: &[String], den: &[String], exactness: ConstantExactness) -> Self { - let mut constant = CleanAndDirtyConstant { + let mut constant = GeneralConstant { clean_num: Vec::new(), clean_den: Vec::new(), - dirty_num: VecDeque::new(), - dirty_den: VecDeque::new(), + non_scientific_num: VecDeque::new(), + non_scientific_den: VecDeque::new(), constant_exactness: exactness, }; @@ -43,7 +48,7 @@ impl CleanAndDirtyConstant { if is_scientific_number(n) { constant.clean_num.push(n.clone()); } else { - constant.dirty_num.push_back(n.clone()); + constant.non_scientific_num.push_back(n.clone()); } } @@ -51,15 +56,15 @@ impl CleanAndDirtyConstant { if is_scientific_number(d) { constant.clean_den.push(d.clone()); } else { - constant.dirty_den.push_back(d.clone()); + constant.non_scientific_den.push_back(d.clone()); } } constant } - /// Determines if the constant is free of any dirty elements. - fn is_clean(&self) -> bool { - self.dirty_num.is_empty() && self.dirty_den.is_empty() + /// Determines if the constant is free of any non_scientific elements. + fn is_free_of_non_scientific(&self) -> bool { + self.non_scientific_num.is_empty() && self.non_scientific_den.is_empty() } } @@ -73,10 +78,10 @@ impl DataProvider for crate::DatagenProvider { .read_and_parse("supplemental/units.json")?; let constants = &units_data.supplemental.unit_constants.constants; - let mut dirty_constants_queue = VecDeque::<(&str, CleanAndDirtyConstant)>::new(); + let mut constants_with_non_scientific = VecDeque::<(&str, GeneralConstant)>::new(); - // Contains all the constants that do not have any dirty data. I.E. dirty_num and dirty_den are empty. - let mut clean_constants_map = BTreeMap::<&str, CleanAndDirtyConstant>::new(); + // Contains all the constants that do not have any non-scientific numbers. I.E., non_scientific_num and non_scientific_den are empty. + let mut clean_constants_map = BTreeMap::<&str, GeneralConstant>::new(); for (cons_name, cons_value) in constants { let (num, den) = convert_constant_to_num_denom_strings(&cons_value.value)?; @@ -85,74 +90,76 @@ impl DataProvider for crate::DatagenProvider { _ => ConstantExactness::Exact, }; - let constant = CleanAndDirtyConstant::new(&num, &den, constant_exactness); + let constant = GeneralConstant::new(&num, &den, constant_exactness); - if constant.is_clean() { + if constant.is_free_of_non_scientific() { clean_constants_map.insert(&cons_name, constant); } else { - dirty_constants_queue.push_back((&cons_name, constant)); + constants_with_non_scientific.push_back((&cons_name, constant)); } } - // Replacing dirty constants with their corresponding clean value. + // Replacing non scientific constant terms with their corresponding clean value. let mut count = 0; - while !dirty_constants_queue.is_empty() { + while !constants_with_non_scientific.is_empty() { let mut updated = false; - let (constant_key, mut dirty_constant) = dirty_constants_queue + let (constant_key, mut non_scientific_constant) = constants_with_non_scientific .pop_front() - .ok_or(DataError::custom("dirty queue defect"))?; + .ok_or(DataError::custom( + "non scientific queue error: an element must exist", + ))?; - for _ in 0..dirty_constant.dirty_num.len() { - if let Some(num) = dirty_constant.dirty_num.pop_front() { + for _ in 0..non_scientific_constant.non_scientific_num.len() { + if let Some(num) = non_scientific_constant.non_scientific_num.pop_front() { if let Some(clean_constant) = clean_constants_map.get(num.as_str()) { - dirty_constant + non_scientific_constant .clean_num .extend(clean_constant.clean_num.clone()); - dirty_constant + non_scientific_constant .clean_den .extend(clean_constant.clean_den.clone()); updated = true; } else { - dirty_constant.dirty_num.push_back(num); + non_scientific_constant.non_scientific_num.push_back(num); } } } - for _ in 0..dirty_constant.dirty_den.len() { - if let Some(den) = dirty_constant.dirty_den.pop_front() { + for _ in 0..non_scientific_constant.non_scientific_den.len() { + if let Some(den) = non_scientific_constant.non_scientific_den.pop_front() { if let Some(clean_constant) = clean_constants_map.get(den.as_str()) { - dirty_constant + non_scientific_constant .clean_num .extend(clean_constant.clean_den.clone()); - dirty_constant + non_scientific_constant .clean_den .extend(clean_constant.clean_num.clone()); updated = true; } else { - dirty_constant.dirty_den.push_back(den); + non_scientific_constant.non_scientific_den.push_back(den); } } } - if dirty_constant.is_clean() { - clean_constants_map.insert(constant_key, dirty_constant); + if non_scientific_constant.is_free_of_non_scientific() { + clean_constants_map.insert(constant_key, non_scientific_constant); } else { - dirty_constants_queue.push_back((constant_key, dirty_constant)); + constants_with_non_scientific.push_back((constant_key, non_scientific_constant)); } count = if !updated { count + 1 } else { 0 }; - if count > dirty_constants_queue.len() { + if count > constants_with_non_scientific.len() { return Err(DataError::custom( "An Infinite loop was detected in the CLDR constants data!", )); } } - // Transforming the `clean_constants_map` map into a ZeroMap of `ConstantValue`. - // This is done by converting the numerator and denominator slices into a fraction, - // and then transforming the fraction into a `ConstantValue`. + // Convert `clean_constants_map` into a ZeroMap of `ConstantValue`. + // This involves transforming the numerator and denominator slices into a fraction, + // and subsequently converting the fraction into a `ConstantValue`. let constants_map = ZeroMap::from_iter( clean_constants_map .into_iter() From 832aeeb1eae4820c6a877273f023b925f138f8ab Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 11 Oct 2023 16:20:44 +0200 Subject: [PATCH 096/104] fix clippy --- provider/datagen/src/transform/cldr/units/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index bb58e850765..d8036216e46 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -93,7 +93,7 @@ impl DataProvider for crate::DatagenProvider { let constant = GeneralConstant::new(&num, &den, constant_exactness); if constant.is_free_of_non_scientific() { - clean_constants_map.insert(&cons_name, constant); + clean_constants_map.insert(cons_name, constant); } else { constants_with_non_scientific.push_back((&cons_name, constant)); } From a5aa753ae5de62295ea9e08a605f99e416ccf1ab Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 11 Oct 2023 16:23:03 +0200 Subject: [PATCH 097/104] fix tidy --- tools/depcheck/src/allowlist.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/depcheck/src/allowlist.rs b/tools/depcheck/src/allowlist.rs index 77d083e9bda..83889a6fb30 100644 --- a/tools/depcheck/src/allowlist.rs +++ b/tools/depcheck/src/allowlist.rs @@ -148,9 +148,13 @@ pub const EXTRA_DATAGEN_DEPS: &[&str] = &[ "itoa", "matrixmultiply", "ndarray", + "num", "num-bigint", "num-complex", "num-integer", + "num-iter", + "num-rational", + "num-traits", "once_cell", "rawpointer", "regex-syntax", From bb6b78695796d974b0e267ef3c24f73fc19b5b14 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Wed, 11 Oct 2023 16:32:30 +0200 Subject: [PATCH 098/104] fix comment --- provider/datagen/src/transform/cldr/units/mod.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index d8036216e46..32d067af7ed 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -243,10 +243,7 @@ fn test_basic() { ); let ft2_to_m2 = constants.get("ft2_to_m2").unwrap(); - let expected_ft2_to_m2 = GenericFraction::::new( - BigUint::from(3048u32).pow(2), - BigUint::from(10000u32).pow(2), - ); + let expected_ft2_to_m2 = &expected_ft_to_m * &expected_ft_to_m; assert_eq!( ft2_to_m2, @@ -260,10 +257,7 @@ fn test_basic() { ); let ft3_to_m3 = constants.get("ft3_to_m3").unwrap(); - let expected_ft3_to_m3 = GenericFraction::::new( - BigUint::from(3048u32).pow(3), - BigUint::from(10000u32).pow(3), - ); + let expected_ft3_to_m3 = &expected_ft2_to_m2 * &expected_ft_to_m; assert_eq!( ft3_to_m3, From afb8d8f0aa0a59a34b14b28fc878a593ba1dcbef Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Mon, 16 Oct 2023 14:02:32 +0200 Subject: [PATCH 099/104] use &str instead --- .../src/transform/cldr/units/helpers.rs | 28 +++++++++---------- .../datagen/src/transform/cldr/units/mod.rs | 8 +++--- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/helpers.rs b/provider/datagen/src/transform/cldr/units/helpers.rs index b7b3fbaa6f4..1229454ed5c 100644 --- a/provider/datagen/src/transform/cldr/units/helpers.rs +++ b/provider/datagen/src/transform/cldr/units/helpers.rs @@ -170,8 +170,8 @@ pub fn transform_fraction_to_constant_value( /// - ["1E2"], ["2"] is converted to 1E2/2 --> 100/2 --> 50/1 /// - ["1E2", "2"], ["3", "1E2"] is converted to 1E2*2/(3*1E2) --> 2/3 pub fn convert_slices_to_fraction( - numerator_strings: &[String], - denominator_strings: &[String], + numerator_strings: &[&str], + denominator_strings: &[&str], ) -> Result, DataError> { let mut fraction = GenericFraction::new(BigUint::from(1u32), BigUint::from(1u32)); @@ -191,37 +191,37 @@ pub fn convert_slices_to_fraction( // TODO: move some of these tests to the comment above. #[test] fn test_convert_array_of_strings_to_fraction() { - let numerator = vec!["1".to_string()]; - let denominator = vec!["2".to_string()]; + let numerator: Vec<&str> = vec!["1"]; + let denominator: Vec<&str> = vec!["2"]; let expected = GenericFraction::new(BigUint::from(1u32), BigUint::from(2u32)); let actual = convert_slices_to_fraction(&numerator, &denominator).unwrap(); assert_eq!(expected, actual); - let numerator = vec!["1".to_string(), "2".to_string()]; - let denominator = vec!["3".to_string(), "1E2".to_string()]; + let numerator = vec!["1", "2"]; + let denominator = vec!["3", "1E2"]; let expected = GenericFraction::new(BigUint::from(2u32), BigUint::from(300u32)); let actual = convert_slices_to_fraction(&numerator, &denominator).unwrap(); assert_eq!(expected, actual); - let numerator = vec!["1".to_string(), "2".to_string()]; - let denominator = vec!["3".to_string(), "1E-2".to_string()]; + let numerator = vec!["1", "2"]; + let denominator = vec!["3", "1E-2"]; let expected = GenericFraction::new(BigUint::from(200u32), BigUint::from(3u32)); let actual = convert_slices_to_fraction(&numerator, &denominator).unwrap(); assert_eq!(expected, actual); - let numerator = vec!["1".to_string(), "2".to_string()]; - let denominator = vec!["3".to_string(), "1E-2.5".to_string()]; + let numerator = vec!["1", "2"]; + let denominator = vec!["3", "1E-2.5"]; let actual = convert_slices_to_fraction(&numerator, &denominator); assert!(actual.is_err()); - let numerator = vec!["1E2".to_string()]; - let denominator = vec!["2".to_string()]; + let numerator = vec!["1E2"]; + let denominator = vec!["2"]; let expected = GenericFraction::new(BigUint::from(50u32), BigUint::from(1u32)); let actual = convert_slices_to_fraction(&numerator, &denominator).unwrap(); assert_eq!(expected, actual); - let numerator = vec!["1E2".to_string(), "2".to_string()]; - let denominator = vec!["3".to_string(), "1E2".to_string()]; + let numerator = vec!["1E2", "2"]; + let denominator = vec!["3", "1E2"]; let expected = GenericFraction::new(BigUint::from(2u32), BigUint::from(3u32)); let actual = convert_slices_to_fraction(&numerator, &denominator).unwrap(); assert_eq!(expected, actual); diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 32d067af7ed..0019da21fca 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -168,13 +168,13 @@ impl DataProvider for crate::DatagenProvider { &constant .clean_num .iter() - .map(|s| s.to_string()) - .collect::>(), + .map(|s| s.as_str()) + .collect::>(), &constant .clean_den .iter() - .map(|s| s.to_string()) - .collect::>(), + .map(|s| s.as_str()) + .collect::>(), )?; let (num, den, sign, cons_type) = transform_fraction_to_constant_value(value, constant.constant_exactness)?; From ba1dde24be8120eee60a576a4d0a77fea29d4500 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Mon, 16 Oct 2023 14:09:31 +0200 Subject: [PATCH 100/104] fix build after merge. --- provider/datagen/tests/data/postcard/fingerprints.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/provider/datagen/tests/data/postcard/fingerprints.csv b/provider/datagen/tests/data/postcard/fingerprints.csv index b9a776835ef..359916aaa0f 100644 --- a/provider/datagen/tests/data/postcard/fingerprints.csv +++ b/provider/datagen/tests/data/postcard/fingerprints.csv @@ -2079,4 +2079,4 @@ transliterator/rules@1, und+und-t-und-d0-test-m0-niels-s0-test, 1769B, 45400449c transliterator/rules@1, und+und-t-und-d0-test-m0-rectesta-s0-test, 369B, 69c41d4b5c828833 transliterator/rules@1, und+und-t-und-d0-test-m0-rectestr-s0-test, 237B, 3345ed066cbb729f transliterator/rules@1, und+und-t-und-latn-d0-ascii, 27083B, 5098d1af741181a3 -units/constants@1, und, 426B, e0c7eeb9e702371c +units/constants@1, und, 555B, b463e4109a02b639 From f5e2e7964bf3823076598bf779293e2d0a333403 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Mon, 16 Oct 2023 14:27:18 +0200 Subject: [PATCH 101/104] rename count --- provider/datagen/src/transform/cldr/units/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 0019da21fca..71638d5f5d8 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -100,7 +100,7 @@ impl DataProvider for crate::DatagenProvider { } // Replacing non scientific constant terms with their corresponding clean value. - let mut count = 0; + let mut no_update_count = 0; while !constants_with_non_scientific.is_empty() { let mut updated = false; let (constant_key, mut non_scientific_constant) = constants_with_non_scientific @@ -149,8 +149,8 @@ impl DataProvider for crate::DatagenProvider { constants_with_non_scientific.push_back((constant_key, non_scientific_constant)); } - count = if !updated { count + 1 } else { 0 }; - if count > constants_with_non_scientific.len() { + no_update_count = if !updated { no_update_count + 1 } else { 0 }; + if no_update_count > constants_with_non_scientific.len() { return Err(DataError::custom( "An Infinite loop was detected in the CLDR constants data!", )); From 61796c6ce6b1960a5775bc860db0b08c859aae9e Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Mon, 16 Oct 2023 14:38:17 +0200 Subject: [PATCH 102/104] Update provider/datagen/src/transform/cldr/units/mod.rs Co-authored-by: Robert Bastian <4706271+robertbastian@users.noreply.github.com> --- provider/datagen/src/transform/cldr/units/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/provider/datagen/src/transform/cldr/units/mod.rs b/provider/datagen/src/transform/cldr/units/mod.rs index 71638d5f5d8..0f0a77e4e34 100644 --- a/provider/datagen/src/transform/cldr/units/mod.rs +++ b/provider/datagen/src/transform/cldr/units/mod.rs @@ -152,7 +152,7 @@ impl DataProvider for crate::DatagenProvider { no_update_count = if !updated { no_update_count + 1 } else { 0 }; if no_update_count > constants_with_non_scientific.len() { return Err(DataError::custom( - "An Infinite loop was detected in the CLDR constants data!", + "A loop was detected in the CLDR constants data!", )); } } From 4a6e2bbb4fb5dd599a741c4e006e6ebff63c3086 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Mon, 16 Oct 2023 14:42:50 +0200 Subject: [PATCH 103/104] remove "num" --- tools/depcheck/src/allowlist.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/depcheck/src/allowlist.rs b/tools/depcheck/src/allowlist.rs index 83889a6fb30..e933d34927b 100644 --- a/tools/depcheck/src/allowlist.rs +++ b/tools/depcheck/src/allowlist.rs @@ -148,7 +148,6 @@ pub const EXTRA_DATAGEN_DEPS: &[&str] = &[ "itoa", "matrixmultiply", "ndarray", - "num", "num-bigint", "num-complex", "num-integer", From 70f48c7582b8567017a19d152b03b2b1abc2d681 Mon Sep 17 00:00:00 2001 From: Younies Mahmoud Date: Mon, 16 Oct 2023 14:47:11 +0200 Subject: [PATCH 104/104] add num --- tools/depcheck/src/allowlist.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/depcheck/src/allowlist.rs b/tools/depcheck/src/allowlist.rs index e933d34927b..83889a6fb30 100644 --- a/tools/depcheck/src/allowlist.rs +++ b/tools/depcheck/src/allowlist.rs @@ -148,6 +148,7 @@ pub const EXTRA_DATAGEN_DEPS: &[&str] = &[ "itoa", "matrixmultiply", "ndarray", + "num", "num-bigint", "num-complex", "num-integer",