From 1f209c39598fa3adb567ff9f9de76769aa6ac9f4 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Mon, 10 May 2021 21:56:27 +0100 Subject: [PATCH 01/15] faster parsing when not possible to overflow --- core/src/num/mod.rs | 85 ++++++++++++++++++++++++++------------------- 1 file changed, 50 insertions(+), 35 deletions(-) diff --git a/core/src/num/mod.rs b/core/src/num/mod.rs index dca8ffa4e..5fd697fd4 100644 --- a/core/src/num/mod.rs +++ b/core/src/num/mod.rs @@ -970,12 +970,14 @@ pub enum FpCategory { #[doc(hidden)] trait FromStrRadixHelper: PartialOrd + Copy { - fn min_value() -> Self; - fn max_value() -> Self; + const MIN: Self; fn from_u32(u: u32) -> Self; fn checked_mul(&self, other: u32) -> Option; fn checked_sub(&self, other: u32) -> Option; fn checked_add(&self, other: u32) -> Option; + unsafe fn unchecked_mul(&self, other: u32) -> Self; + unsafe fn unchecked_sub(&self, other: u32) -> Self; + unsafe fn unchecked_add(&self, other: u32) -> Self; } macro_rules! from_str_radix_int_impl { @@ -993,10 +995,7 @@ from_str_radix_int_impl! { isize i8 i16 i32 i64 i128 usize u8 u16 u32 u64 u128 } macro_rules! doit { ($($t:ty)*) => ($(impl FromStrRadixHelper for $t { - #[inline] - fn min_value() -> Self { Self::MIN } - #[inline] - fn max_value() -> Self { Self::MAX } + const MIN: Self = Self::MIN; #[inline] fn from_u32(u: u32) -> Self { u as Self } #[inline] @@ -1011,6 +1010,27 @@ macro_rules! doit { fn checked_add(&self, other: u32) -> Option { Self::checked_add(*self, other as Self) } + #[inline] + unsafe fn unchecked_mul(&self, other: u32) -> Self { + // SAFETY: Conditions of `Self::unchecked_mul` must be upheld by the caller. + unsafe { + Self::unchecked_mul(*self, other as Self) + } + } + #[inline] + unsafe fn unchecked_sub(&self, other: u32) -> Self { + // SAFETY: Conditions of `Self::unchecked_sub` must be upheld by the caller. + unsafe { + Self::unchecked_sub(*self, other as Self) + } + } + #[inline] + unsafe fn unchecked_add(&self, other: u32) -> Self { + // SAFETY: Conditions of `Self::unchecked_add` must be upheld by the caller. + unsafe { + Self::unchecked_add(*self, other as Self) + } + } })*) } doit! { i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize } @@ -1029,7 +1049,7 @@ fn from_str_radix(src: &str, radix: u32) -> Result T::min_value(); + let is_signed_ty = T::from_u32(0) > T::MIN; // all valid digits are ascii, so we will just iterate over the utf8 bytes // and cast them to chars. .to_digit() will safely return None for anything @@ -1047,37 +1067,32 @@ fn from_str_radix(src: &str, radix: u32) -> Result x, - None => return Err(PIE { kind: InvalidDigit }), - }; - result = match result.checked_mul(radix) { - Some(result) => result, - None => return Err(PIE { kind: PosOverflow }), - }; - result = match result.checked_add(x) { - Some(result) => result, - None => return Err(PIE { kind: PosOverflow }), - }; + + if radix <= 16 && digits.len() <= mem::size_of::() * 2 - is_signed_ty as usize { + // SAFETY: Consider the highest radix of 16: + // `u8::MAX` is `ff` (2 characters), `u16::MAX` is `ffff` (4 characters) + // We can be sure that any src len of 2 would fit in a u8 so we don't need + // to check for overflow. + unsafe { + let unchecked_additive_op = + if is_positive { T::unchecked_add } else { T::unchecked_sub }; + + for &c in digits { + result = result.unchecked_mul(radix); + let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?; + result = unchecked_additive_op(&result, x); + } } } else { - // The number is negative + let additive_op = if is_positive { T::checked_add } else { T::checked_sub }; + let overflow_err = || PIE { kind: if is_positive { PosOverflow } else { NegOverflow } }; + for &c in digits { - let x = match (c as char).to_digit(radix) { - Some(x) => x, - None => return Err(PIE { kind: InvalidDigit }), - }; - result = match result.checked_mul(radix) { - Some(result) => result, - None => return Err(PIE { kind: NegOverflow }), - }; - result = match result.checked_sub(x) { - Some(result) => result, - None => return Err(PIE { kind: NegOverflow }), - }; + let mul = result.checked_mul(radix); + let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?; + // multiply done early for performance reasons. + result = mul.ok_or_else(overflow_err)?; + result = additive_op(&result, x).ok_or_else(overflow_err)?; } } Ok(result) From 74ca9ac3e5a83cb7769b124539966327b4623ec9 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Tue, 18 May 2021 08:51:20 +0100 Subject: [PATCH 02/15] add likely and clearer comments --- core/src/num/mod.rs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/core/src/num/mod.rs b/core/src/num/mod.rs index 5fd697fd4..194193348 100644 --- a/core/src/num/mod.rs +++ b/core/src/num/mod.rs @@ -1068,8 +1068,13 @@ fn from_str_radix(src: &str, radix: u32) -> Result() * 2 - is_signed_ty as usize { - // SAFETY: Consider the highest radix of 16: + if intrinsics::likely( + radix <= 16 && digits.len() <= mem::size_of::() * 2 - is_signed_ty as usize, + ) { + // SAFETY: We can take this fast path when `radix.pow(digits.len()) - 1 <= T::MAX` + // but the condition above is a faster (conservative) approximation of this. + // + // Consider the highest radix of 16: // `u8::MAX` is `ff` (2 characters), `u16::MAX` is `ffff` (4 characters) // We can be sure that any src len of 2 would fit in a u8 so we don't need // to check for overflow. @@ -1088,9 +1093,14 @@ fn from_str_radix(src: &str, radix: u32) -> Result Date: Tue, 18 May 2021 18:22:04 +0100 Subject: [PATCH 03/15] Update library/core/src/num/mod.rs Co-authored-by: LingMan --- core/src/num/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/num/mod.rs b/core/src/num/mod.rs index 194193348..a833f4025 100644 --- a/core/src/num/mod.rs +++ b/core/src/num/mod.rs @@ -1094,8 +1094,8 @@ fn from_str_radix(src: &str, radix: u32) -> Result Date: Tue, 18 May 2021 18:22:21 +0100 Subject: [PATCH 04/15] Update library/core/src/num/mod.rs Co-authored-by: LingMan --- core/src/num/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/num/mod.rs b/core/src/num/mod.rs index a833f4025..9cd278e75 100644 --- a/core/src/num/mod.rs +++ b/core/src/num/mod.rs @@ -1097,8 +1097,8 @@ fn from_str_radix(src: &str, radix: u32) -> Result