rust-lang · bors · Apr 12, 2022 · May 10, 2021 · May 18, 2021 · May 18, 2021
diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs
@@ -5,6 +5,7 @@
 use crate::ascii;
 use crate::intrinsics;
 use crate::mem;
+use crate::ops::{Add, Mul, Sub};
 use crate::str::FromStr;
 
 // Used because the `?` operator is not allowed in a const context.
@@ -969,9 +970,10 @@ pub enum FpCategory {
 }
 
 #[doc(hidden)]
-trait FromStrRadixHelper: PartialOrd + Copy {
- fn min_value() -> Self;
- fn max_value() -> Self;
+trait FromStrRadixHelper:
+ PartialOrd + Copy + Add<Output = Self> + Sub<Output = Self> + Mul<Output = Self>
+{
+ const MIN: Self;
  fn from_u32(u: u32) -> Self;
  fn checked_mul(&self, other: u32) -> Option<Self>;
  fn checked_sub(&self, other: u32) -> Option<Self>;
@@ -991,12 +993,9 @@ macro_rules! from_str_radix_int_impl {
 }
 from_str_radix_int_impl! { isize i8 i16 i32 i64 i128 usize u8 u16 u32 u64 u128 }
 
-macro_rules! doit {
+macro_rules! impl_helper_for {
  ($($t:ty)*) => ($(impl FromStrRadixHelper for $t {
- #[inline]
- fn min_value() -> Self { Self::MIN }
- #[inline]
- fn max_value() -> Self { Self::MAX }
+ const MIN: Self = Self::MIN;
  #[inline]
  fn from_u32(u: u32) -> Self { u as Self }
  #[inline]
@@ -1013,7 +1012,18 @@ macro_rules! doit {
  }
  })*)
 }
-doit! { i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize }
+impl_helper_for! { i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize }
+
+/// Determins if a string of text of that length of that radix could be guaranteed to be
+/// stored in the given type T.
+/// Note that if the radix is known to the compiler, it is just the check of digits.len that
+/// is done at runtime.
+#[doc(hidden)]
+#[inline(always)]
+#[unstable(issue = "none", feature = "std_internals")]
+pub fn can_not_overflow<T>(radix: u32, is_signed_ty: bool, digits: &[u8]) -> bool {
+ radix <= 16 && digits.len() <= mem::size_of::<T>() * 2 - is_signed_ty as usize
+}
 
 fn from_str_radix<T: FromStrRadixHelper>(src: &str, radix: u32) -> Result<T, ParseIntError> {
  use self::IntErrorKind::*;
@@ -1029,7 +1039,7 @@ fn from_str_radix<T: FromStrRadixHelper>(src: &str, radix: u32) -> Result<T, Par
  return Err(PIE { kind: Empty });
  }
 
- let is_signed_ty = T::from_u32(0) > T::min_value();
+ let is_signed_ty = T::from_u32(0) > T::MIN;
 
  // all valid digits are ascii, so we will just iterate over the utf8 bytes
  // and cast them to chars. .to_digit() will safely return None for anything
@@ -1047,38 +1057,56 @@ fn from_str_radix<T: FromStrRadixHelper>(src: &str, radix: u32) -> Result<T, Par
  };
 
  let mut result = T::from_u32(0);
- if is_positive {
- // The number is positive
- for &c in digits {
- let x = match (c as char).to_digit(radix) {
- Some(x) => x,
- None => return Err(PIE { kind: InvalidDigit }),
- };
- result = match result.checked_mul(radix) {
- Some(result) => result,
- None => return Err(PIE { kind: PosOverflow }),
- };
- result = match result.checked_add(x) {
- Some(result) => result,
- None => return Err(PIE { kind: PosOverflow }),
+
+ if can_not_overflow::<T>(radix, is_signed_ty, digits) {
+ // If the len of the str is short compared to the range of the type
+ // we are parsing into, then we can be certain that an overflow will not occur.
+ // This bound is when `radix.pow(digits.len()) - 1 <= T::MAX` but the condition
+ // above is a faster (conservative) approximation of this.
+ //
+ // Consider radix 16 as it has the highest information density per digit and will thus overflow the earliest:
+ // `u8::MAX` is `ff` - any str of len 2 is guaranteed to not overflow.
+ // `i8::MAX` is `7f` - only a str of len 1 is guaranteed to not overflow.
+ macro_rules! run_unchecked_loop {
+ ($unchecked_additive_op:expr) => {
+ for &c in digits {
+ result = result * T::from_u32(radix);
+ let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?;
+ result = $unchecked_additive_op(result, T::from_u32(x));
+ }
  };
  }
+ if is_positive {
+ run_unchecked_loop!(<T as core::ops::Add>::add)
+ } else {
+ run_unchecked_loop!(<T as core::ops::Sub>::sub)
+ };
  } else {
- // The number is negative
- for &c in digits {
- let x = match (c as char).to_digit(radix) {
- Some(x) => x,
- None => return Err(PIE { kind: InvalidDigit }),
- };
- result = match result.checked_mul(radix) {
- Some(result) => result,
- None => return Err(PIE { kind: NegOverflow }),
- };
- result = match result.checked_sub(x) {
- Some(result) => result,
- None => return Err(PIE { kind: NegOverflow }),
+ macro_rules! run_checked_loop {
+ ($checked_additive_op:ident, $overflow_err:expr) => {
+ for &c in digits {
+ // When `radix` is passed in as a literal, rather than doing a slow `imul`
+ // the compiler can use shifts if `radix` can be expressed as a
+ // sum of powers of 2 (x*10 can be written as x*8 + x*2).
+ // When the compiler can't use these optimisations,
+ // the latency of the multiplication can be hidden by issuing it
+ // before the result is needed to improve performance on
+ // modern out-of-order CPU as multiplication here is slower
+ // than the other instructions, we can get the end result faster
+ // doing multiplication first and let the CPU spends other cycles
+ // doing other computation and get multiplication result later.
+ let mul = result.checked_mul(radix);
+ let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?;
+ result = mul.ok_or_else($overflow_err)?;
+ result = T::$checked_additive_op(&result, x).ok_or_else($overflow_err)?;
+ }
  };
  }
+ if is_positive {
+ run_checked_loop!(checked_add, || PIE { kind: PosOverflow })
+ } else {
+ run_checked_loop!(checked_sub, || PIE { kind: NegOverflow })
+ };
  }
  Ok(result)
 }
diff --git a/library/core/tests/lib.rs b/library/core/tests/lib.rs
@@ -55,6 +55,7 @@
 #![feature(numfmt)]
 #![feature(step_trait)]
 #![feature(str_internals)]
+#![feature(std_internals)]
 #![feature(test)]
 #![feature(trusted_len)]
 #![feature(try_blocks)]

diff --git a/library/core/tests/num/mod.rs b/library/core/tests/num/mod.rs
@@ -2,7 +2,7 @@ use core::cmp::PartialEq;
 use core::convert::{TryFrom, TryInto};
 use core::fmt::Debug;
 use core::marker::Copy;
-use core::num::{IntErrorKind, ParseIntError, TryFromIntError};
+use core::num::{can_not_overflow, IntErrorKind, ParseIntError, TryFromIntError};
 use core::ops::{Add, Div, Mul, Rem, Sub};
 use core::option::Option;
 use core::option::Option::None;
@@ -120,6 +120,75 @@ fn test_int_from_str_overflow() {
  test_parse::<i64>("-9223372036854775809", Err(IntErrorKind::NegOverflow));
 }
 
+#[test]
+fn test_can_not_overflow() {
+ fn can_overflow<T>(radix: u32, input: &str) -> bool
+ where
+ T: std::convert::TryFrom<i8>,
+ {
+ !can_not_overflow::<T>(radix, T::try_from(-1_i8).is_ok(), input.as_bytes())
+ }
+
+ // Positive tests:
+ assert!(!can_overflow::<i8>(16, "F"));
+ assert!(!can_overflow::<u8>(16, "FF"));
+
+ assert!(!can_overflow::<i8>(10, "9"));
+ assert!(!can_overflow::<u8>(10, "99"));
+
+ // Negative tests:
+
+ // Not currently in std lib (issue: #27728)
+ fn format_radix<T>(mut x: T, radix: T) -> String
+ where
+ T: std::ops::Rem<Output = T>,
+ T: std::ops::Div<Output = T>,
+ T: std::cmp::PartialEq,
+ T: std::default::Default,
+ T: Copy,
+ T: Default,
+ u32: TryFrom<T>,
+ {
+ let mut result = vec![];
+
+ loop {
+ let m = x % radix;
+ x = x / radix;
+ result.push(
+ std::char::from_digit(m.try_into().ok().unwrap(), radix.try_into().ok().unwrap())
+ .unwrap(),
+ );
+ if x == T::default() {
+ break;
+ }
+ }
+ result.into_iter().rev().collect()
+ }
+
+ macro_rules! check {
+ ($($t:ty)*) => ($(
+ for base in 2..=36 {
+ let num = (<$t>::MAX as u128) + 1;
+
+ // Calcutate the string length for the smallest overflowing number:
+ let max_len_string = format_radix(num, base as u128);
+ // Ensure that that string length is deemed to potentially overflow:
+ assert!(can_overflow::<$t>(base, &max_len_string));
+ }
+ )*)
+ }
+
+ check! { i8 i16 i32 i64 i128 isize usize u8 u16 u32 u64 }
+
+ // Check u128 separately:
+ for base in 2..=36 {
+ let num = u128::MAX as u128;
+ let max_len_string = format_radix(num, base as u128);
+ // base 16 fits perfectly for u128 and won't overflow:
+ assert_eq!(can_overflow::<u128>(base, &max_len_string), base != 16);
+ }
+}
+
 #[test]
 fn test_leading_plus() {
  test_parse::<u8>("+127", Ok(127));