diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index dca8ffa4e2c89..fff9bddc43557 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -5,6 +5,7 @@ use crate::ascii; use crate::intrinsics; use crate::mem; +use crate::ops::{Add, Mul, Sub}; use crate::str::FromStr; // Used because the `?` operator is not allowed in a const context. @@ -969,9 +970,10 @@ pub enum FpCategory { } #[doc(hidden)] -trait FromStrRadixHelper: PartialOrd + Copy { - fn min_value() -> Self; - fn max_value() -> Self; +trait FromStrRadixHelper: + PartialOrd + Copy + Add + Sub + Mul +{ + const MIN: Self; fn from_u32(u: u32) -> Self; fn checked_mul(&self, other: u32) -> Option; fn checked_sub(&self, other: u32) -> Option; @@ -991,12 +993,9 @@ macro_rules! from_str_radix_int_impl { } from_str_radix_int_impl! { isize i8 i16 i32 i64 i128 usize u8 u16 u32 u64 u128 } -macro_rules! doit { +macro_rules! impl_helper_for { ($($t:ty)*) => ($(impl FromStrRadixHelper for $t { - #[inline] - fn min_value() -> Self { Self::MIN } - #[inline] - fn max_value() -> Self { Self::MAX } + const MIN: Self = Self::MIN; #[inline] fn from_u32(u: u32) -> Self { u as Self } #[inline] @@ -1013,7 +1012,18 @@ macro_rules! doit { } })*) } -doit! { i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize } +impl_helper_for! { i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize } + +/// Determins if a string of text of that length of that radix could be guaranteed to be +/// stored in the given type T. +/// Note that if the radix is known to the compiler, it is just the check of digits.len that +/// is done at runtime. +#[doc(hidden)] +#[inline(always)] +#[unstable(issue = "none", feature = "std_internals")] +pub fn can_not_overflow(radix: u32, is_signed_ty: bool, digits: &[u8]) -> bool { + radix <= 16 && digits.len() <= mem::size_of::() * 2 - is_signed_ty as usize +} fn from_str_radix(src: &str, radix: u32) -> Result { use self::IntErrorKind::*; @@ -1029,7 +1039,7 @@ fn from_str_radix(src: &str, radix: u32) -> Result T::min_value(); + let is_signed_ty = T::from_u32(0) > T::MIN; // all valid digits are ascii, so we will just iterate over the utf8 bytes // and cast them to chars. .to_digit() will safely return None for anything @@ -1047,38 +1057,56 @@ fn from_str_radix(src: &str, radix: u32) -> Result x, - None => return Err(PIE { kind: InvalidDigit }), - }; - result = match result.checked_mul(radix) { - Some(result) => result, - None => return Err(PIE { kind: PosOverflow }), - }; - result = match result.checked_add(x) { - Some(result) => result, - None => return Err(PIE { kind: PosOverflow }), + + if can_not_overflow::(radix, is_signed_ty, digits) { + // If the len of the str is short compared to the range of the type + // we are parsing into, then we can be certain that an overflow will not occur. + // This bound is when `radix.pow(digits.len()) - 1 <= T::MAX` but the condition + // above is a faster (conservative) approximation of this. + // + // Consider radix 16 as it has the highest information density per digit and will thus overflow the earliest: + // `u8::MAX` is `ff` - any str of len 2 is guaranteed to not overflow. + // `i8::MAX` is `7f` - only a str of len 1 is guaranteed to not overflow. + macro_rules! run_unchecked_loop { + ($unchecked_additive_op:expr) => { + for &c in digits { + result = result * T::from_u32(radix); + let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?; + result = $unchecked_additive_op(result, T::from_u32(x)); + } }; } + if is_positive { + run_unchecked_loop!(::add) + } else { + run_unchecked_loop!(::sub) + }; } else { - // The number is negative - for &c in digits { - let x = match (c as char).to_digit(radix) { - Some(x) => x, - None => return Err(PIE { kind: InvalidDigit }), - }; - result = match result.checked_mul(radix) { - Some(result) => result, - None => return Err(PIE { kind: NegOverflow }), - }; - result = match result.checked_sub(x) { - Some(result) => result, - None => return Err(PIE { kind: NegOverflow }), + macro_rules! run_checked_loop { + ($checked_additive_op:ident, $overflow_err:expr) => { + for &c in digits { + // When `radix` is passed in as a literal, rather than doing a slow `imul` + // the compiler can use shifts if `radix` can be expressed as a + // sum of powers of 2 (x*10 can be written as x*8 + x*2). + // When the compiler can't use these optimisations, + // the latency of the multiplication can be hidden by issuing it + // before the result is needed to improve performance on + // modern out-of-order CPU as multiplication here is slower + // than the other instructions, we can get the end result faster + // doing multiplication first and let the CPU spends other cycles + // doing other computation and get multiplication result later. + let mul = result.checked_mul(radix); + let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?; + result = mul.ok_or_else($overflow_err)?; + result = T::$checked_additive_op(&result, x).ok_or_else($overflow_err)?; + } }; } + if is_positive { + run_checked_loop!(checked_add, || PIE { kind: PosOverflow }) + } else { + run_checked_loop!(checked_sub, || PIE { kind: NegOverflow }) + }; } Ok(result) } diff --git a/library/core/tests/lib.rs b/library/core/tests/lib.rs index 5c861236e86f1..bc2ab7489a165 100644 --- a/library/core/tests/lib.rs +++ b/library/core/tests/lib.rs @@ -55,6 +55,7 @@ #![feature(numfmt)] #![feature(step_trait)] #![feature(str_internals)] +#![feature(std_internals)] #![feature(test)] #![feature(trusted_len)] #![feature(try_blocks)] diff --git a/library/core/tests/num/mod.rs b/library/core/tests/num/mod.rs index 4f773a824efd2..49580cdcc481a 100644 --- a/library/core/tests/num/mod.rs +++ b/library/core/tests/num/mod.rs @@ -2,7 +2,7 @@ use core::cmp::PartialEq; use core::convert::{TryFrom, TryInto}; use core::fmt::Debug; use core::marker::Copy; -use core::num::{IntErrorKind, ParseIntError, TryFromIntError}; +use core::num::{can_not_overflow, IntErrorKind, ParseIntError, TryFromIntError}; use core::ops::{Add, Div, Mul, Rem, Sub}; use core::option::Option; use core::option::Option::None; @@ -120,6 +120,75 @@ fn test_int_from_str_overflow() { test_parse::("-9223372036854775809", Err(IntErrorKind::NegOverflow)); } +#[test] +fn test_can_not_overflow() { + fn can_overflow(radix: u32, input: &str) -> bool + where + T: std::convert::TryFrom, + { + !can_not_overflow::(radix, T::try_from(-1_i8).is_ok(), input.as_bytes()) + } + + // Positive tests: + assert!(!can_overflow::(16, "F")); + assert!(!can_overflow::(16, "FF")); + + assert!(!can_overflow::(10, "9")); + assert!(!can_overflow::(10, "99")); + + // Negative tests: + + // Not currently in std lib (issue: #27728) + fn format_radix(mut x: T, radix: T) -> String + where + T: std::ops::Rem, + T: std::ops::Div, + T: std::cmp::PartialEq, + T: std::default::Default, + T: Copy, + T: Default, + u32: TryFrom, + { + let mut result = vec![]; + + loop { + let m = x % radix; + x = x / radix; + result.push( + std::char::from_digit(m.try_into().ok().unwrap(), radix.try_into().ok().unwrap()) + .unwrap(), + ); + if x == T::default() { + break; + } + } + result.into_iter().rev().collect() + } + + macro_rules! check { + ($($t:ty)*) => ($( + for base in 2..=36 { + let num = (<$t>::MAX as u128) + 1; + + // Calcutate the string length for the smallest overflowing number: + let max_len_string = format_radix(num, base as u128); + // Ensure that that string length is deemed to potentially overflow: + assert!(can_overflow::<$t>(base, &max_len_string)); + } + )*) + } + + check! { i8 i16 i32 i64 i128 isize usize u8 u16 u32 u64 } + + // Check u128 separately: + for base in 2..=36 { + let num = u128::MAX as u128; + let max_len_string = format_radix(num, base as u128); + // base 16 fits perfectly for u128 and won't overflow: + assert_eq!(can_overflow::(base, &max_len_string), base != 16); + } +} + #[test] fn test_leading_plus() { test_parse::("+127", Ok(127));