Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Int parsing optimisations (part 2) #96071

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions library/core/benches/num/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ from_str_radix_bench!(bench_u64_from_str_radix_10, u64, 10);
from_str_radix_bench!(bench_u64_from_str_radix_16, u64, 16);
from_str_radix_bench!(bench_u64_from_str_radix_36, u64, 36);

from_str_bench!(bench_u128_from_str, u128);
from_str_radix_bench!(bench_u128_from_str_radix_2, u128, 2);
from_str_radix_bench!(bench_u128_from_str_radix_10, u128, 10);
from_str_radix_bench!(bench_u128_from_str_radix_16, u128, 16);
from_str_radix_bench!(bench_u128_from_str_radix_36, u128, 36);

from_str_bench!(bench_i8_from_str, i8);
from_str_radix_bench!(bench_i8_from_str_radix_2, i8, 2);
from_str_radix_bench!(bench_i8_from_str_radix_10, i8, 10);
Expand All @@ -106,3 +112,9 @@ from_str_radix_bench!(bench_i64_from_str_radix_2, i64, 2);
from_str_radix_bench!(bench_i64_from_str_radix_10, i64, 10);
from_str_radix_bench!(bench_i64_from_str_radix_16, i64, 16);
from_str_radix_bench!(bench_i64_from_str_radix_36, i64, 36);

from_str_bench!(bench_i128_from_str, i128);
from_str_radix_bench!(bench_i128_from_str_radix_2, i128, 2);
from_str_radix_bench!(bench_i128_from_str_radix_10, i128, 10);
from_str_radix_bench!(bench_i128_from_str_radix_16, i128, 16);
from_str_radix_bench!(bench_i128_from_str_radix_36, i128, 36);
110 changes: 67 additions & 43 deletions library/core/src/num/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -999,15 +999,15 @@ macro_rules! impl_helper_for {
}
impl_helper_for! { i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize }

/// Determines if a string of text of that length of that radix could be guaranteed to be
/// stored in the given type T.
/// Determines length of text of a particular radix that could be guaranteed to be
/// stored in the given type T without overflow.
/// Note that if the radix is known to the compiler, it is just the check of digits.len that
/// is done at runtime.
#[doc(hidden)]
#[inline(always)]
#[unstable(issue = "none", feature = "std_internals")]
pub fn can_not_overflow<T>(radix: u32, is_signed_ty: bool, digits: &[u8]) -> bool {
radix <= 16 && digits.len() <= mem::size_of::<T>() * 2 - is_signed_ty as usize
pub fn safe_width<T>(radix: u32, is_signed_ty: bool) -> usize {
if radix > 16 { 0 } else { mem::size_of::<T>() * 2 - is_signed_ty as usize }
}
scottmcm marked this conversation as resolved.
Show resolved Hide resolved

fn from_str_radix<T: FromStrRadixHelper>(src: &str, radix: u32) -> Result<T, ParseIntError> {
Expand All @@ -1020,10 +1020,6 @@ fn from_str_radix<T: FromStrRadixHelper>(src: &str, radix: u32) -> Result<T, Par
radix
);

if src.is_empty() {
return Err(PIE { kind: Empty });
}

let is_signed_ty = T::from_u32(0) > T::MIN;

// all valid digits are ascii, so we will just iterate over the utf8 bytes
Expand All @@ -1032,29 +1028,52 @@ fn from_str_radix<T: FromStrRadixHelper>(src: &str, radix: u32) -> Result<T, Par
// of multi-byte sequences
let src = src.as_bytes();

let (is_positive, digits) = match src[0] {
b'+' | b'-' if src[1..].is_empty() => {
return Err(PIE { kind: InvalidDigit });
let (first, mut digits) = (*src.get(0).ok_or_else(|| PIE { kind: Empty })?, &src[1..]);

let (is_positive, mut result) = match first {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like there's a bit more repetition going on here than there needs to be.

Maybe try this with slice patterns, or something? I'm imagining something like

let (is_positive, digits) = match src {
    [b'-', d] => (false, d),
    [b'+', d] => (true, d),
    d => (true, d),
};

To hopefully simplify a bit of the first/digits/result dance that's currently happening.

b'+' => {
let first = digits.get(0).ok_or_else(|| PIE { kind: InvalidDigit })?;
digits = &digits[1..];
(
true,
T::from_u32(
(*first as char).to_digit(radix).ok_or_else(|| PIE { kind: InvalidDigit })?,
),
)
}
b'+' => (true, &src[1..]),
b'-' if is_signed_ty => (false, &src[1..]),
_ => (true, src),
b'-' if is_signed_ty => {
let first = digits.get(0).ok_or_else(|| PIE { kind: InvalidDigit })?;
digits = &digits[1..];
(
false,
T::from_u32(0)
- T::from_u32(
(*first as char)
.to_digit(radix)
.ok_or_else(|| PIE { kind: InvalidDigit })?,
),
)
}
val => (
true,
T::from_u32((val as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?),
),
};

let mut result = T::from_u32(0);

if can_not_overflow::<T>(radix, is_signed_ty, digits) {
if mem::size_of::<T>() > 2 {
// If the len of the str is short compared to the range of the type
// we are parsing into, then we can be certain that an overflow will not occur.
// This bound is when `radix.pow(digits.len()) - 1 <= T::MAX` but the condition
// above is a faster (conservative) approximation of this.
// in `safe_width` is a faster (conservative) approximation of this.
//
// Consider radix 16 as it has the highest information density per digit and will thus overflow the earliest:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: this comment is useful information, but doesn't seem like it belongs here, since the computation it's talking about here isn't here. Maybe put it in on/in safe_width instead?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Said otherwise, this code will be correct as long as safe_width is correct, so the details of which approach -- faster or tighter -- doesn't really matter here.

// `u8::MAX` is `ff` - any str of len 2 is guaranteed to not overflow.
// `i8::MAX` is `7f` - only a str of len 1 is guaranteed to not overflow.
let safe_width = safe_width::<T>(radix, is_signed_ty);

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: the .take in one spot not coupled with a corresponding .skip in the other makes this read a bit strangely to me. Perhaps the splitting could just be put here, with no need to ever look at the length again later? As a first thought, something like this, with appropriate updates to the for loops?

Suggested change
let (safe_digits, risky_digits) = if safe_width > digits.len() { (digits, &[]) } else { digits.split_at(safe_width) };

macro_rules! run_unchecked_loop {
($unchecked_additive_op:expr) => {
for &c in digits {
for &c in digits.iter().take(safe_width) {
result = result * T::from_u32(radix);
let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?;
result = $unchecked_additive_op(result, T::from_u32(x));
Expand All @@ -1066,32 +1085,37 @@ fn from_str_radix<T: FromStrRadixHelper>(src: &str, radix: u32) -> Result<T, Par
} else {
run_unchecked_loop!(<T as core::ops::Sub>::sub)
};
} else {
macro_rules! run_checked_loop {
($checked_additive_op:ident, $overflow_err:expr) => {
for &c in digits {
// When `radix` is passed in as a literal, rather than doing a slow `imul`
// the compiler can use shifts if `radix` can be expressed as a
// sum of powers of 2 (x*10 can be written as x*8 + x*2).
// When the compiler can't use these optimisations,
// the latency of the multiplication can be hidden by issuing it
// before the result is needed to improve performance on
// modern out-of-order CPU as multiplication here is slower
// than the other instructions, we can get the end result faster
// doing multiplication first and let the CPU spends other cycles
// doing other computation and get multiplication result later.
let mul = result.checked_mul(radix);
let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?;
result = mul.ok_or_else($overflow_err)?;
result = T::$checked_additive_op(&result, x).ok_or_else($overflow_err)?;
}
};
if safe_width >= digits.len() {
return Ok(result);
}
if is_positive {
run_checked_loop!(checked_add, || PIE { kind: PosOverflow })
} else {
run_checked_loop!(checked_sub, || PIE { kind: NegOverflow })
digits = &digits[safe_width..];
}

macro_rules! run_checked_loop {
($checked_additive_op:ident, $overflow_err:expr) => {
for &c in digits {
// When `radix` is passed in as a literal, rather than doing a slow `imul`
// the compiler can use shifts if `radix` can be expressed as a
// sum of powers of 2 (x*10 can be written as x*8 + x*2).
// When the compiler can't use these optimisations,
// the latency of the multiplication can be hidden by issuing it
// before the result is needed to improve performance on
// modern out-of-order CPU as multiplication here is slower
// than the other instructions, we can get the end result faster
// doing multiplication first and let the CPU spends other cycles
// doing other computation and get multiplication result later.
let mul = result.checked_mul(radix);
let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?;
result = mul.ok_or_else($overflow_err)?;
result = T::$checked_additive_op(&result, x).ok_or_else($overflow_err)?;
}
};
}
if is_positive {
run_checked_loop!(checked_add, || PIE { kind: PosOverflow })
} else {
run_checked_loop!(checked_sub, || PIE { kind: NegOverflow })
};

Ok(result)
}
12 changes: 6 additions & 6 deletions library/core/tests/num/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use core::cmp::PartialEq;
use core::convert::{TryFrom, TryInto};
use core::fmt::Debug;
use core::marker::Copy;
use core::num::{can_not_overflow, IntErrorKind, ParseIntError, TryFromIntError};
use core::num::{safe_width, IntErrorKind, ParseIntError, TryFromIntError};
use core::ops::{Add, Div, Mul, Rem, Sub};
use core::option::Option;
use core::option::Option::None;
Expand Down Expand Up @@ -126,15 +126,15 @@ fn test_can_not_overflow() {
where
T: std::convert::TryFrom<i8>,
{
!can_not_overflow::<T>(radix, T::try_from(-1_i8).is_ok(), input.as_bytes())
safe_width::<T>(radix, T::try_from(-1_i8).is_ok()) < input.len()
}

// Positive tests:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: how about testing the output of safe_width directly? Just seeing can_overflow returning true doesn't mean that it's correct -- it could be returning usize::MAX.

assert!(!can_overflow::<i8>(16, "F"));
assert!(!can_overflow::<u8>(16, "FF"));
assert!(!can_overflow::<i32>(16, "FFFFFFF"));
assert!(!can_overflow::<u32>(16, "FFFFFFFF"));

assert!(!can_overflow::<i8>(10, "9"));
assert!(!can_overflow::<u8>(10, "99"));
assert!(!can_overflow::<i32>(10, "9"));
assert!(!can_overflow::<u32>(10, "99"));

// Negative tests:

Expand Down