Small optimization for integers Display implementation

GuillaumeGomez · GuillaumeGomez · commit fe7cdd871576 · 2024-07-25T22:10:19.000+02:00
diff --git a/library/core/src/fmt/num.rs b/library/core/src/fmt/num.rs
@@ -211,11 +211,47 @@ static DEC_DIGITS_LUT: &[u8; 200] = b"0001020304050607080910111213141516171819\
       8081828384858687888990919293949596979899";
 
 macro_rules! impl_Display {
-    ($($t:ident),* as $u:ident via $conv_fn:ident named $name:ident) => {
+    ($($t:ident => $size:literal $(as $positive:ident in $other:ident)? => named $name:ident,)* ; as $u:ident via $conv_fn:ident named $gen_name:ident) => {
+
+        $(
+        #[stable(feature = "rust1", since = "1.0.0")]
+        impl fmt::Display for $t {
+            #[allow(unused_comparisons)]
+            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+                // Only expanded for signed integers (those given an `as ... in ...` clause).
+                $(
+                    let is_nonnegative = *self >= 0;
+                    // Negative values are printed from their magnitude, with the sign flag cleared.
+                    #[cfg(not(feature = "optimize_for_size"))]
+                    {
+                        if !is_nonnegative {
+                            // two's complement negation (`!x + 1`) done in the unsigned type
+                            return $other((!self as $positive + 1), false, f);
+                        }
+                    }
+                    #[cfg(feature = "optimize_for_size")]
+                    {
+                        if !is_nonnegative {
+                            // the per-type `$name` fns are cfg'd out here; use `$gen_name` on `$u`
+                            return $gen_name((!self.$conv_fn()).wrapping_add(1), false, f);
+                        }
+                    }
+                )?
+                // Unsigned integers and non-negative signed integers end up here.
+                #[cfg(not(feature = "optimize_for_size"))]
+                {
+                    $name(*self, true, f)
+                }
+                #[cfg(feature = "optimize_for_size")]
+                {
+                    $gen_name(self.$conv_fn(), true, f)
+                }
+            }
+        }
+
         #[cfg(not(feature = "optimize_for_size"))]
-        fn $name(mut n: $u, is_nonnegative: bool, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-            // 2^128 is about 3*10^38, so 39 gives an extra byte of space
-            let mut buf = [MaybeUninit::<u8>::uninit(); 39];
+        fn $name(mut n: $t, is_nonnegative: bool, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            let mut buf = [MaybeUninit::<u8>::uninit(); $size];
             let mut curr = buf.len();
             let buf_ptr = MaybeUninit::slice_as_mut_ptr(&mut buf);
             let lut_ptr = DEC_DIGITS_LUT.as_ptr();
@@ -229,58 +265,64 @@ macro_rules! impl_Display {
             // is safe to access.
             unsafe {
                 // need at least 16 bits for the 4-characters-at-a-time to work.
-                assert!(crate::mem::size_of::<$u>() >= 2);
-
-                // eagerly decode 4 characters at a time
-                while n >= 10000 {
-                    let rem = (n % 10000) as usize;
-                    n /= 10000;
-
-                    let d1 = (rem / 100) << 1;
-                    let d2 = (rem % 100) << 1;
-                    curr -= 4;
-
-                    // We are allowed to copy to `buf_ptr[curr..curr + 3]` here since
-                    // otherwise `curr < 0`. But then `n` was originally at least `10000^10`
-                    // which is `10^40 > 2^128 > n`.
-                    ptr::copy_nonoverlapping(lut_ptr.add(d1), buf_ptr.add(curr), 2);
-                    ptr::copy_nonoverlapping(lut_ptr.add(d2), buf_ptr.add(curr + 2), 2);
+                #[allow(overflowing_literals)]
+                #[allow(unused_comparisons)]
+                // For 1-byte types this condition is a compile-time constant `false`, so the
+                // block is dead code; the `allow`s only silence lints on `n >= 10000` there.
+                if core::mem::size_of::<$t>() >= 2 {
+                    // eagerly decode 4 characters at a time
+                    while n >= 10000 {
+                        let rem = (n % 10000) as u16;
+                        n /= 10000;
+
+                        let d1 = (rem / 100) << 1;
+                        let d2 = (rem % 100) << 1;
+                        curr -= 4;
+
+                        // We are allowed to copy to `buf_ptr[curr..curr + 3]` here since
+                        // otherwise `curr < 0`. But then `n` would have more than `$size`
+                        // decimal digits, and `$size` must cover every digit of `$t::MAX`.
+                        ptr::copy_nonoverlapping(lut_ptr.add(d1 as usize), buf_ptr.add(curr), 2);
+                        ptr::copy_nonoverlapping(lut_ptr.add(d2 as usize), buf_ptr.add(curr + 2), 2);
+                    }
                 }
 
                 // if we reach here numbers are <= 9999, so at most 4 chars long
-                let mut n = n as usize; // possibly reduce 64bit math
+                let mut n = n as u16; // possibly reduce 64bit math
 
                 // decode 2 more chars, if > 2 chars
                 if n >= 100 {
                     let d1 = (n % 100) << 1;
                     n /= 100;
                     curr -= 2;
-                    ptr::copy_nonoverlapping(lut_ptr.add(d1), buf_ptr.add(curr), 2);
+                    ptr::copy_nonoverlapping(lut_ptr.add(d1 as usize), buf_ptr.add(curr), 2);
                 }
 
+                // if we reach here numbers are <= 99, so at most 2 chars long
+                // The biggest it can be is 99, and 99 << 1 == 198, so a `u8` is enough.
+                let n = n as u8;
                 // decode last 1 or 2 chars
                 if n < 10 {
                     curr -= 1;
-                    *buf_ptr.add(curr) = (n as u8) + b'0';
+                    *buf_ptr.add(curr) = n + b'0';
                 } else {
                     let d1 = n << 1;
                     curr -= 2;
-                    ptr::copy_nonoverlapping(lut_ptr.add(d1), buf_ptr.add(curr), 2);
+                    ptr::copy_nonoverlapping(lut_ptr.add(d1 as usize), buf_ptr.add(curr), 2);
                 }
             }
 
             // SAFETY: `curr` > 0 (since we made `buf` large enough), and all the chars are valid
             // UTF-8 since `DEC_DIGITS_LUT` is
             let buf_slice = unsafe {
-                str::from_utf8_unchecked(
+                core::str::from_utf8_unchecked(
                     slice::from_raw_parts(buf_ptr.add(curr), buf.len() - curr))
             };
             f.pad_integral(is_nonnegative, "", buf_slice)
-        }
+        })*
 
         #[cfg(feature = "optimize_for_size")]
-        fn $name(mut n: $u, is_nonnegative: bool, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-            // 2^128 is about 3*10^38, so 39 gives an extra byte of space
+        fn $gen_name(mut n: $u, is_nonnegative: bool, f: &mut fmt::Formatter<'_>) -> fmt::Result {
             let mut buf = [MaybeUninit::<u8>::uninit(); 39];
             let mut curr = buf.len();
             let buf_ptr = MaybeUninit::slice_as_mut_ptr(&mut buf);
@@ -309,27 +351,12 @@ macro_rules! impl_Display {
             };
             f.pad_integral(is_nonnegative, "", buf_slice)
         }
-
-        $(#[stable(feature = "rust1", since = "1.0.0")]
-        impl fmt::Display for $t {
-            #[allow(unused_comparisons)]
-            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-                let is_nonnegative = *self >= 0;
-                let n = if is_nonnegative {
-                    self.$conv_fn()
-                } else {
-                    // convert the negative num to positive by summing 1 to it's 2 complement
-                    (!self.$conv_fn()).wrapping_add(1)
-                };
-                $name(n, is_nonnegative, f)
-            }
-        })*
     };
 }
 
 macro_rules! impl_Exp {
-    ($($t:ident),* as $u:ident via $conv_fn:ident named $name:ident) => {
-        fn $name(
+    ($($t:ident => $size:literal,)* ; as $u:ident via $conv_fn:ident named $name:ident) => {
+        fn $name<const SIZE: usize>(
             mut n: $u,
             is_nonnegative: bool,
             upper: bool,
@@ -377,10 +404,9 @@ macro_rules! impl_Exp {
                 (n, exponent, exponent, added_precision)
             };
 
-            // 39 digits (worst case u128) + . = 40
             // Since `curr` always decreases by the number of digits copied, this means
             // that `curr >= 0`.
-            let mut buf = [MaybeUninit::<u8>::uninit(); 40];
+            let mut buf = [MaybeUninit::<u8>::uninit(); SIZE];
             let mut curr = buf.len(); //index for buf
             let buf_ptr = MaybeUninit::slice_as_mut_ptr(&mut buf);
             let lut_ptr = DEC_DIGITS_LUT.as_ptr();
@@ -398,7 +424,7 @@ macro_rules! impl_Exp {
                 exponent += 2;
             }
             // n is <= 99, so at most 2 chars long
-            let mut n = n as isize; // possibly reduce 64bit math
+            let mut n = n as i8; // possibly reduce 64bit math
             // decode second-to-last character
             if n >= 10 {
                 curr -= 1;
@@ -475,7 +501,7 @@ macro_rules! impl_Exp {
                         // convert the negative num to positive by summing 1 to it's 2 complement
                         (!self.$conv_fn()).wrapping_add(1)
                     };
-                    $name(n, is_nonnegative, false, f)
+                    $name::<$size>(n, is_nonnegative, false, f)
                 }
             })*
         $(
@@ -490,7 +516,7 @@ macro_rules! impl_Exp {
                         // convert the negative num to positive by summing 1 to it's 2 complement
                         (!self.$conv_fn()).wrapping_add(1)
                     };
-                    $name(n, is_nonnegative, true, f)
+                    $name::<$size>(n, is_nonnegative, true, f)
                 }
             })*
     };
@@ -502,24 +528,70 @@ macro_rules! impl_Exp {
 mod imp {
     use super::*;
     impl_Display!(
-        i8, u8, i16, u16, i32, u32, i64, u64, usize, isize
-            as u64 via to_u64 named fmt_u64
+        i8 => 4 as u8 in fmt_u8 => named fmt_i8,
+        u8 => 3 => named fmt_u8,
+        i16 => 6 as u16 in fmt_u16 => named fmt_i16,
+        u16 => 5 => named fmt_u16,
+        i32 => 10 as u32 in fmt_u32 => named fmt_i32,
+        u32 => 10 => named fmt_u32, // u32::MAX (4294967295) has 10 digits
+        i64 => 20 as u64 in fmt_u64 => named fmt_i64,
+        u64 => 20 => named fmt_u64,
+        isize => 20 as u64 in fmt_u64 => named fmt_isize,
+        usize => 20 => named fmt_usize,
+        ; as u64 via to_u64 named fmt_u64
     );
     impl_Exp!(
-        i8, u8, i16, u16, i32, u32, i64, u64, usize, isize
-            as u64 via to_u64 named exp_u64
+        i8 => 5,
+        u8 => 4,
+        i16 => 7,
+        u16 => 6,
+        i32 => 11,
+        u32 => 11, // 10 mantissa digits + '.'
+        i64 => 21,
+        u64 => 21,
+        isize => 21,
+        usize => 21,
+        ; as u64 via to_u64 named exp_u64
     );
 }
 
 #[cfg(not(any(target_pointer_width = "64", target_arch = "wasm32")))]
 mod imp {
     use super::*;
-    impl_Display!(i8, u8, i16, u16, i32, u32, isize, usize as u32 via to_u32 named fmt_u32);
-    impl_Display!(i64, u64 as u64 via to_u64 named fmt_u64);
-    impl_Exp!(i8, u8, i16, u16, i32, u32, isize, usize as u32 via to_u32 named exp_u32);
-    impl_Exp!(i64, u64 as u64 via to_u64 named exp_u64);
+    impl_Display!(
+        i8 => 4 as u8 in fmt_u8 => named fmt_i8,
+        u8 => 3 => named fmt_u8,
+        i16 => 6 as u16 in fmt_u16 => named fmt_i16,
+        u16 => 5 => named fmt_u16,
+        i32 => 10 as u32 in fmt_u32 => named fmt_i32,
+        u32 => 10 => named fmt_u32, // u32::MAX (4294967295) has 10 digits
+        isize => 10 as usize in fmt_usize => named fmt_isize,
+        usize => 10 => named fmt_usize, // up to 10 digits when usize is 32 bits wide
+        ; as u32 via to_u32 named fmt_u32);
+    impl_Display!(
+        i64 => 20 as u64 in fmt_u64 => named fmt_i64,
+        u64 => 20 => named fmt_u64,
+        ; as u64 via to_u64 named fmt_u64);
+
+    impl_Exp!(
+        i8 => 5,
+        u8 => 4,
+        i16 => 7,
+        u16 => 6,
+        i32 => 11,
+        u32 => 11, // 10 mantissa digits + '.'
+        isize => 11,
+        usize => 11, // 10 mantissa digits + '.' when usize is 32 bits wide
+        ; as u32 via to_u32 named exp_u32);
+    impl_Exp!(
+        i64 => 21,
+        u64 => 21,
+        ; as u64 via to_u64 named exp_u64);
 }
-impl_Exp!(i128, u128 as u128 via to_u128 named exp_u128);
+impl_Exp!(
+    i128 => 40,
+    u128 => 40, // 39 digits (u128::MAX) + '.'
+    ; as u128 via to_u128 named exp_u128);
 
 /// Helper function for writing a u64 into `buf` going from last to first, with `curr`.
 fn parse_u64_into<const N: usize>(mut n: u64, buf: &mut [MaybeUninit<u8>; N], curr: &mut usize) {