From 21fda7d865d86d88ada5f959e35af979804b42e9 Mon Sep 17 00:00:00 2001 From: Madhav Madhusoodanan Date: Mon, 24 Mar 2025 21:32:25 +0400 Subject: [PATCH] Added functionality for `int_format_into` --- library/core/src/fmt/mod.rs | 4 + library/core/src/fmt/num.rs | 371 +++++++++++++++++++++++------ library/core/src/fmt/num_buffer.rs | 65 +++++ 3 files changed, 372 insertions(+), 68 deletions(-) create mode 100644 library/core/src/fmt/num_buffer.rs diff --git a/library/core/src/fmt/mod.rs b/library/core/src/fmt/mod.rs index 30fd2d7815f51..31e5b68e30544 100644 --- a/library/core/src/fmt/mod.rs +++ b/library/core/src/fmt/mod.rs @@ -15,6 +15,7 @@ mod float; #[cfg(no_fp_fmt_parse)] mod nofloat; mod num; +mod num_buffer; mod rt; #[stable(feature = "fmt_flags_align", since = "1.28.0")] @@ -33,6 +34,9 @@ pub enum Alignment { Center, } +#[unstable(feature = "int_format_into", issue = "138215")] +pub use num_buffer::NumBuffer; + #[stable(feature = "debug_builders", since = "1.2.0")] pub use self::builders::{DebugList, DebugMap, DebugSet, DebugStruct, DebugTuple}; #[unstable(feature = "debug_closure_helpers", issue = "117729")] diff --git a/library/core/src/fmt/num.rs b/library/core/src/fmt/num.rs index 4467b37bd4510..0837a1d449bf7 100644 --- a/library/core/src/fmt/num.rs +++ b/library/core/src/fmt/num.rs @@ -1,5 +1,6 @@ //! Integer and floating-point number formatting +use crate::fmt::num_buffer::NumBuffer; use crate::mem::MaybeUninit; use crate::num::fmt as numfmt; use crate::ops::{Div, Rem, Sub}; @@ -199,44 +200,60 @@ static DEC_DIGITS_LUT: &[u8; 200] = b"\ 6061626364656667686970717273747576777879\ 8081828384858687888990919293949596979899"; -macro_rules! impl_Display { - ($($signed:ident, $unsigned:ident,)* ; as $u:ident via $conv_fn:ident named $gen_name:ident) => { +static NEGATIVE_SIGN: &[u8; 1] = b"-"; + +// SAFETY: safety is ensured by the caller about: +// 1. The contents of `buf` containing only ASCII characters. +// 2. `offset` being bound checked. +// 3. 
The contents of `buf` being initialized from `offset` onwards till the end. +unsafe fn extract_str_from_buf(buf: &NumBuffer, offset: usize) -> &str { + // SAFETY: safety is ensured by the caller about: + // 1. `offset` being bound checked + // 2. The contents of `buf` being initialized from `offset` onwards till the end. + let written = unsafe { buf.extract(offset..) }; + + // SAFETY: safety is ensured by the caller about: + // 1. The contents of `buf` containing only ASCII characters. + let as_str = unsafe { + str::from_utf8_unchecked(slice::from_raw_parts( + MaybeUninit::slice_as_ptr(written), + written.len(), + )) + }; - $( - #[stable(feature = "rust1", since = "1.0.0")] - impl fmt::Display for $unsigned { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - #[cfg(not(feature = "optimize_for_size"))] - { - self._fmt(true, f) - } - #[cfg(feature = "optimize_for_size")] - { - $gen_name(self.$conv_fn(), true, f) - } - } - } + as_str +} - #[stable(feature = "rust1", since = "1.0.0")] - impl fmt::Display for $signed { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - #[cfg(not(feature = "optimize_for_size"))] - { - return self.unsigned_abs()._fmt(*self >= 0, f); - } - #[cfg(feature = "optimize_for_size")] - { - return $gen_name(self.unsigned_abs().$conv_fn(), *self >= 0, f); - } - } - } +// SAFETY: safety is ensured by the caller about: +// 1. `start_offset` being bound checked +unsafe fn add_negative_sign( + is_nonnegative: bool, + buf: &mut NumBuffer, + start_offset: usize, +) -> usize { + if is_nonnegative { + return start_offset; + } + + let offset = start_offset - 1; + + // Setting sign for the negative number + // SAFETY: `start_offset` being bound checked is ensured by + // the caller. + unsafe { buf.write(offset, NEGATIVE_SIGN[0]) }; + offset +} + +// Basic functionality that is relied upon by functionality within +// `impl_Display` and `impl_FormatInto` +macro_rules! 
impl_NumBuffer { + ($($unsigned:ident,)* ; as $u:ident named $gen_name:ident) => { + + $( #[cfg(not(feature = "optimize_for_size"))] impl $unsigned { - fn _fmt(self, is_nonnegative: bool, f: &mut fmt::Formatter<'_>) -> fmt::Result { - const MAX_DEC_N: usize = $unsigned::MAX.ilog(10) as usize + 1; - // Buffer decimals for $unsigned with right alignment. - let mut buf = [MaybeUninit::::uninit(); MAX_DEC_N]; + fn _write_into_buf(self, buf: &mut NumBuffer) -> usize { // Count the number of bytes in buf that are not initialized. let mut offset = buf.len(); // Consume the least-significant decimals from a working copy. @@ -245,10 +262,11 @@ macro_rules! impl_Display { // Format per four digits from the lookup table. // Four digits need a 16-bit $unsigned or wider. while size_of::() > 1 && remain > 999.try_into().expect("branch is not hit for types that cannot fit 999 (u8)") { - // SAFETY: All of the decimals fit in buf due to MAX_DEC_N + // SAFETY: All of the decimals fit in `buf` since `buf` is large enough to + // accommodate the largest representation of a number possible (that of i128::MIN) // and the while condition ensures at least 4 more decimals. unsafe { core::hint::assert_unchecked(offset >= 4) } - // SAFETY: The offset counts down from its initial buf.len() + // SAFETY: The offset counts down from its initial size // without underflow due to the previous precondition. unsafe { core::hint::assert_unchecked(offset <= buf.len()) } offset -= 4; @@ -259,34 +277,44 @@ macro_rules! impl_Display { remain /= scale; let pair1 = (quad / 100) as usize; let pair2 = (quad % 100) as usize; - buf[offset + 0].write(DEC_DIGITS_LUT[pair1 * 2 + 0]); - buf[offset + 1].write(DEC_DIGITS_LUT[pair1 * 2 + 1]); - buf[offset + 2].write(DEC_DIGITS_LUT[pair2 * 2 + 0]); - buf[offset + 3].write(DEC_DIGITS_LUT[pair2 * 2 + 1]); + + // SAFETY: The offset is bounds-checked in the asserts above. 
+ unsafe { + buf.write(offset + 0, DEC_DIGITS_LUT[pair1 * 2 + 0]); + buf.write(offset + 1, DEC_DIGITS_LUT[pair1 * 2 + 1]); + buf.write(offset + 2, DEC_DIGITS_LUT[pair2 * 2 + 0]); + buf.write(offset + 3, DEC_DIGITS_LUT[pair2 * 2 + 1]); + } } // Format per two digits from the lookup table. if remain > 9 { - // SAFETY: All of the decimals fit in buf due to MAX_DEC_N + // SAFETY: All of the decimals fit in `buf` since `buf` is large enough to + // accommodate the largest representation of a number possible (that of i128::MIN) // and the while condition ensures at least 2 more decimals. unsafe { core::hint::assert_unchecked(offset >= 2) } - // SAFETY: The offset counts down from its initial buf.len() + // SAFETY: The offset counts down from its initial size // without underflow due to the previous precondition. unsafe { core::hint::assert_unchecked(offset <= buf.len()) } offset -= 2; let pair = (remain % 100) as usize; remain /= 100; - buf[offset + 0].write(DEC_DIGITS_LUT[pair * 2 + 0]); - buf[offset + 1].write(DEC_DIGITS_LUT[pair * 2 + 1]); + + // SAFETY: The offset is bounds-checked in the asserts above. + unsafe { + buf.write(offset + 0, DEC_DIGITS_LUT[pair * 2 + 0]); + buf.write(offset + 1, DEC_DIGITS_LUT[pair * 2 + 1]); + }; } // Format the last remaining digit, if any. if remain != 0 || self == 0 { - // SAFETY: All of the decimals fit in buf due to MAX_DEC_N + // SAFETY: All of the decimals fit in `buf` since `buf` is large enough to + // accommodate the largest representation of a number possible (that of i128::MIN) // and the if condition ensures (at least) 1 more decimals. unsafe { core::hint::assert_unchecked(offset >= 1) } - // SAFETY: The offset counts down from its initial buf.len() + // SAFETY: The offset counts down from its initial size // without underflow due to the previous precondition. unsafe { core::hint::assert_unchecked(offset <= buf.len()) } offset -= 1; @@ -294,34 +322,37 @@ macro_rules! 
impl_Display { // Either the compiler sees that remain < 10, or it prevents // a boundary check up next. let last = (remain & 15) as usize; - buf[offset].write(DEC_DIGITS_LUT[last * 2 + 1]); + + // SAFETY: The offset is bounds-checked in the asserts above. + unsafe { buf.write(offset, DEC_DIGITS_LUT[last * 2 + 1]) }; // not used: remain = 0; } - // SAFETY: All buf content since offset is set. - let written = unsafe { buf.get_unchecked(offset..) }; - // SAFETY: Writes use ASCII from the lookup table exclusively. - let as_str = unsafe { - str::from_utf8_unchecked(slice::from_raw_parts( - MaybeUninit::slice_as_ptr(written), - written.len(), - )) - }; + offset + } + + fn _fmt(self, is_nonnegative: bool, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Buffer decimals for $unsigned with right alignment. + let mut buf = NumBuffer::new(); + let offset = self._write_into_buf(&mut buf); + + // SAFETY: All contents of `buf` since offset is set, and + // writes use ASCII from the lookup table exclusively. + let as_str = unsafe { extract_str_from_buf(&buf, offset) }; + f.pad_integral(is_nonnegative, "", as_str) } })* #[cfg(feature = "optimize_for_size")] - fn $gen_name(mut n: $u, is_nonnegative: bool, f: &mut fmt::Formatter<'_>) -> fmt::Result { - const MAX_DEC_N: usize = $u::MAX.ilog(10) as usize + 1; - let mut buf = [MaybeUninit::::uninit(); MAX_DEC_N]; - let mut curr = MAX_DEC_N; - let buf_ptr = MaybeUninit::slice_as_mut_ptr(&mut buf); + fn $gen_name(mut n: $u, is_nonnegative: bool, buf: &mut NumBuffer) -> &str { + let mut curr = buf.len(); + let buf_ptr = NumBuffer::extract_start_mut_ptr(buf); // SAFETY: To show that it's OK to copy into `buf_ptr`, notice that at the beginning - // `curr == buf.len() == 39 > log(n)` since `n < 2^128 < 10^39`, and at + // `curr == buf.len() == 40 > log(n)` since `n < 2^128 < 10^39 < 10^40`, and at // each step this is kept the same as `n` is divided. 
Since `n` is always - // non-negative, this means that `curr > 0` so `buf_ptr[curr..curr + 1]` + // non-negative, this means that `curr >= (40 - 39) == 1 > 0` so `buf_ptr[curr..curr + 1]` // is safe to access. unsafe { loop { @@ -335,16 +366,187 @@ macro_rules! impl_Display { } } - // SAFETY: `curr` > 0 (since we made `buf` large enough), and all the chars are valid UTF-8 + // SAFETY: `curr >= 1` (unchanged if `is_nonnegative` is true) and + // `curr >= 0` (incase `is_nonnegative` is false) so `buf_ptr[curr..curr + 1]` + // is safe to access. + unsafe { add_negative_sign(is_nonnegative, buf, curr) }; + + // SAFETY: `curr >= 0` (since we made `buf` large enough), and all the chars are valid UTF-8 let buf_slice = unsafe { str::from_utf8_unchecked( slice::from_raw_parts(buf_ptr.add(curr), buf.len() - curr)) }; - f.pad_integral(is_nonnegative, "", buf_slice) + + buf_slice } }; } +// Must invoke `impl_NumBuffer` before invoking this macro. +macro_rules! impl_Display { + ($($signed:ident, $unsigned:ident,)* ; via $conv_fn:ident named $gen_name:ident) => { + + $( + #[stable(feature = "rust1", since = "1.0.0")] + impl fmt::Display for $unsigned { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + #[cfg(not(feature = "optimize_for_size"))] + { + self._fmt(true, f) + } + #[cfg(feature = "optimize_for_size")] + { + let mut buf = NumBuffer::new(); + + // not setting the sign here, hence sending `is_nonnegative` as `true` + let as_str = $gen_name(self.$conv_fn(), true, &mut buf); + f.pad_integral(true, "", as_str) + } + } + } + + #[stable(feature = "rust1", since = "1.0.0")] + impl fmt::Display for $signed { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + #[cfg(not(feature = "optimize_for_size"))] + { + return self.unsigned_abs()._fmt(*self >= 0, f); + } + #[cfg(feature = "optimize_for_size")] + { + let mut buf = NumBuffer::new(); + + // not setting the sign here, hence sending `is_nonnegative` as `true` + let as_str = 
$gen_name(self.unsigned_abs().$conv_fn(), true, &mut buf); + f.pad_integral(*self >= 0, "", as_str) + } + } + } + )* + }; +} + +// Must invoke `impl_NumBuffer` before invoking this macro. +macro_rules! impl_FormatInto { + ($($signed:ident, $unsigned:ident,)* ; via $conv_fn:ident named $gen_name:ident) => { + $( + #[unstable(feature = "int_format_into", issue = "138215")] + impl $signed { + /// Allows users to write an integer (in signed decimal format) into a variable `buf` of + /// type [`NumBuffer`] that is passed by the caller by mutable reference. + /// + /// # Examples + /// ``` + /// #![feature(int_format_into)] + /// use core::fmt::NumBuffer; + /// + #[doc = concat!("let n = 0", stringify!($signed), ";")] + /// let mut buf = NumBuffer::new(); + /// assert_eq!(n.format_into(&mut buf), "0"); + /// + #[doc = concat!("let n1 = -32", stringify!($signed), ";")] + /// let mut buf1 = NumBuffer::new(); + /// assert_eq!(n1.format_into(&mut buf1), "-32"); + /// + #[doc = concat!("let n2 = ", stringify!($signed::MIN), ";")] + /// let mut buf2 = NumBuffer::new(); + #[doc = concat!("assert_eq!(n2.format_into(&mut buf2), ", stringify!($signed::MIN), ".to_string());")] + /// + #[doc = concat!("let n3 = ", stringify!($signed::MAX), ";")] + /// let mut buf3 = NumBuffer::new(); + #[doc = concat!("assert_eq!(n3.format_into(&mut buf3), ", stringify!($signed::MAX), ".to_string());")] + /// ``` + /// + pub fn format_into(self, buf: &mut NumBuffer) -> &str { + #[cfg(not(feature = "optimize_for_size"))] + { + let is_nonnegative = self >= 0; + let mut offset = self.unsigned_abs()._write_into_buf(buf); + + // SAFETY: `offset >= 1` since only a maximum of 39 digits + // would have been written into the buffer (of size 40 bytes). + // The negative sign is not written yet. + unsafe { core::hint::assert_unchecked(offset >= 1) } + + // SAFETY: The offset counts down from its initial size + // without underflow due to the previous precondition. 
+ unsafe { core::hint::assert_unchecked(offset <= buf.len()) } + + // SAFETY: `offset >= 1` (unchanged if `is_nonnegative` is true) and + // `offset >= 0` (in case `is_nonnegative` is false) so + // `buf_ptr[offset..offset + 1]` is safe to access. + offset = unsafe { add_negative_sign(is_nonnegative, buf, offset) }; + + // SAFETY: All buf content since offset is set, and + // writes use ASCII from the lookup table exclusively. + let as_str = unsafe { extract_str_from_buf(buf, offset) }; + + as_str + } + + #[cfg(feature = "optimize_for_size")] + { + let is_nonnegative = self >= 0; + $gen_name(self.unsigned_abs().$conv_fn(), is_nonnegative, buf) + } + + } + } + + #[unstable(feature = "int_format_into", issue = "138215")] + impl $unsigned { + /// Allows users to write an integer (in decimal format) into a variable `buf` of + /// type [`NumBuffer`] that is passed by the caller by mutable reference. + /// + /// # Examples + /// ``` + /// #![feature(int_format_into)] + /// use core::fmt::NumBuffer; + /// + #[doc = concat!("let n = 0", stringify!($signed), ";")] + /// let mut buf = NumBuffer::new(); + /// assert_eq!(n.format_into(&mut buf), "0"); + /// + #[doc = concat!("let n1 = 32", stringify!($unsigned), ";")] + /// let mut buf1 = NumBuffer::new(); + /// assert_eq!(n1.format_into(&mut buf1), "32"); + /// + #[doc = concat!("let n2 = ", stringify!($unsigned::MAX), ";")] + /// let mut buf2 = NumBuffer::new(); + #[doc = concat!("assert_eq!(n2.format_into(&mut buf2), ", stringify!($unsigned::MAX), ".to_string());")] + /// ``` + /// + pub fn format_into(self, buf: &mut NumBuffer) -> &str { + #[cfg(not(feature = "optimize_for_size"))] + { + let offset = self._write_into_buf(buf); + + // SAFETY: `offset >= 1` since only a maximum of 39 digits + // would have been written into the buffer (of size 40 bytes). 
+ unsafe { core::hint::assert_unchecked(offset >= 1) } + + // SAFETY: The offset counts down from its initial size + // without underflow due to the previous precondition. + unsafe { core::hint::assert_unchecked(offset <= buf.len()) } + + // SAFETY: All contents in `buf` since offset is set, and + // writes use ASCII from the lookup table exclusively. + let as_str = unsafe { extract_str_from_buf(buf, offset) }; + + as_str + } + + #[cfg(feature = "optimize_for_size")] + { + $gen_name(self.$conv_fn(), true, buf) + } + + } + } + )* + } +} + macro_rules! impl_Exp { ($($t:ident),* as $u:ident via $conv_fn:ident named $name:ident) => { fn $name( @@ -523,14 +725,24 @@ impl_Debug! { #[cfg(any(target_pointer_width = "64", target_arch = "wasm32"))] mod imp { use super::*; + impl_NumBuffer!( + u8, u16, u32, u64, usize, + ; as u64 named stringify_u64); impl_Display!( i8, u8, i16, u16, i32, u32, i64, u64, isize, usize, - ; as u64 via to_u64 named fmt_u64 - ); + ; via to_u64 named stringify_u64); + impl_FormatInto!( + i8, u8, + i16, u16, + i32, u32, + i64, u64, + isize, usize, + ; via to_u64 named stringify_u64); + impl_Exp!( i8, u8, i16, u16, i32, u32, i64, u64, usize, isize as u64 via to_u64 named exp_u64 @@ -540,19 +752,42 @@ mod imp { #[cfg(not(any(target_pointer_width = "64", target_arch = "wasm32")))] mod imp { use super::*; + impl_NumBuffer!( + u8, u16, u32, usize, + ; as u32 named stringify_u32); impl_Display!( i8, u8, i16, u16, i32, u32, isize, usize, - ; as u32 via to_u32 named fmt_u32); + ; via to_u32 named stringify_u32); + impl_FormatInto!( + i8, u8, + i16, u16, + i32, u32, + isize, usize, + ; via to_u32 named stringify_u32); + + impl_NumBuffer!( + u64, + ; as u64 named stringify_u64); impl_Display!( i64, u64, - ; as u64 via to_u64 named fmt_u64); + ; via to_u64 named stringify_u64); + impl_FormatInto!( + i64, u64, + ; via to_u64 named stringify_u64); impl_Exp!(i8, u8, i16, u16, i32, u32, isize, usize as u32 via to_u32 named exp_u32); impl_Exp!(i64, u64 as u64 via 
to_u64 named exp_u64); } +impl_NumBuffer!( + u128, + ; as u128 named stringify_u128); +impl_FormatInto!( + i128, u128, + ; via to_u128 named stringify_u128); + impl_Exp!(i128, u128 as u128 via to_u128 named exp_u128); /// Helper function for writing a u64 into `buf` going from last to first, with `curr`. diff --git a/library/core/src/fmt/num_buffer.rs b/library/core/src/fmt/num_buffer.rs new file mode 100644 index 0000000000000..1c2d61cecb10a --- /dev/null +++ b/library/core/src/fmt/num_buffer.rs @@ -0,0 +1,65 @@ +use crate::mem::MaybeUninit; +use crate::slice::SliceIndex; + +/// 40 is chosen as the buffer length, as it is equal +/// to that required to accommodate i128::MIN, which has the largest +/// decimal string representation +/// (39 decimal digits + 1 for negative sign). +const BUF_SIZE: usize = 40; + +/// A minimal buffer implementation containing elements of type +/// `MaybeUninit<u8>`. +#[unstable(feature = "int_format_into", issue = "138215")] +#[derive(Debug)] +pub struct NumBuffer { + /// An array of elements of type `MaybeUninit<u8>`. + contents: [MaybeUninit<u8>; BUF_SIZE], +} + +#[unstable(feature = "int_format_into", issue = "138215")] +impl NumBuffer { + /// Initializes `contents` as an uninitialized array of `MaybeUninit<u8>`. + #[unstable(feature = "int_format_into", issue = "138215")] + pub fn new() -> Self { + NumBuffer { contents: [MaybeUninit::<u8>::uninit(); BUF_SIZE] } + } + + /// Returns the length of the buffer. + #[unstable(feature = "int_format_into", issue = "138215")] + pub fn len(&self) -> usize { + BUF_SIZE + } + + /// Extracts a slice of the contents of the buffer. + /// This function is unsafe, since it does not itself + /// bounds-check `index`. + /// + /// SAFETY: `index` is bounds-checked by the caller. + #[unstable(feature = "int_format_into", issue = "138215")] + pub(crate) unsafe fn extract<I>(&self, index: I) -> &I::Output + where + I: SliceIndex<[MaybeUninit<u8>]>, + { + // SAFETY: `index` is bounds-checked by the caller. 
+ unsafe { self.contents.get_unchecked(index) } + } + + /// Returns a mutable pointer pointing to the start of the buffer. + #[unstable(feature = "int_format_into", issue = "138215")] + #[cfg(feature = "optimize_for_size")] + pub(crate) fn extract_start_mut_ptr(buf: &mut Self) -> *mut u8 { + MaybeUninit::slice_as_mut_ptr(&mut buf.contents) + } + + /// Writes data at index `offset` of the buffer. + /// This function is unsafe, since it does not itself perform + /// the safety checks below. + /// + /// SAFETY: The caller ensures the following: + /// 1. `offset` is bounds-checked. + /// 2. `data` is a valid ASCII character. + #[unstable(feature = "int_format_into", issue = "138215")] + pub(crate) unsafe fn write(&mut self, offset: usize, data: u8) { + self.contents[offset].write(data); + } +}