Auto merge of rust-lang#136932 - m-ou-se:fmt-width-precision-u16, r=scottmcm

bors · bors · commit 1119688c98bc · 2025-03-11T04:07:05.000Z
Reduce formatting `width` and `precision` to 16 bits. This is part of rust-lang#99012. This reduces the `width` and `precision` fields in format strings to 16 bits. They are currently full `usize`s, but it's a bit nonsensical that we need to support the case where someone wants to pad their value to eighteen quintillion spaces and/or have eighteen quintillion digits of precision. By reducing these fields to 16 bits, we can reduce `FormattingOptions` to 64 bits (see rust-lang#136974) and improve the in-memory representation of `format_args!()`. (See additional context below.) This also fixes a bug where the width or precision is silently truncated when cross-compiling to a target with a smaller `usize`. By reducing the width and precision fields to the minimum guaranteed size of `usize`, 16 bits, this bug is eliminated. This is a breaking change, but affects almost no existing code. --- Details of this change: There are three ways to set a width or precision today: 1. Directly in a formatting string, e.g. `println!("{a:1234}")` 2. Indirectly in a formatting string, e.g. `println!("{a:width$}", width=1234)` 3. Through the unstable `FormattingOptions::width` method. This PR: - Adds a compiler error for 1. (`println!("{a:9999999}")` no longer compiles and gives a clear error.) - Adds a runtime check for 2. (`println!("{a:width$}", width=9999999)` will panic.) - Changes the signatures of the (unstable) `FormattingOptions::[get_]width` and `FormattingOptions::[get_]precision` methods to use a `u16` instead.
--- Additional context for improving `FormattingOptions` and `fmt::Arguments`: All the formatting flags and options are currently: - The `+` flag (1 bit) - The `-` flag (1 bit) - The `#` flag (1 bit) - The `0` flag (1 bit) - The `x?` flag (1 bit) - The `X?` flag (1 bit) - The alignment (2 bits) - The fill character (21 bits) - Whether a width is specified (1 bit) - Whether a precision is specified (1 bit) - If used, the width (a full usize) - If used, the precision (a full usize) Everything except the last two can simply fit in a `u32` (those add up to 31 bits in total). If we can accept a max width and precision of u16::MAX, we can make a `FormattingOptions` that is exactly 64 bits in size; the same size as a thin reference on most platforms. If, additionally, we also limit the number of formatting arguments, we can also reduce the size of `fmt::Arguments` (that is, of a `format_args!()` expression).
diff --git a/core/src/fmt/float.rs b/core/src/fmt/float.rs
@@ -29,7 +29,7 @@ fn float_to_decimal_common_exact<T>(
     fmt: &mut Formatter<'_>,
     num: &T,
     sign: flt2dec::Sign,
-    precision: usize,
+    precision: u16,
 ) -> Result
 where
     T: flt2dec::DecodableFloat,
@@ -40,7 +40,7 @@ where
         flt2dec::strategy::grisu::format_exact,
         *num,
         sign,
-        precision,
+        precision.into(),
         &mut buf,
         &mut parts,
     );
@@ -55,7 +55,7 @@ fn float_to_decimal_common_shortest<T>(
     fmt: &mut Formatter<'_>,
     num: &T,
     sign: flt2dec::Sign,
-    precision: usize,
+    precision: u16,
 ) -> Result
 where
     T: flt2dec::DecodableFloat,
@@ -68,7 +68,7 @@ where
         flt2dec::strategy::grisu::format_shortest,
         *num,
         sign,
-        precision,
+        precision.into(),
         &mut buf,
         &mut parts,
     );
@@ -101,7 +101,7 @@ fn float_to_exponential_common_exact<T>(
     fmt: &mut Formatter<'_>,
     num: &T,
     sign: flt2dec::Sign,
-    precision: usize,
+    precision: u16,
     upper: bool,
 ) -> Result
 where
@@ -113,7 +113,7 @@ where
         flt2dec::strategy::grisu::format_exact,
         *num,
         sign,
-        precision,
+        precision.into(),
         upper,
         &mut buf,
         &mut parts,
diff --git a/core/src/fmt/mod.rs b/core/src/fmt/mod.rs
@@ -294,8 +294,8 @@ pub struct FormattingOptions {
     flags: u32,
     fill: char,
     align: Option<Alignment>,
-    width: Option<usize>,
-    precision: Option<usize>,
+    width: Option<u16>,
+    precision: Option<u16>,
 }
 
 impl FormattingOptions {
@@ -389,7 +389,7 @@ impl FormattingOptions {
     /// the padding specified by [`FormattingOptions::fill`]/[`FormattingOptions::align`]
     /// will be used to take up the required space.
     #[unstable(feature = "formatting_options", issue = "118117")]
-    pub fn width(&mut self, width: Option<usize>) -> &mut Self {
+    pub fn width(&mut self, width: Option<u16>) -> &mut Self {
         self.width = width;
         self
     }
@@ -403,7 +403,7 @@ impl FormattingOptions {
     /// - For floating-point types, this indicates how many digits after the
     /// decimal point should be printed.
     #[unstable(feature = "formatting_options", issue = "118117")]
-    pub fn precision(&mut self, precision: Option<usize>) -> &mut Self {
+    pub fn precision(&mut self, precision: Option<u16>) -> &mut Self {
         self.precision = precision;
         self
     }
@@ -455,12 +455,12 @@ impl FormattingOptions {
     }
     /// Returns the current width.
     #[unstable(feature = "formatting_options", issue = "118117")]
-    pub const fn get_width(&self) -> Option<usize> {
+    pub const fn get_width(&self) -> Option<u16> {
         self.width
     }
     /// Returns the current precision.
     #[unstable(feature = "formatting_options", issue = "118117")]
-    pub const fn get_precision(&self) -> Option<usize> {
+    pub const fn get_precision(&self) -> Option<u16> {
         self.precision
     }
     /// Returns the current precision.
@@ -1499,15 +1499,18 @@ unsafe fn run(fmt: &mut Formatter<'_>, arg: &rt::Placeholder, args: &[rt::Argume
     unsafe { value.fmt(fmt) }
 }
 
-unsafe fn getcount(args: &[rt::Argument<'_>], cnt: &rt::Count) -> Option<usize> {
+unsafe fn getcount(args: &[rt::Argument<'_>], cnt: &rt::Count) -> Option<u16> {
     match *cnt {
+        #[cfg(bootstrap)]
+        rt::Count::Is(n) => Some(n as u16),
+        #[cfg(not(bootstrap))]
         rt::Count::Is(n) => Some(n),
         rt::Count::Implied => None,
         rt::Count::Param(i) => {
             debug_assert!(i < args.len());
             // SAFETY: cnt and args come from the same Arguments,
             // which guarantees this index is always within bounds.
-            unsafe { args.get_unchecked(i).as_usize() }
+            unsafe { args.get_unchecked(i).as_u16() }
         }
     }
 }
@@ -1516,11 +1519,11 @@ unsafe fn getcount(args: &[rt::Argument<'_>], cnt: &rt::Count) -> Option<usize>
 #[must_use = "don't forget to write the post padding"]
 pub(crate) struct PostPadding {
     fill: char,
-    padding: usize,
+    padding: u16,
 }
 
 impl PostPadding {
-    fn new(fill: char, padding: usize) -> PostPadding {
+    fn new(fill: char, padding: u16) -> PostPadding {
         PostPadding { fill, padding }
     }
 
@@ -1634,7 +1637,7 @@ impl<'a> Formatter<'a> {
             }
             // Check if we're over the minimum width, if so then we can also
             // just write the bytes.
-            Some(min) if width >= min => {
+            Some(min) if width >= usize::from(min) => {
                 write_prefix(self, sign, prefix)?;
                 self.buf.write_str(buf)
             }
@@ -1645,7 +1648,7 @@ impl<'a> Formatter<'a> {
                 let old_align =
                     crate::mem::replace(&mut self.options.align, Some(Alignment::Right));
                 write_prefix(self, sign, prefix)?;
-                let post_padding = self.padding(min - width, Alignment::Right)?;
+                let post_padding = self.padding(min - width as u16, Alignment::Right)?;
                 self.buf.write_str(buf)?;
                 post_padding.write(self)?;
                 self.options.fill = old_fill;
@@ -1654,7 +1657,7 @@ impl<'a> Formatter<'a> {
             }
             // Otherwise, the sign and prefix goes after the padding
             Some(min) => {
-                let post_padding = self.padding(min - width, Alignment::Right)?;
+                let post_padding = self.padding(min - width as u16, Alignment::Right)?;
                 write_prefix(self, sign, prefix)?;
                 self.buf.write_str(buf)?;
                 post_padding.write(self)
@@ -1702,26 +1705,26 @@ impl<'a> Formatter<'a> {
         // string being formatted.
         let (s, char_count) = if let Some(max_char_count) = self.options.precision {
             let mut iter = s.char_indices();
-            let remaining = match iter.advance_by(max_char_count) {
+            let remaining = match iter.advance_by(usize::from(max_char_count)) {
                 Ok(()) => 0,
                 Err(remaining) => remaining.get(),
             };
             // SAFETY: The offset of `.char_indices()` is guaranteed to be
             // in-bounds and between character boundaries.
             let truncated = unsafe { s.get_unchecked(..iter.offset()) };
-            (truncated, max_char_count - remaining)
+            (truncated, usize::from(max_char_count) - remaining)
         } else {
             // Use the optimized char counting algorithm for the full string.
             (s, s.chars().count())
         };
 
         // The `width` field is more of a minimum width parameter at this point.
         if let Some(width) = self.options.width
-            && char_count < width
+            && char_count < usize::from(width)
         {
             // If we're under the minimum width, then fill up the minimum width
             // with the specified string + some alignment.
-            let post_padding = self.padding(width - char_count, Alignment::Left)?;
+            let post_padding = self.padding(width - char_count as u16, Alignment::Left)?;
             self.buf.write_str(s)?;
             post_padding.write(self)
         } else {
@@ -1737,7 +1740,7 @@ impl<'a> Formatter<'a> {
     /// thing that is being padded.
     pub(crate) fn padding(
         &mut self,
-        padding: usize,
+        padding: u16,
         default: Alignment,
     ) -> result::Result<PostPadding, Error> {
         let align = self.align().unwrap_or(default);
@@ -1777,19 +1780,19 @@ impl<'a> Formatter<'a> {
 
                 // remove the sign from the formatted parts
                 formatted.sign = "";
-                width = width.saturating_sub(sign.len());
+                width = width.saturating_sub(sign.len() as u16);
                 self.options.fill = '0';
                 self.options.align = Some(Alignment::Right);
             }
 
             // remaining parts go through the ordinary padding process.
             let len = formatted.len();
-            let ret = if width <= len {
+            let ret = if usize::from(width) <= len {
                 // no padding
                 // SAFETY: Per the precondition.
                 unsafe { self.write_formatted_parts(&formatted) }
             } else {
-                let post_padding = self.padding(width - len, Alignment::Right)?;
+                let post_padding = self.padding(width - len as u16, Alignment::Right)?;
                 // SAFETY: Per the precondition.
                 unsafe {
                     self.write_formatted_parts(&formatted)?;
@@ -2021,7 +2024,7 @@ impl<'a> Formatter<'a> {
     #[must_use]
     #[stable(feature = "fmt_flags", since = "1.5.0")]
     pub fn width(&self) -> Option<usize> {
-        self.options.width
+        self.options.width.map(|x| x as usize)
     }
 
     /// Returns the optionally specified precision for numeric types.
@@ -2052,7 +2055,7 @@ impl<'a> Formatter<'a> {
     #[must_use]
     #[stable(feature = "fmt_flags", since = "1.5.0")]
     pub fn precision(&self) -> Option<usize> {
-        self.options.precision
+        self.options.precision.map(|x| x as usize)
     }
 
     /// Determines if the `+` flag was specified.
@@ -2792,7 +2795,7 @@ pub(crate) fn pointer_fmt_inner(ptr_addr: usize, f: &mut Formatter<'_>) -> Resul
         f.options.flags |= 1 << (rt::Flag::SignAwareZeroPad as u32);
 
         if f.options.width.is_none() {
-            f.options.width = Some((usize::BITS / 4) as usize + 2);
+            f.options.width = Some((usize::BITS / 4) as u16 + 2);
         }
     }
     f.options.flags |= 1 << (rt::Flag::Alternate as u32);
diff --git a/core/src/fmt/rt.rs b/core/src/fmt/rt.rs
@@ -47,7 +47,11 @@ pub enum Alignment {
 #[derive(Copy, Clone)]
 pub enum Count {
     /// Specified with a literal number, stores the value
+    #[cfg(bootstrap)]
     Is(usize),
+    /// Specified with a literal number, stores the value
+    #[cfg(not(bootstrap))]
+    Is(u16),
     /// Specified using `$` and `*` syntaxes, stores the index into `args`
     Param(usize),
     /// Not specified
@@ -74,7 +78,7 @@ enum ArgumentType<'a> {
         formatter: unsafe fn(NonNull<()>, &mut Formatter<'_>) -> Result,
         _lifetime: PhantomData<&'a ()>,
     },
-    Count(usize),
+    Count(u16),
 }
 
 /// This struct represents a generic "argument" which is taken by format_args!().
@@ -150,8 +154,12 @@ impl Argument<'_> {
         Self::new(x, UpperExp::fmt)
     }
     #[inline]
+    #[track_caller]
     pub const fn from_usize(x: &usize) -> Argument<'_> {
-        Argument { ty: ArgumentType::Count(*x) }
+        if *x > u16::MAX as usize {
+            panic!("Formatting argument out of range");
+        }
+        Argument { ty: ArgumentType::Count(*x as u16) }
     }
 
     /// Format this placeholder argument.
@@ -181,7 +189,7 @@ impl Argument<'_> {
     }
 
     #[inline]
-    pub(super) const fn as_usize(&self) -> Option<usize> {
+    pub(super) const fn as_u16(&self) -> Option<u16> {
         match self.ty {
             ArgumentType::Count(count) => Some(count),
             ArgumentType::Placeholder { .. } => None,
diff --git a/core/src/time.rs b/core/src/time.rs
@@ -1377,7 +1377,8 @@ impl fmt::Debug for Duration {
                     } else {
                         // We need to add padding. Use the `Formatter::padding` helper function.
                         let default_align = fmt::Alignment::Left;
-                        let post_padding = f.padding(requested_w - actual_w, default_align)?;
+                        let post_padding =
+                            f.padding((requested_w - actual_w) as u16, default_align)?;
                         emit_without_padding(f)?;
                         post_padding.write(f)
                     }
diff --git a/coretests/tests/num/flt2dec/mod.rs b/coretests/tests/num/flt2dec/mod.rs
@@ -577,7 +577,7 @@ where
     }
 
     // very large output
-    assert_eq!(to_string(f, 1.1, Minus, 80000), format!("1.1{:0>79999}", ""));
+    assert_eq!(to_string(f, 1.1, Minus, 50000), format!("1.1{:0>49999}", ""));
 }
 
 pub fn to_shortest_exp_str_test<F>(mut f_: F)
@@ -914,22 +914,22 @@ where
     );
 
     // very large output
-    assert_eq!(to_string(f, 0.0, Minus, 80000, false), format!("0.{:0>79999}e0", ""));
-    assert_eq!(to_string(f, 1.0e1, Minus, 80000, false), format!("1.{:0>79999}e1", ""));
-    assert_eq!(to_string(f, 1.0e0, Minus, 80000, false), format!("1.{:0>79999}e0", ""));
+    assert_eq!(to_string(f, 0.0, Minus, 50000, false), format!("0.{:0>49999}e0", ""));
+    assert_eq!(to_string(f, 1.0e1, Minus, 50000, false), format!("1.{:0>49999}e1", ""));
+    assert_eq!(to_string(f, 1.0e0, Minus, 50000, false), format!("1.{:0>49999}e0", ""));
     assert_eq!(
-        to_string(f, 1.0e-1, Minus, 80000, false),
+        to_string(f, 1.0e-1, Minus, 50000, false),
         format!(
-            "1.000000000000000055511151231257827021181583404541015625{:0>79945}\
+            "1.000000000000000055511151231257827021181583404541015625{:0>49945}\
                         e-1",
             ""
         )
     );
     assert_eq!(
-        to_string(f, 1.0e-20, Minus, 80000, false),
+        to_string(f, 1.0e-20, Minus, 50000, false),
         format!(
             "9.999999999999999451532714542095716517295037027873924471077157760\
-                         66783064379706047475337982177734375{:0>79901}e-21",
+                         66783064379706047475337982177734375{:0>49901}e-21",
             ""
         )
     );
@@ -1150,18 +1150,18 @@ where
     );
 
     // very large output
-    assert_eq!(to_string(f, 0.0, Minus, 80000), format!("0.{:0>80000}", ""));
-    assert_eq!(to_string(f, 1.0e1, Minus, 80000), format!("10.{:0>80000}", ""));
-    assert_eq!(to_string(f, 1.0e0, Minus, 80000), format!("1.{:0>80000}", ""));
+    assert_eq!(to_string(f, 0.0, Minus, 50000), format!("0.{:0>50000}", ""));
+    assert_eq!(to_string(f, 1.0e1, Minus, 50000), format!("10.{:0>50000}", ""));
+    assert_eq!(to_string(f, 1.0e0, Minus, 50000), format!("1.{:0>50000}", ""));
     assert_eq!(
-        to_string(f, 1.0e-1, Minus, 80000),
-        format!("0.1000000000000000055511151231257827021181583404541015625{:0>79945}", "")
+        to_string(f, 1.0e-1, Minus, 50000),
+        format!("0.1000000000000000055511151231257827021181583404541015625{:0>49945}", "")
     );
     assert_eq!(
-        to_string(f, 1.0e-20, Minus, 80000),
+        to_string(f, 1.0e-20, Minus, 50000),
         format!(
             "0.0000000000000000000099999999999999994515327145420957165172950370\
-                          2787392447107715776066783064379706047475337982177734375{:0>79881}",
+                          2787392447107715776066783064379706047475337982177734375{:0>49881}",
             ""
         )
     );