From 41c8e45c255043925a9426bd1024a84ca71fcc63 Mon Sep 17 00:00:00 2001 From: Simon Sapin <simon.sapin@exyr.org> Date: Sun, 26 Nov 2017 13:56:16 +0100 Subject: [PATCH 1/9] Extract decimal integer formatting logic into separate functions --- src/libcore/fmt/num.rs | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/libcore/fmt/num.rs b/src/libcore/fmt/num.rs index c8218172583d6..008e133fce28a 100644 --- a/src/libcore/fmt/num.rs +++ b/src/libcore/fmt/num.rs @@ -200,21 +200,32 @@ macro_rules! impl_Display { #[allow(unused_comparisons)] fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let is_nonnegative = *self >= 0; - let mut n = if is_nonnegative { + let n = if is_nonnegative { self.$conv_fn() } else { // convert the negative num to positive by summing 1 to it's 2 complement (!self.$conv_fn()).wrapping_add(1) }; let mut buf: [u8; 39] = unsafe { mem::uninitialized() }; + f.pad_integral(is_nonnegative, "", n.to_str_unsigned(&mut buf)) + } + })+); +} + +macro_rules! impl_to_str_unsigned { + ($($t:ident),*) => ($( + impl ToStrUnsigned for $t { + fn to_str_unsigned(self, buf: &mut [u8; 39]) -> &str { let mut curr = buf.len() as isize; let buf_ptr = buf.as_mut_ptr(); let lut_ptr = DEC_DIGITS_LUT.as_ptr(); + let mut n = self; unsafe { // need at least 16 bits for the 4-characters-at-a-time to work. if ::mem::size_of::<$t>() >= 2 { // eagerly decode 4 characters at a time + #[allow(unused_comparisons, overflowing_literals)] while n >= 10000 { let rem = (n % 10000) as isize; n /= 10000; @@ -253,7 +264,7 @@ macro_rules! 
impl_Display { str::from_utf8_unchecked( slice::from_raw_parts(buf_ptr.offset(curr), buf.len() - curr as usize)) }; - f.pad_integral(is_nonnegative, "", buf_slice) + buf_slice } })*); } @@ -267,3 +278,9 @@ impl_Display!(isize, usize: to_u16); impl_Display!(isize, usize: to_u32); #[cfg(target_pointer_width = "64")] impl_Display!(isize, usize: to_u64); + +impl_to_str_unsigned!(u8, u16, u32, u64, u128); + +pub(crate) trait ToStrUnsigned { + fn to_str_unsigned(self, buf: &mut [u8; 39]) -> &str; +} From 33693eb674d6d4c07694d36be19fa20584ccb267 Mon Sep 17 00:00:00 2001 From: Simon Sapin <simon.sapin@exyr.org> Date: Sun, 26 Nov 2017 15:41:26 +0100 Subject: [PATCH 2/9] Add a to_str method to unsigned integers, taking a pre-allocated buffer --- src/libcore/fmt/mod.rs | 2 +- src/libcore/fmt/num.rs | 53 +++++++++++++++++++++++++++++++----------- src/libcore/num/mod.rs | 19 +++++++++++++++ 3 files changed, 59 insertions(+), 15 deletions(-) diff --git a/src/libcore/fmt/mod.rs b/src/libcore/fmt/mod.rs index 897222747f5e6..793b9edd787bc 100644 --- a/src/libcore/fmt/mod.rs +++ b/src/libcore/fmt/mod.rs @@ -22,7 +22,7 @@ use slice; use str; mod float; -mod num; +pub(crate) mod num; mod builders; #[unstable(feature = "fmt_flags_align", issue = "27726")] diff --git a/src/libcore/fmt/num.rs b/src/libcore/fmt/num.rs index 008e133fce28a..9af9c9396c5a9 100644 --- a/src/libcore/fmt/num.rs +++ b/src/libcore/fmt/num.rs @@ -206,22 +206,48 @@ macro_rules! impl_Display { // convert the negative num to positive by summing 1 to it's 2 complement (!self.$conv_fn()).wrapping_add(1) }; - let mut buf: [u8; 39] = unsafe { mem::uninitialized() }; - f.pad_integral(is_nonnegative, "", n.to_str_unsigned(&mut buf)) + unsafe { + let mut buf: [u8; 39] = mem::uninitialized(); + f.pad_integral(is_nonnegative, "", n.to_str_unchecked(&mut buf)) + } } })+); } -macro_rules! impl_to_str_unsigned { +macro_rules! 
impl_unsigned_to_str { ($($t:ident),*) => ($( - impl ToStrUnsigned for $t { - fn to_str_unsigned(self, buf: &mut [u8; 39]) -> &str { + impl UnsignedToStr for $t { + #[inline] + fn to_str(self, buf: &mut [u8]) -> &mut str { + // python -c 'print([len(str((1<<bits)-1)) for bits in range(128)])' + const DECIMAL_LENGTH_BY_BINARY_LENGTH: [usize; 128] = [ + 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, // 0..15 significant bits + 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, // 16..31 + 10, 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, // 32..47 + 15, 15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 19, // 48..63 + 20, 20, 20, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 24, 24, 24, // 64..79 + 25, 25, 25, 25, 26, 26, 26, 27, 27, 27, 28, 28, 28, 28, 29, 29, // 80..95 + 29, 30, 30, 30, 31, 31, 31, 32, 32, 32, 32, 33, 33, 33, 34, 34, // 96..111 + 34, 35, 35, 35, 35, 36, 36, 36, 37, 37, 37, 38, 38, 38, 38, 39, // 112..127 + ]; + + let bits = ::mem::size_of::<$t>() * 8 - self.leading_zeros() as usize; + assert!(buf.len() >= DECIMAL_LENGTH_BY_BINARY_LENGTH[bits], + "A buffer of length {} is too small to represent {}", buf.len(), self); + unsafe { + self.to_str_unchecked(buf) + } + } + + /// `buf` must be large enough + #[inline] + unsafe fn to_str_unchecked(self, buf: &mut [u8]) -> &mut str { let mut curr = buf.len() as isize; let buf_ptr = buf.as_mut_ptr(); let lut_ptr = DEC_DIGITS_LUT.as_ptr(); let mut n = self; - unsafe { + { // need at least 16 bits for the 4-characters-at-a-time to work. if ::mem::size_of::<$t>() >= 2 { // eagerly decode 4 characters at a time @@ -260,11 +286,9 @@ macro_rules! 
impl_to_str_unsigned { } } - let buf_slice = unsafe { - str::from_utf8_unchecked( - slice::from_raw_parts(buf_ptr.offset(curr), buf.len() - curr as usize)) - }; - buf_slice + str::from_utf8_unchecked_mut( + slice::from_raw_parts_mut(buf_ptr.offset(curr), buf.len() - curr as usize) + ) } })*); } @@ -279,8 +303,9 @@ impl_Display!(isize, usize: to_u32); #[cfg(target_pointer_width = "64")] impl_Display!(isize, usize: to_u64); -impl_to_str_unsigned!(u8, u16, u32, u64, u128); +impl_unsigned_to_str!(u8, u16, u32, u64, u128); -pub(crate) trait ToStrUnsigned { - fn to_str_unsigned(self, buf: &mut [u8; 39]) -> &str; +pub(crate) trait UnsignedToStr { + fn to_str(self, buf: &mut [u8]) -> &mut str; + unsafe fn to_str_unchecked(self, buf: &mut [u8]) -> &mut str; } diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs index 1230066e2b33b..47a4c1e787123 100644 --- a/src/libcore/num/mod.rs +++ b/src/libcore/num/mod.rs @@ -1315,6 +1315,25 @@ macro_rules! uint_impl { from_str_radix(src, radix) } + /// Writes the decimal representation in a pre-allocated buffer. + /// + /// The returned slice contains no leading zero or plus sign, + /// and is aligned to the *end* of `buffer`. + /// + /// # Panics + /// + /// This function will panic if `buffer` is too small. + /// + /// # Examples + /// + /// ``` + /// assert_eq!(0xFFFF_u32.to_str(&mut [0; 10]), "65635") + /// ``` + #[unstable(feature = "int_to_str", issue = /* FIXME */ "0")] + pub fn to_str(self, buffer: &mut [u8]) -> &mut str { + fmt::num::UnsignedToStr::to_str(self as $ActualT, buffer) + } + /// Returns the number of ones in the binary representation of `self`. 
/// /// # Examples From 7cd95e2f45968a302349ea6307188bed669e8cbe Mon Sep 17 00:00:00 2001 From: Simon Sapin <simon.sapin@exyr.org> Date: Sun, 26 Nov 2017 15:42:18 +0100 Subject: [PATCH 3/9] Unindent a no-op block --- src/libcore/fmt/num.rs | 66 ++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 35 deletions(-) diff --git a/src/libcore/fmt/num.rs b/src/libcore/fmt/num.rs index 9af9c9396c5a9..2f4d1d01ee7db 100644 --- a/src/libcore/fmt/num.rs +++ b/src/libcore/fmt/num.rs @@ -217,7 +217,6 @@ macro_rules! impl_Display { macro_rules! impl_unsigned_to_str { ($($t:ident),*) => ($( impl UnsignedToStr for $t { - #[inline] fn to_str(self, buf: &mut [u8]) -> &mut str { // python -c 'print([len(str((1<<bits)-1)) for bits in range(128)])' const DECIMAL_LENGTH_BY_BINARY_LENGTH: [usize; 128] = [ @@ -240,50 +239,47 @@ macro_rules! impl_unsigned_to_str { } /// `buf` must be large enough - #[inline] unsafe fn to_str_unchecked(self, buf: &mut [u8]) -> &mut str { let mut curr = buf.len() as isize; let buf_ptr = buf.as_mut_ptr(); let lut_ptr = DEC_DIGITS_LUT.as_ptr(); let mut n = self; - { - // need at least 16 bits for the 4-characters-at-a-time to work. - if ::mem::size_of::<$t>() >= 2 { - // eagerly decode 4 characters at a time - #[allow(unused_comparisons, overflowing_literals)] - while n >= 10000 { - let rem = (n % 10000) as isize; - n /= 10000; - - let d1 = (rem / 100) << 1; - let d2 = (rem % 100) << 1; - curr -= 4; - ptr::copy_nonoverlapping(lut_ptr.offset(d1), buf_ptr.offset(curr), 2); - ptr::copy_nonoverlapping(lut_ptr.offset(d2), buf_ptr.offset(curr + 2), 2); - } + // need at least 16 bits for the 4-characters-at-a-time to work. 
+ if ::mem::size_of::<$t>() >= 2 { + // eagerly decode 4 characters at a time + #[allow(unused_comparisons, overflowing_literals)] + while n >= 10000 { + let rem = (n % 10000) as isize; + n /= 10000; + + let d1 = (rem / 100) << 1; + let d2 = (rem % 100) << 1; + curr -= 4; + ptr::copy_nonoverlapping(lut_ptr.offset(d1), buf_ptr.offset(curr), 2); + ptr::copy_nonoverlapping(lut_ptr.offset(d2), buf_ptr.offset(curr + 2), 2); } + } - // if we reach here numbers are <= 9999, so at most 4 chars long - let mut n = n as isize; // possibly reduce 64bit math + // if we reach here numbers are <= 9999, so at most 4 chars long + let mut n = n as isize; // possibly reduce 64bit math - // decode 2 more chars, if > 2 chars - if n >= 100 { - let d1 = (n % 100) << 1; - n /= 100; - curr -= 2; - ptr::copy_nonoverlapping(lut_ptr.offset(d1), buf_ptr.offset(curr), 2); - } + // decode 2 more chars, if > 2 chars + if n >= 100 { + let d1 = (n % 100) << 1; + n /= 100; + curr -= 2; + ptr::copy_nonoverlapping(lut_ptr.offset(d1), buf_ptr.offset(curr), 2); + } - // decode last 1 or 2 chars - if n < 10 { - curr -= 1; - *buf_ptr.offset(curr) = (n as u8) + b'0'; - } else { - let d1 = n << 1; - curr -= 2; - ptr::copy_nonoverlapping(lut_ptr.offset(d1), buf_ptr.offset(curr), 2); - } + // decode last 1 or 2 chars + if n < 10 { + curr -= 1; + *buf_ptr.offset(curr) = (n as u8) + b'0'; + } else { + let d1 = n << 1; + curr -= 2; + ptr::copy_nonoverlapping(lut_ptr.offset(d1), buf_ptr.offset(curr), 2); } str::from_utf8_unchecked_mut( From 334280c2fd343fb5007ae523b3a8b61168ea32d5 Mon Sep 17 00:00:00 2001 From: Simon Sapin <simon.sapin@exyr.org> Date: Sun, 26 Nov 2017 16:56:44 +0100 Subject: [PATCH 4/9] Add a to_str method to signed integers, taking a pre-allocated buffer --- src/libcore/fmt/num.rs | 53 ++++++++++++++++++++++++++++++++++++------ src/libcore/num/mod.rs | 26 ++++++++++++++++++++- 2 files changed, 71 insertions(+), 8 deletions(-) diff --git a/src/libcore/fmt/num.rs b/src/libcore/fmt/num.rs index 
2f4d1d01ee7db..c52bd4fcaa4d8 100644 --- a/src/libcore/fmt/num.rs +++ b/src/libcore/fmt/num.rs @@ -208,7 +208,7 @@ macro_rules! impl_Display { }; unsafe { let mut buf: [u8; 39] = mem::uninitialized(); - f.pad_integral(is_nonnegative, "", n.to_str_unchecked(&mut buf)) + f.pad_integral(is_nonnegative, "", n.to_str_unchecked(&mut buf, false)) } } })+); @@ -217,9 +217,9 @@ macro_rules! impl_Display { macro_rules! impl_unsigned_to_str { ($($t:ident),*) => ($( impl UnsignedToStr for $t { - fn to_str(self, buf: &mut [u8]) -> &mut str { + fn str_len(self) -> usize { // python -c 'print([len(str((1<<bits)-1)) for bits in range(128)])' - const DECIMAL_LENGTH_BY_BINARY_LENGTH: [usize; 128] = [ + const DECIMAL_LENGTH_BY_BINARY_LENGTH: [u8; 128] = [ 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, // 0..15 significant bits 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, // 16..31 10, 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, // 32..47 @@ -231,15 +231,19 @@ macro_rules! impl_unsigned_to_str { ]; let bits = ::mem::size_of::<$t>() * 8 - self.leading_zeros() as usize; - assert!(buf.len() >= DECIMAL_LENGTH_BY_BINARY_LENGTH[bits], + DECIMAL_LENGTH_BY_BINARY_LENGTH[bits] as usize + } + + fn to_str(self, buf: &mut [u8]) -> &mut str { + assert!(buf.len() >= UnsignedToStr::str_len(self), "A buffer of length {} is too small to represent {}", buf.len(), self); unsafe { - self.to_str_unchecked(buf) + self.to_str_unchecked(buf, false) } } /// `buf` must be large enough - unsafe fn to_str_unchecked(self, buf: &mut [u8]) -> &mut str { + unsafe fn to_str_unchecked(self, buf: &mut [u8], minus_sign: bool) -> &mut str { let mut curr = buf.len() as isize; let buf_ptr = buf.as_mut_ptr(); let lut_ptr = DEC_DIGITS_LUT.as_ptr(); @@ -282,6 +286,11 @@ macro_rules! 
impl_unsigned_to_str { ptr::copy_nonoverlapping(lut_ptr.offset(d1), buf_ptr.offset(curr), 2); } + if minus_sign { + curr -= 1; + *buf_ptr.offset(curr) = b'-'; + } + str::from_utf8_unchecked_mut( slice::from_raw_parts_mut(buf_ptr.offset(curr), buf.len() - curr as usize) ) @@ -289,6 +298,30 @@ macro_rules! impl_unsigned_to_str { })*); } +macro_rules! impl_signed_to_str { + ($($t:ident $conv_fn: ident),*) => ($( + impl SignedToStr for $t { + fn to_str(self, buf: &mut [u8]) -> &mut str { + let is_negative = self < 0; + let n = if is_negative { + // convert the negative num to positive by summing 1 to it's 2 complement + (!self.$conv_fn()).wrapping_add(1) + } else { + self.$conv_fn() + }; + let mut str_len = UnsignedToStr::str_len(n); + if is_negative { + str_len += 1 // += "-".len() + } + assert!(buf.len() >= str_len, + "A buffer of length {} is too small to represent {}", buf.len(), self); + unsafe { + n.to_str_unchecked(buf, is_negative) + } + } + })*); +} + impl_Display!(i8, u8, i16, u16, i32, u32: to_u32); impl_Display!(i64, u64: to_u64); impl_Display!(i128, u128: to_u128); @@ -300,8 +333,14 @@ impl_Display!(isize, usize: to_u32); impl_Display!(isize, usize: to_u64); impl_unsigned_to_str!(u8, u16, u32, u64, u128); +impl_signed_to_str!(i8 to_u8, i16 to_u16, i32 to_u32, i64 to_u64, i128 to_u128); pub(crate) trait UnsignedToStr { + fn str_len(self) -> usize; + fn to_str(self, buf: &mut [u8]) -> &mut str; + unsafe fn to_str_unchecked(self, buf: &mut [u8], minus_sign: bool) -> &mut str; +} + +pub(crate) trait SignedToStr { fn to_str(self, buf: &mut [u8]) -> &mut str; - unsafe fn to_str_unchecked(self, buf: &mut [u8]) -> &mut str; } diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs index 47a4c1e787123..2fe7bdfc23d7d 100644 --- a/src/libcore/num/mod.rs +++ b/src/libcore/num/mod.rs @@ -150,6 +150,28 @@ macro_rules! int_impl { from_str_radix(src, radix) } + /// Writes the decimal representation in a pre-allocated buffer. 
+ /// + /// The returned slice starts with a minus sign for negative values + /// but contains no leading zero or plus sign, + /// and is aligned to the *end* of `buffer`. + /// + /// # Panics + /// + /// This function will panic if `buffer` is too small. + /// + /// # Examples + /// + /// ``` + /// #![feature(int_to_str)] + /// + /// assert_eq!(i16::min_value().to_str(&mut [0; 6]), "-32768") + /// ``` + #[unstable(feature = "int_to_str", issue = /* FIXME */ "0")] + pub fn to_str(self, buffer: &mut [u8]) -> &mut str { + fmt::num::SignedToStr::to_str(self as $ActualT, buffer) + } + /// Returns the number of ones in the binary representation of `self`. /// /// # Examples @@ -1327,7 +1349,9 @@ macro_rules! uint_impl { /// # Examples /// /// ``` - /// assert_eq!(0xFFFF_u32.to_str(&mut [0; 10]), "65635") + /// #![feature(int_to_str)] + /// + /// assert_eq!(0xFFFF_u32.to_str(&mut [0; 10]), "65535") /// ``` #[unstable(feature = "int_to_str", issue = /* FIXME */ "0")] pub fn to_str(self, buffer: &mut [u8]) -> &mut str { From a3a0714ee70f198a6476edb4753e411596349fb6 Mon Sep 17 00:00:00 2001 From: Simon Sapin <simon.sapin@exyr.org> Date: Sun, 26 Nov 2017 18:23:22 +0100 Subject: [PATCH 5/9] UnsignedToStr::str_len: replace lookup table by some integer arithmetic --- src/libcore/fmt/num.rs | 47 +++++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/src/libcore/fmt/num.rs b/src/libcore/fmt/num.rs index c52bd4fcaa4d8..c25b8dba3c15c 100644 --- a/src/libcore/fmt/num.rs +++ b/src/libcore/fmt/num.rs @@ -217,21 +217,40 @@ macro_rules! impl_Display { macro_rules! 
impl_unsigned_to_str { ($($t:ident),*) => ($( impl UnsignedToStr for $t { + /// Returns `self.to_string().len()` fn str_len(self) -> usize { - // python -c 'print([len(str((1<<bits)-1)) for bits in range(128)])' - const DECIMAL_LENGTH_BY_BINARY_LENGTH: [u8; 128] = [ - 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, // 0..15 significant bits - 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, // 16..31 - 10, 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, // 32..47 - 15, 15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 19, // 48..63 - 20, 20, 20, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 24, 24, 24, // 64..79 - 25, 25, 25, 25, 26, 26, 26, 27, 27, 27, 28, 28, 28, 28, 29, 29, // 80..95 - 29, 30, 30, 30, 31, 31, 31, 32, 32, 32, 32, 33, 33, 33, 34, 34, // 96..111 - 34, 35, 35, 35, 35, 36, 36, 36, 37, 37, 37, 38, 38, 38, 38, 39, // 112..127 - ]; - - let bits = ::mem::size_of::<$t>() * 8 - self.leading_zeros() as usize; - DECIMAL_LENGTH_BY_BINARY_LENGTH[bits] as usize + if self == 0 { + // We use one "0" digit even though the mathematical answer + // is zero significant decimal digits. + return 1 + } + + #[inline] + fn ceiling_div(numerator: u32, denominator: u32) -> u32 { + (numerator + denominator - 1) / denominator + } + + let type_bits = ::mem::size_of::<$t>() as u32 * 8; + + // This is the number of bits in `self`, ignoring leading zeros. + // It is equal to `ceil(log2(self + 1))`. + let bits = type_bits - self.leading_zeros(); + + // `28 / 93` is an approximation of `log(2) / log(10)`. + // So if we take `bits` as an approximation of `log2(self + 1)`, + // this is an approximation of `ceil(log10(self + 1))`. + let approx_log10 = ceiling_div(bits * 28, 93); + + // Because of integer rounding, this approximation turns out to be good enough + // to provide the exact result we want for all values up to 128 bits. 
+ // This can be verified by running: + // + // ```python + // print(all(len(str((1 << bits) - 1)) == (bits * 28 + 92) // 93 + // for bits in range(1, 128))) + // ``` + // python -c 'print(all(len(str((1<<bits)-1)) == (bits * 28 + 92)//93 for bits in range(1, 128)))' + approx_log10 as usize } fn to_str(self, buf: &mut [u8]) -> &mut str { From 3f9bacb4b5449e59605cf1916f471c26a7737941 Mon Sep 17 00:00:00 2001 From: Simon Sapin <simon.sapin@exyr.org> Date: Sun, 26 Nov 2017 19:16:44 +0100 Subject: [PATCH 6/9] <$Int>::str_len based on bit length was wrong, always require an upper bound. For example, 99 would be assumed to have decimal length 3 since it has the same bit length as 127. --- src/libcore/fmt/num.rs | 51 +++++------------------------------------- src/libcore/num/mod.rs | 44 ++++++++++++++++++++++++++++++++---- 2 files changed, 46 insertions(+), 49 deletions(-) diff --git a/src/libcore/fmt/num.rs b/src/libcore/fmt/num.rs index c25b8dba3c15c..aec52181f6056 100644 --- a/src/libcore/fmt/num.rs +++ b/src/libcore/fmt/num.rs @@ -217,45 +217,10 @@ macro_rules! impl_Display { macro_rules! impl_unsigned_to_str { ($($t:ident),*) => ($( impl UnsignedToStr for $t { - /// Returns `self.to_string().len()` - fn str_len(self) -> usize { - if self == 0 { - // We use one "0" digit even though the mathematical answer - // is zero significant decimal digits. - return 1 - } - - #[inline] - fn ceiling_div(numerator: u32, denominator: u32) -> u32 { - (numerator + denominator - 1) / denominator - } - - let type_bits = ::mem::size_of::<$t>() as u32 * 8; - - // This is the number of bits in `self`, ignoring leading zeros. - // It is equal to `ceil(log2(self + 1))`. - let bits = type_bits - self.leading_zeros(); - - // `28 / 93` is an approximation of `log(2) / log(10)`. - // So if we take `bits` as an approximation of `log2(self + 1)`, - // this is an approximation of `ceil(log10(self + 1))`.
- let approx_log10 = ceiling_div(bits * 28, 93); - - // Because of integer rounding, this approximation turns out to be good enough - // to provide the exact result we want for all values up to 128 bits. - // This can be verified by running: - // - // ```python - // print(all(len(str((1 << bits) - 1)) == (bits * 28 + 92) // 93 - // for bits in range(1, 128))) - // ``` - // python -c 'print(all(len(str((1<<bits)-1)) == (bits * 28 + 92)//93 for bits in range(1, 128)))' - approx_log10 as usize - } - fn to_str(self, buf: &mut [u8]) -> &mut str { - assert!(buf.len() >= UnsignedToStr::str_len(self), - "A buffer of length {} is too small to represent {}", buf.len(), self); + assert!(buf.len() >= $t::MAX_STR_LEN, concat!( + "A buffer of length ", stringify!($t), "::MAX_STR_LEN or more is required." + )); unsafe { self.to_str_unchecked(buf, false) } @@ -321,6 +286,9 @@ macro_rules! impl_signed_to_str { ($($t:ident $conv_fn: ident),*) => ($( impl SignedToStr for $t { fn to_str(self, buf: &mut [u8]) -> &mut str { + assert!(buf.len() >= $t::MAX_STR_LEN, concat!( + "A buffer of length ", stringify!($t), "::MAX_STR_LEN or more is required." + )); let is_negative = self < 0; let n = if is_negative { // convert the negative num to positive by summing 1 to it's 2 complement @@ -328,12 +296,6 @@ macro_rules! 
impl_signed_to_str { } else { self.$conv_fn() }; - let mut str_len = UnsignedToStr::str_len(n); - if is_negative { - str_len += 1 // += "-".len() - } - assert!(buf.len() >= str_len, - "A buffer of length {} is too small to represent {}", buf.len(), self); unsafe { n.to_str_unchecked(buf, is_negative) } @@ -355,7 +317,6 @@ impl_unsigned_to_str!(u8, u16, u32, u64, u128); impl_signed_to_str!(i8 to_u8, i16 to_u16, i32 to_u32, i64 to_u64, i128 to_u128); pub(crate) trait UnsignedToStr { - fn str_len(self) -> usize; fn to_str(self, buf: &mut [u8]) -> &mut str; unsafe fn to_str_unchecked(self, buf: &mut [u8], minus_sign: bool) -> &mut str; } diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs index 2fe7bdfc23d7d..af70624726741 100644 --- a/src/libcore/num/mod.rs +++ b/src/libcore/num/mod.rs @@ -99,6 +99,7 @@ pub mod diy_float; // `Int` + `SignedInt` implemented for signed integers macro_rules! int_impl { ($SelfT:ty, $ActualT:ident, $UnsignedT:ty, $BITS:expr, + MAX_STR_LEN = $MAX_STR_LEN: expr, $add_with_overflow:path, $sub_with_overflow:path, $mul_with_overflow:path) => { @@ -158,20 +159,29 @@ macro_rules! int_impl { /// /// # Panics /// - /// This function will panic if `buffer` is too small. + /// This function will panic if `buffer` is smaller than [`MAX_STR_LEN`]. + /// + /// [`MAX_STR_LEN`]: #associatedconstant.MAX_STR_LEN /// /// # Examples /// /// ``` /// #![feature(int_to_str)] /// - /// assert_eq!(i16::min_value().to_str(&mut [0; 6]), "-32768") + /// assert_eq!(i16::min_value().to_str(&mut [0; i16::MAX_STR_LEN]), "-32768") /// ``` #[unstable(feature = "int_to_str", issue = /* FIXME */ "0")] pub fn to_str(self, buffer: &mut [u8]) -> &mut str { fmt::num::SignedToStr::to_str(self as $ActualT, buffer) } + /// The maximum length of the decimal representation of a value of this type. + /// This is intended to be used together with [`to_str`]. 
+ /// + /// [`to_str`]: #method.to_str + #[unstable(feature = "int_to_str", issue = /* FIXME */ "0")] + pub const MAX_STR_LEN: usize = $MAX_STR_LEN; + /// Returns the number of ones in the binary representation of `self`. /// /// # Examples @@ -1222,6 +1232,7 @@ macro_rules! int_impl { #[lang = "i8"] impl i8 { int_impl! { i8, i8, u8, 8, + MAX_STR_LEN = 4, // i8::min_value().to_string().len() intrinsics::add_with_overflow, intrinsics::sub_with_overflow, intrinsics::mul_with_overflow } @@ -1230,6 +1241,7 @@ impl i8 { #[lang = "i16"] impl i16 { int_impl! { i16, i16, u16, 16, + MAX_STR_LEN = 6, // i16::min_value().to_string().len() intrinsics::add_with_overflow, intrinsics::sub_with_overflow, intrinsics::mul_with_overflow } @@ -1238,6 +1250,7 @@ impl i16 { #[lang = "i32"] impl i32 { int_impl! { i32, i32, u32, 32, + MAX_STR_LEN = 11, // i32::min_value().to_string().len() intrinsics::add_with_overflow, intrinsics::sub_with_overflow, intrinsics::mul_with_overflow } @@ -1246,6 +1259,7 @@ impl i32 { #[lang = "i64"] impl i64 { int_impl! { i64, i64, u64, 64, + MAX_STR_LEN = 20, // i64::min_value().to_string().len() intrinsics::add_with_overflow, intrinsics::sub_with_overflow, intrinsics::mul_with_overflow } @@ -1254,6 +1268,7 @@ impl i64 { #[lang = "i128"] impl i128 { int_impl! { i128, i128, u128, 128, + MAX_STR_LEN = 40, // i128::min_value().to_string().len() intrinsics::add_with_overflow, intrinsics::sub_with_overflow, intrinsics::mul_with_overflow } @@ -1263,6 +1278,7 @@ impl i128 { #[lang = "isize"] impl isize { int_impl! { isize, i16, u16, 16, + MAX_STR_LEN = i16::MAX_STR_LEN, intrinsics::add_with_overflow, intrinsics::sub_with_overflow, intrinsics::mul_with_overflow } @@ -1272,6 +1288,7 @@ impl isize { #[lang = "isize"] impl isize { int_impl! { isize, i32, u32, 32, + MAX_STR_LEN = i32::MAX_STR_LEN, intrinsics::add_with_overflow, intrinsics::sub_with_overflow, intrinsics::mul_with_overflow } @@ -1281,6 +1298,7 @@ impl isize { #[lang = "isize"] impl isize { int_impl!
{ isize, i64, u64, 64, + MAX_STR_LEN = i64::MAX_STR_LEN, intrinsics::add_with_overflow, intrinsics::sub_with_overflow, intrinsics::mul_with_overflow } @@ -1289,6 +1307,7 @@ impl isize { // `Int` + `UnsignedInt` implemented for unsigned integers macro_rules! uint_impl { ($SelfT:ty, $ActualT:ty, $BITS:expr, + MAX_STR_LEN = $MAX_STR_LEN: expr, $ctpop:path, $ctlz:path, $ctlz_nonzero:path, @@ -1344,20 +1363,29 @@ macro_rules! uint_impl { /// /// # Panics /// - /// This function will panic if `buffer` is too small. + /// This function will panic if `buffer` is smaller than [`MAX_STR_LEN`]. + /// + /// [`MAX_STR_LEN`]: #associatedconstant.MAX_STR_LEN /// /// # Examples /// /// ``` /// #![feature(int_to_str)] /// - /// assert_eq!(0xFFFF_u32.to_str(&mut [0; 10]), "65535") + /// assert_eq!(0xFFFF_u32.to_str(&mut [0; u32::MAX_STR_LEN]), "65535") /// ``` #[unstable(feature = "int_to_str", issue = /* FIXME */ "0")] pub fn to_str(self, buffer: &mut [u8]) -> &mut str { fmt::num::UnsignedToStr::to_str(self as $ActualT, buffer) } + /// The maximum length of the decimal representation of a value of this type. + /// This is intended to be used together with [`to_str`]. + /// + /// [`to_str`]: #method.to_str + #[unstable(feature = "int_to_str", issue = /* FIXME */ "0")] + pub const MAX_STR_LEN: usize = $MAX_STR_LEN; + /// Returns the number of ones in the binary representation of `self`. /// /// # Examples @@ -2294,6 +2322,7 @@ macro_rules! uint_impl { #[lang = "u8"] impl u8 { uint_impl! { u8, u8, 8, + MAX_STR_LEN = 3, // u8::max_value().to_string().len() intrinsics::ctpop, intrinsics::ctlz, intrinsics::ctlz_nonzero, @@ -2848,6 +2877,7 @@ impl u8 { #[lang = "u16"] impl u16 { uint_impl! { u16, u16, 16, + MAX_STR_LEN = 5, // u16::max_value().to_string().len() intrinsics::ctpop, intrinsics::ctlz, intrinsics::ctlz_nonzero, @@ -2861,6 +2891,7 @@ impl u16 { #[lang = "u32"] impl u32 { uint_impl!
{ u32, u32, 32, + MAX_STR_LEN = 10, // u32::max_value().to_string().len() intrinsics::ctpop, intrinsics::ctlz, intrinsics::ctlz_nonzero, @@ -2874,6 +2905,7 @@ impl u32 { #[lang = "u64"] impl u64 { uint_impl! { u64, u64, 64, + MAX_STR_LEN = 20, // u64::max_value().to_string().len() intrinsics::ctpop, intrinsics::ctlz, intrinsics::ctlz_nonzero, @@ -2887,6 +2919,7 @@ impl u64 { #[lang = "u128"] impl u128 { uint_impl! { u128, u128, 128, + MAX_STR_LEN = 40, // conservative: u128::max_value().to_string().len() is 39 intrinsics::ctpop, intrinsics::ctlz, intrinsics::ctlz_nonzero, @@ -2901,6 +2934,7 @@ impl u128 { #[lang = "usize"] impl usize { uint_impl! { usize, u16, 16, + MAX_STR_LEN = u16::MAX_STR_LEN, intrinsics::ctpop, intrinsics::ctlz, intrinsics::ctlz_nonzero, @@ -2914,6 +2948,7 @@ impl usize { #[lang = "usize"] impl usize { uint_impl! { usize, u32, 32, + MAX_STR_LEN = u32::MAX_STR_LEN, intrinsics::ctpop, intrinsics::ctlz, intrinsics::ctlz_nonzero, @@ -2928,6 +2963,7 @@ impl usize { #[lang = "usize"] impl usize { uint_impl! { usize, u64, 64, + MAX_STR_LEN = u64::MAX_STR_LEN, intrinsics::ctpop, intrinsics::ctlz, intrinsics::ctlz_nonzero, From 3de7bfeb5bc96effd4c5bb56bd3be9b0a435419d Mon Sep 17 00:00:00 2001 From: Simon Sapin <simon.sapin@exyr.org> Date: Sun, 26 Nov 2017 20:22:24 +0100 Subject: [PATCH 7/9] Add to_uppercase_str_radix and to_lowercase_str_radix to primitive integers --- src/libcore/fmt/num.rs | 56 ++++++++++++++++++ src/libcore/num/mod.rs | 126 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 182 insertions(+) diff --git a/src/libcore/fmt/num.rs b/src/libcore/fmt/num.rs index aec52181f6056..cd56c587ea0b3 100644 --- a/src/libcore/fmt/num.rs +++ b/src/libcore/fmt/num.rs @@ -25,6 +25,7 @@ trait Int: PartialEq + PartialOrd + Div<Output=Self> + Rem<Output=Self> + Sub<Output=Self> + Copy { fn zero() -> Self; fn from_u8(u: u8) -> Self; + fn from_u32(u: u32) -> Self; fn to_u8(&self) -> u8; fn to_u16(&self) -> u16; fn to_u32(&self) -> u32; @@ -36,6 +37,7 @@ macro_rules!
doit { ($($t:ident)*) => ($(impl Int for $t { fn zero() -> $t { 0 } fn from_u8(u: u8) -> $t { u as $t } + fn from_u32(u: u32) -> $t { u as $t } fn to_u8(&self) -> u8 { *self as u8 } fn to_u16(&self) -> u16 { *self as u16 } fn to_u32(&self) -> u32 { *self as u32 } @@ -279,6 +281,46 @@ macro_rules! impl_unsigned_to_str { slice::from_raw_parts_mut(buf_ptr.offset(curr), buf.len() - curr as usize) ) } + + fn to_str_radix(self, buf: &mut [u8], radix: u32, uppercase: bool, minus_sign: bool) + -> &mut str { + assert!(2 <= radix && radix <= 36, "radix must be between 2 and 36 inclusive"); + let radix = $t::from_u32(radix); + let mut curr = buf.len(); + macro_rules! next { + () => { + match curr.checked_sub(1) { + Some(next) => curr = next, + None => panic!( + "A buffer of length {} is too small to represent {} in base {}", + buf.len(), self, radix + ) + } + } + } + let mut n = self; + loop { + next!(); + let digit = (n % radix).to_u8(); + buf[curr] = digit + if digit < 10 { + b'0' + } else if uppercase { + b'A' - 10 + } else { + b'a' - 10 + }; + n /= radix; + if n == 0 { + if minus_sign { + next!(); + buf[curr] = b'-' + } + return unsafe { + str::from_utf8_unchecked_mut(&mut buf[curr..]) + } + } + } + } })*); } @@ -300,6 +342,17 @@ macro_rules! 
impl_signed_to_str { n.to_str_unchecked(buf, is_negative) } } + + fn to_str_radix(self, buf: &mut [u8], radix: u32, uppercase: bool) -> &mut str { + let is_negative = self < 0; + let n = if is_negative { + // convert the negative num to positive by adding 1 to its bitwise complement (two's complement negation) + (!self.$conv_fn()).wrapping_add(1) + } else { + self.$conv_fn() + }; + n.to_str_radix(buf, radix, uppercase, is_negative) + } })*); } @@ -319,8 +372,11 @@ impl_signed_to_str!(i8 to_u8, i16 to_u16, i32 to_u32, i64 to_u64, i128 to_u128); pub(crate) trait UnsignedToStr { fn to_str(self, buf: &mut [u8]) -> &mut str; unsafe fn to_str_unchecked(self, buf: &mut [u8], minus_sign: bool) -> &mut str; + fn to_str_radix(self, buf: &mut [u8], radix: u32, uppercase: bool, minus_sign: bool) + -> &mut str; } pub(crate) trait SignedToStr { fn to_str(self, buf: &mut [u8]) -> &mut str; + fn to_str_radix(self, buf: &mut [u8], radix: u32, uppercase: bool) -> &mut str; } diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs index af70624726741..5137cc52256f6 100644 --- a/src/libcore/num/mod.rs +++ b/src/libcore/num/mod.rs @@ -151,6 +151,66 @@ macro_rules! int_impl { from_str_radix(src, radix) } + /// Write the representation in a given base in a pre-allocated buffer. + /// + /// Digits are a subset (depending on `radix`) of `0-9A-Z`. + /// + /// The returned slice starts with a minus sign for negative values + /// but contains no leading zero or plus sign, + /// and is aligned to the *end* of `buffer`. + /// + /// # Panics + /// + /// This function will panic if `radix` is smaller than 2 or larger than 36, + /// or if `buffer` is too small. + /// As a conservative upper bound, `&mut [u8; 129]` is always large enough. + /// + /// # Safety + /// + /// `buffer` may be uninitialized.
+ /// + /// # Examples + /// + /// ``` + /// #![feature(int_to_str)] + /// + /// assert_eq!((i16::min_value() + 1).to_uppercase_str_radix(&mut [0; 5], 16), "-7FFF") + /// ``` + #[unstable(feature = "int_to_str", issue = /* FIXME */ "0")] + pub fn to_uppercase_str_radix(self, buffer: &mut [u8], radix: u32) -> &mut str { + fmt::num::SignedToStr::to_str_radix(self as $ActualT, buffer, radix, true) + } + + /// Write the representation in a given base in a pre-allocated buffer. + /// + /// Digits are a subset (depending on `radix`) of `0-9a-z`. + /// + /// The returned slice starts with a minus sign for negative values + /// but contains no leading zero or plus sign, + /// and is aligned to the *end* of `buffer`. + /// + /// # Panics + /// + /// This function will panic if `radix` is smaller than 2 or larger than 36, + /// or if `buffer` is too small. + /// As a conservative upper bound, `&mut [u8; 129]` is always large enough. + /// + /// # Safety + /// + /// `buffer` may be uninitialized. + /// + /// # Examples + /// + /// ``` + /// #![feature(int_to_str)] + /// + /// assert_eq!((i16::min_value() + 1).to_lowercase_str_radix(&mut [0; 5], 16), "-7fff") + /// ``` + #[unstable(feature = "int_to_str", issue = /* FIXME */ "0")] + pub fn to_lowercase_str_radix(self, buffer: &mut [u8], radix: u32) -> &mut str { + fmt::num::SignedToStr::to_str_radix(self as $ActualT, buffer, radix, false) + } + /// Writes the decimal representation in a pre-allocated buffer. /// /// The returned slice starts with a minus sign for negative values @@ -163,6 +223,10 @@ macro_rules! int_impl { /// /// [`MAX_STR_LEN`]: #associatedconstant.MAX_STR_LEN /// + /// # Safety + /// + /// `buffer` may be uninitialized. + /// /// # Examples /// /// ``` @@ -1356,6 +1420,64 @@ macro_rules! uint_impl { from_str_radix(src, radix) } + /// Write the representation in a given base in a pre-allocated buffer. + /// + /// Digits are a subset (depending on `radix`) of `0-9A-F`.
+ /// + /// The returned slice contains no leading zero or plus sign, + /// and is aligned to the *end* of `buffer`. + /// + /// # Panics + /// + /// This function will panic if `radix` is smaller than 2 or larger than 36, + /// or if `buffer` is too small. + /// As a conservative upper bound, `&mut [u8; 128]` is always large enough. + /// + /// # Safety + /// + /// `buffer` may be uninitialized. + /// + /// # Examples + /// + /// ``` + /// #![feature(int_to_str)] + /// + /// assert_eq!((std::char::MAX as u32).to_uppercase_str_radix(&mut [0; 8], 16), "10FFFF") + /// ``` + #[unstable(feature = "int_to_str", issue = /* FIXME */ "0")] + pub fn to_uppercase_str_radix(self, buffer: &mut [u8], radix: u32) -> &mut str { + fmt::num::UnsignedToStr::to_str_radix(self as $ActualT, buffer, radix, true, false) + } + + /// Write the representation in a given base in a pre-allocated buffer. + /// + /// Digits are a subset (depending on `radix`) of `0-9a-z`. + /// + /// The returned slice contains no leading zero or plus sign, + /// and is aligned to the *end* of `buffer`. + /// + /// # Panics + /// + /// This function will panic if `radix` is smaller than 2 or larger than 36, + /// or if `buffer` is too small. + /// As a conservative upper bound, `&mut [u8; 128]` is always large enough. + /// + /// # Safety + /// + /// `buffer` may be uninitialized. + /// + /// # Examples + /// + /// ``` + /// #![feature(int_to_str)] + /// + /// assert_eq!((std::char::MAX as u32).to_lowercase_str_radix(&mut [0; 8], 16), "10ffff") + /// ``` + #[unstable(feature = "int_to_str", issue = /* FIXME */ "0")] + pub fn to_lowercase_str_radix(self, buffer: &mut [u8], radix: u32) -> &mut str { + fmt::num::UnsignedToStr::to_str_radix(self as $ActualT, buffer, radix, false, false) + } + /// Writes the decimal representation in a pre-allocated buffer. /// /// The returned slice contains no leading zero or plus sign, @@ -1367,6 +1489,10 @@ macro_rules! 
uint_impl { /// /// [`MAX_STR_LEN`]: #associatedconstant.MAX_STR_LEN /// + /// # Safety + /// + /// `buffer` may be uninitialized. + /// /// # Examples /// /// ``` From 53bd486046a741c7d7c9602e11b53c96b2322401 Mon Sep 17 00:00:00 2001 From: Simon Sapin <simon.sapin@exyr.org> Date: Sun, 26 Nov 2017 21:04:00 +0100 Subject: [PATCH 8/9] Replace a magic number with a constant --- src/libcore/fmt/num.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libcore/fmt/num.rs b/src/libcore/fmt/num.rs index cd56c587ea0b3..7d070fede49c1 100644 --- a/src/libcore/fmt/num.rs +++ b/src/libcore/fmt/num.rs @@ -209,7 +209,7 @@ macro_rules! impl_Display { (!self.$conv_fn()).wrapping_add(1) }; unsafe { - let mut buf: [u8; 39] = mem::uninitialized(); + let mut buf: [u8; i128::MAX_STR_LEN] = mem::uninitialized(); f.pad_integral(is_nonnegative, "", n.to_str_unchecked(&mut buf, false)) } } From ad4edf75b66dec279d8cc4109abbef7362fdb552 Mon Sep 17 00:00:00 2001 From: Simon Sapin <simon.sapin@exyr.org> Date: Sun, 26 Nov 2017 21:49:08 +0100 Subject: [PATCH 9/9] Typo fix --- src/libcore/num/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs index 5137cc52256f6..bc6cd7b25ab8a 100644 --- a/src/libcore/num/mod.rs +++ b/src/libcore/num/mod.rs @@ -1422,7 +1422,7 @@ macro_rules! uint_impl { /// Write the representation in a given base in a pre-allocated buffer. /// - /// Digits are a subset (depending on `radix`) of `0-9A-F`. + /// Digits are a subset (depending on `radix`) of `0-9A-Z`. /// /// The returned slice contains no leading zero or plus sign, /// and is aligned to the *end* of `buffer`.