Skip to content

Commit e38ba06

Browse files
committed
uucore: format: format_float_hexadecimal: Take in &BigDecimal
Display hexadecimal floats with arbitrary precision. Note that some of the logic will produce extremely large BigInt intermediate values: some optimization is possible here, but the current implementation appears to work fine for reasonable numbers (e.g. whatever would previously fit in an f64, and even with somewhat large precision).
1 parent 1de44d1 commit e38ba06

File tree

1 file changed

+148
-49
lines changed

1 file changed

+148
-49
lines changed

src/uucore/src/lib/features/format/num_format.rs

Lines changed: 148 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
// spell-checker:ignore bigdecimal prec
66
//! Utilities for formatting numbers in various formats
77
8+
use bigdecimal::num_bigint::ToBigInt;
89
use bigdecimal::BigDecimal;
910
use num_traits::Signed;
10-
use num_traits::ToPrimitive;
1111
use num_traits::Zero;
1212
use std::cmp::min;
1313
use std::io::Write;
@@ -254,24 +254,20 @@ impl Formatter<&ExtendedBigDecimal> for Float {
254254
};
255255

256256
let s = match abs {
257-
ExtendedBigDecimal::BigDecimal(bd) => {
258-
// TODO: Convert format_float_* functions to take in a BigDecimal.
259-
let x = bd.to_f64().unwrap();
260-
match self.variant {
261-
FloatVariant::Decimal => {
262-
format_float_decimal(&bd, self.precision, self.force_decimal)
263-
}
264-
FloatVariant::Scientific => {
265-
format_float_scientific(&bd, self.precision, self.case, self.force_decimal)
266-
}
267-
FloatVariant::Shortest => {
268-
format_float_shortest(&bd, self.precision, self.case, self.force_decimal)
269-
}
270-
FloatVariant::Hexadecimal => {
271-
format_float_hexadecimal(x, self.precision, self.case, self.force_decimal)
272-
}
257+
ExtendedBigDecimal::BigDecimal(bd) => match self.variant {
258+
FloatVariant::Decimal => {
259+
format_float_decimal(&bd, self.precision, self.force_decimal)
273260
}
274-
}
261+
FloatVariant::Scientific => {
262+
format_float_scientific(&bd, self.precision, self.case, self.force_decimal)
263+
}
264+
FloatVariant::Shortest => {
265+
format_float_shortest(&bd, self.precision, self.case, self.force_decimal)
266+
}
267+
FloatVariant::Hexadecimal => {
268+
format_float_hexadecimal(&bd, self.precision, self.case, self.force_decimal)
269+
}
270+
},
275271
_ => format_float_non_finite(&abs, self.case),
276272
};
277273
let sign_indicator = get_sign_indicator(self.positive_sign, negative);
@@ -485,33 +481,109 @@ fn format_float_shortest(
485481
}
486482

487483
fn format_float_hexadecimal(
488-
f: f64,
484+
bd: &BigDecimal,
489485
precision: usize,
490486
case: Case,
491487
force_decimal: ForceDecimal,
492488
) -> String {
493-
debug_assert!(!f.is_sign_negative());
494-
let (first_digit, mantissa, exponent) = if f == 0.0 {
495-
(0, 0, 0)
489+
debug_assert!(!bd.is_negative());
490+
491+
let exp_char = match case {
492+
Case::Lowercase => 'p',
493+
Case::Uppercase => 'P',
494+
};
495+
496+
if BigDecimal::zero().eq(bd) {
497+
return if force_decimal == ForceDecimal::Yes && precision == 0 {
498+
format!("0x0.{exp_char}+0")
499+
} else {
500+
format!("0x{:.*}{exp_char}+0", precision, 0.0)
501+
};
502+
}
503+
504+
// Convert to the form frac10 * 10^exp
505+
let (frac10, p) = bd.as_bigint_and_exponent();
506+
// We cast this to u32 below, but we probably do not care about exponents
507+
// that would overflow u32. We should probably detect this and fail
508+
// gracefully though.
509+
let exp10 = -p;
510+
511+
// We want something that looks like this: frac2 * 2^exp2,
512+
// without losing precision.
513+
// frac10 * 10^exp10 = (frac10 * 5^exp10) * 2^exp10 = frac2 * 2^exp2
514+
515+
// TODO: this is most accurate, but frac2 will grow a lot for large
516+
// precision or exponent, and formatting will get very slow.
517+
// The precision can't technically be a very large number (up to 32-bit int),
518+
// but we can trim some of the lower digits, if we want to only keep what a
519+
// `long double` (80-bit or 128-bit at most) implementation would be able to
520+
// display.
521+
// The exponent is less of a problem if we matched `long double` implementation,
522+
// as 80/128-bit floats only cover a 15-bit exponent.
523+
524+
let (mut frac2, mut exp2) = if exp10 >= 0 {
525+
// Positive exponent. 5^exp10 is an integer, so we can just multiply.
526+
(frac10 * 5.to_bigint().unwrap().pow(exp10 as u32), exp10)
496527
} else {
497-
let bits = f.to_bits();
498-
let exponent_bits = ((bits >> 52) & 0x7ff) as i64;
499-
let exponent = exponent_bits - 1023;
500-
let mantissa = bits & 0xf_ffff_ffff_ffff;
501-
(1, mantissa, exponent)
528+
// Negative exponent: We're going to need to divide by 5^-exp10,
529+
// so we first shift left by some margin to make sure we do not lose digits.
530+
531+
// We want to make sure we have at least precision+1 hex digits to start with.
532+
// Then, dividing by 5^-exp10 loses at most -exp10*3 binary digits
533+
// (since 5^-exp10 < 8^-exp10), so we add that, and another bit for
534+
// rounding.
535+
let margin = ((precision + 1) as i64 * 4 - frac10.bits() as i64).max(0) + -exp10 * 3 + 1;
536+
537+
// frac10 * 10^exp10 = frac10 * 2^margin * 10^exp10 * 2^-margin =
538+
// (frac10 * 2^margin * 5^exp10) * 2^exp10 * 2^-margin =
539+
// (frac10 * 2^margin / 5^-exp10) * 2^(exp10-margin)
540+
(
541+
(frac10 << margin) / 5.to_bigint().unwrap().pow(-exp10 as u32),
542+
exp10 - margin,
543+
)
502544
};
503545

504-
let mut s = match (precision, force_decimal) {
505-
(0, ForceDecimal::No) => format!("0x{first_digit}p{exponent:+}"),
506-
(0, ForceDecimal::Yes) => format!("0x{first_digit}.p{exponent:+}"),
507-
_ => format!("0x{first_digit}.{mantissa:0>13x}p{exponent:+}"),
546+
// Emulate x86(-64) behavior, we display 4 binary digits before the decimal point,
547+
// so the value will always be between 0x8 and 0xf.
548+
// TODO: Make this configurable? e.g. arm64 only displays 1 digit.
549+
const BEFORE_BITS: usize = 4;
550+
let wanted_bits = (BEFORE_BITS + precision * 4) as u64;
551+
let bits = frac2.bits();
552+
553+
exp2 += bits as i64 - wanted_bits as i64;
554+
if bits > wanted_bits {
555+
// Shift almost all the way, round up if needed, then finish shifting.
556+
frac2 >>= bits - wanted_bits - 1;
557+
let add = frac2.bit(0);
558+
frac2 >>= 1;
559+
560+
if add {
561+
frac2 += 0x1;
562+
if frac2.bits() > wanted_bits {
563+
// We overflowed, drop one more hex digit.
564+
// Note: Yes, the leading hex digit will now contain only 1 binary digit,
565+
// but that emulates coreutils behavior on x86(-64).
566+
frac2 >>= 4;
567+
exp2 += 4;
568+
}
569+
}
570+
} else {
571+
frac2 <<= wanted_bits - bits;
508572
};
509573

510-
if case == Case::Uppercase {
511-
s.make_ascii_uppercase();
512-
}
574+
// Convert "XXX" to "X.XX": that divides by 16^precision = 2^(4*precision), so add that to the exponent.
575+
let digits = frac2.to_str_radix(16);
576+
let (first_digit, remaining_digits) = digits.split_at(1);
577+
let exponent = exp2 + (4 * precision) as i64;
513578

514-
s
579+
let dot =
580+
if !remaining_digits.is_empty() || (precision == 0 && ForceDecimal::Yes == force_decimal) {
581+
"."
582+
} else {
583+
""
584+
};
585+
586+
format!("0x{first_digit}{dot}{remaining_digits}{exp_char}{exponent:+}")
515587
}
516588

517589
fn strip_fractional_zeroes_and_dot(s: &mut String) {
@@ -782,21 +854,48 @@ mod test {
782854

783855
#[test]
784856
fn hexadecimal_float() {
857+
// It's important to create the BigDecimal from a string: going through a f64
858+
// will lose some precision.
859+
785860
use super::format_float_hexadecimal;
786-
let f = |x| format_float_hexadecimal(x, 6, Case::Lowercase, ForceDecimal::No);
787-
// TODO(#7364): These values do not match coreutils output, but are possible correct representations.
788-
assert_eq!(f(0.00001), "0x1.4f8b588e368f1p-17");
789-
assert_eq!(f(0.125), "0x1.0000000000000p-3");
790-
assert_eq!(f(256.0), "0x1.0000000000000p+8");
791-
assert_eq!(f(65536.0), "0x1.0000000000000p+16");
792-
793-
let f = |x| format_float_hexadecimal(x, 0, Case::Lowercase, ForceDecimal::No);
794-
assert_eq!(f(0.125), "0x1p-3");
795-
assert_eq!(f(256.0), "0x1p+8");
796-
797-
let f = |x| format_float_hexadecimal(x, 0, Case::Lowercase, ForceDecimal::Yes);
798-
assert_eq!(f(0.125), "0x1.p-3");
799-
assert_eq!(f(256.0), "0x1.p+8");
861+
let f = |x| {
862+
format_float_hexadecimal(
863+
&BigDecimal::from_str(x).unwrap(),
864+
6,
865+
Case::Lowercase,
866+
ForceDecimal::No,
867+
)
868+
};
869+
assert_eq!(f("0"), "0x0.000000p+0");
870+
assert_eq!(f("0.00001"), "0xa.7c5ac4p-20");
871+
assert_eq!(f("0.125"), "0x8.000000p-6");
872+
assert_eq!(f("256.0"), "0x8.000000p+5");
873+
assert_eq!(f("65536.0"), "0x8.000000p+13");
874+
assert_eq!(f("1.9999999999"), "0x1.000000p+1"); // Corner case: leading hex digit only contains 1 binary digit
875+
876+
let f = |x| {
877+
format_float_hexadecimal(
878+
&BigDecimal::from_str(x).unwrap(),
879+
0,
880+
Case::Lowercase,
881+
ForceDecimal::No,
882+
)
883+
};
884+
assert_eq!(f("0"), "0x0p+0");
885+
assert_eq!(f("0.125"), "0x8p-6");
886+
assert_eq!(f("256.0"), "0x8p+5");
887+
888+
let f = |x| {
889+
format_float_hexadecimal(
890+
&BigDecimal::from_str(x).unwrap(),
891+
0,
892+
Case::Lowercase,
893+
ForceDecimal::Yes,
894+
)
895+
};
896+
assert_eq!(f("0"), "0x0.p+0");
897+
assert_eq!(f("0.125"), "0x8.p-6");
898+
assert_eq!(f("256.0"), "0x8.p+5");
800899
}
801900

802901
#[test]

0 commit comments

Comments
 (0)