Skip to content

Commit cd02d42

Browse files
committed
uucore: format: num_parser: Parse exponent part of floating point numbers
Parse numbers like 123.15e15 and 0xfp-2, and add some tests for that. `parse` is becoming more and more of a monster: we should consider splitting it into multiple parts.
1 parent df80d00 commit cd02d42

File tree

1 file changed

+89
-14
lines changed

1 file changed

+89
-14
lines changed

src/uucore/src/lib/features/format/num_parser.rs

Lines changed: 89 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,8 @@ fn parse_special_value(
210210
}
211211
}
212212

213+
// TODO: As highlighted by clippy, this function _is_ high cognitive complexity, jumps
214+
// around between integer and float parsing, and should be split into multiple parts.
213215
#[allow(clippy::cognitive_complexity)]
214216
fn parse(
215217
input: &str,
@@ -265,21 +267,51 @@ fn parse(
265267
let mut chars = rest.chars().enumerate().fuse().peekable();
266268
let mut digits = BigUint::zero();
267269
let mut scale = 0i64;
270+
let mut exponent = 0i64;
268271
while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) {
269272
chars.next();
270273
digits = digits * base as u8 + d;
271274
}
272275

273-
// Parse the fractional part of the number if there can be one and the input contains
274-
// a '.' decimal separator.
275-
if matches!(chars.peek(), Some(&(_, '.')))
276-
&& matches!(base, Base::Decimal | Base::Hexadecimal)
277-
&& !integral_only
278-
{
279-
chars.next();
280-
while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) {
276+
// Parse fractional/exponent part of the number for supported bases.
277+
if matches!(base, Base::Decimal | Base::Hexadecimal) && !integral_only {
278+
// Parse the fractional part of the number if there can be one and the input contains
279+
// a '.' decimal separator.
280+
if matches!(chars.peek(), Some(&(_, '.'))) {
281+
chars.next();
282+
while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) {
283+
chars.next();
284+
(digits, scale) = (digits * base as u8 + d, scale + 1);
285+
}
286+
}
287+
288+
let exp_char = match base {
289+
Base::Decimal => 'e',
290+
Base::Hexadecimal => 'p',
291+
_ => unreachable!(),
292+
};
293+
294+
// Parse the exponent part; its digits are always decimal, even for hexadecimal floats.
295+
if chars.peek().is_some_and(|&(_, c)| c == exp_char) {
281296
chars.next();
282-
(digits, scale) = (digits * base as u8 + d, scale + 1);
297+
let exp_negative = match chars.peek() {
298+
Some((_, '-')) => {
299+
chars.next();
300+
true
301+
}
302+
Some((_, '+')) => {
303+
chars.next();
304+
false
305+
}
306+
_ => false, // Something else, or nothing at all: keep going.
307+
};
308+
while let Some(d) = chars.peek().and_then(|&(_, c)| Base::Decimal.digit(c)) {
309+
chars.next();
310+
exponent = exponent * 10 + d as i64;
311+
}
312+
if exp_negative {
313+
exponent = -exponent;
314+
}
283315
}
284316
}
285317

@@ -298,14 +330,23 @@ fn parse(
298330
} else {
299331
let sign = if negative { Sign::Minus } else { Sign::Plus };
300332
let signed_digits = BigInt::from_biguint(sign, digits);
301-
let bd = if scale == 0 {
333+
let bd = if scale == 0 && exponent == 0 {
302334
BigDecimal::from_bigint(signed_digits, 0)
303335
} else if base == Base::Decimal {
304-
BigDecimal::from_bigint(signed_digits, scale)
336+
BigDecimal::from_bigint(signed_digits, scale - exponent)
337+
} else if base == Base::Hexadecimal {
338+
// Base is 16, init at scale 0 then divide by base**scale.
339+
let bd = BigDecimal::from_bigint(signed_digits, 0)
340+
/ BigDecimal::from_bigint(BigInt::from(16).pow(scale as u32), 0);
341+
// Confusingly, exponent is in base 2 for hex floating point numbers.
342+
if exponent >= 0 {
343+
bd * 2u64.pow(exponent as u32)
344+
} else {
345+
bd / 2u64.pow(-exponent as u32)
346+
}
305347
} else {
306-
// Base is not 10, init at scale 0 then divide by base**scale.
307-
BigDecimal::from_bigint(signed_digits, 0)
308-
/ BigDecimal::from_bigint(BigInt::from(base as u32).pow(scale as u32), 0)
348+
// Either scale != 0 or exponent != 0, which means that integral_only is not set, so only base 10 and 16 are allowed.
349+
unreachable!();
309350
};
310351
ExtendedBigDecimal::BigDecimal(bd)
311352
};
@@ -348,6 +389,10 @@ mod tests {
348389
u64::extended_parse("123.15"),
349390
Err(ExtendedParserError::PartialMatch(123, ".15"))
350391
));
392+
assert!(matches!(
393+
u64::extended_parse("123e10"),
394+
Err(ExtendedParserError::PartialMatch(123, "e10"))
395+
));
351396
}
352397

353398
#[test]
@@ -369,6 +414,10 @@ mod tests {
369414
i64::extended_parse(&format!("{}", i64::MAX as u64 + 1)),
370415
Err(ExtendedParserError::Overflow)
371416
));
417+
assert!(matches!(
418+
i64::extended_parse("-123e10"),
419+
Err(ExtendedParserError::PartialMatch(-123, "e10"))
420+
));
372421
}
373422

374423
#[test]
@@ -395,12 +444,18 @@ mod tests {
395444
assert_eq!(Ok(123.15), f64::extended_parse("0123.15"));
396445
assert_eq!(Ok(123.15), f64::extended_parse("+0123.15"));
397446
assert_eq!(Ok(-123.15), f64::extended_parse("-0123.15"));
447+
assert_eq!(Ok(12315000.0), f64::extended_parse("123.15e5"));
448+
assert_eq!(Ok(-12315000.0), f64::extended_parse("-123.15e5"));
449+
assert_eq!(Ok(12315000.0), f64::extended_parse("123.15e+5"));
450+
assert_eq!(Ok(0.0012315), f64::extended_parse("123.15e-5"));
398451
assert_eq!(
399452
Ok(0.15),
400453
f64::extended_parse(".150000000000000000000000000231313")
401454
);
402455
assert!(matches!(f64::extended_parse("1.2.3"),
403456
Err(ExtendedParserError::PartialMatch(f, ".3")) if f == 1.2));
457+
assert!(matches!(f64::extended_parse("123.15p5"),
458+
Err(ExtendedParserError::PartialMatch(f, "p5")) if f == 123.15));
404459
// Minus zero. 0.0 == -0.0 so we explicitly check the sign.
405460
assert_eq!(Ok(0.0), f64::extended_parse("-0.0"));
406461
assert!(f64::extended_parse("-0.0").unwrap().is_sign_negative());
@@ -442,6 +497,20 @@ mod tests {
442497
)),
443498
ExtendedBigDecimal::extended_parse("123.15")
444499
);
500+
assert_eq!(
501+
Ok(ExtendedBigDecimal::BigDecimal(BigDecimal::from_bigint(
502+
12315.into(),
503+
-98
504+
))),
505+
ExtendedBigDecimal::extended_parse("123.15e100")
506+
);
507+
assert_eq!(
508+
Ok(ExtendedBigDecimal::BigDecimal(BigDecimal::from_bigint(
509+
12315.into(),
510+
102
511+
))),
512+
ExtendedBigDecimal::extended_parse("123.15e-100")
513+
);
445514
// Very high precision that would not fit in a f64.
446515
assert_eq!(
447516
Ok(ExtendedBigDecimal::BigDecimal(
@@ -486,6 +555,12 @@ mod tests {
486555
assert_eq!(Ok(0.5), f64::extended_parse("0x.8"));
487556
assert_eq!(Ok(0.0625), f64::extended_parse("0x.1"));
488557
assert_eq!(Ok(15.007_812_5), f64::extended_parse("0xf.02"));
558+
assert_eq!(Ok(16.0), f64::extended_parse("0x0.8p5"));
559+
assert_eq!(Ok(0.0625), f64::extended_parse("0x1p-4"));
560+
561+
// We cannot really check that 'e' is not a valid exponent indicator for hex floats...
562+
// but we can check that the number still gets parsed properly: 0x0.8e5 is 0x8e5 / 16**3
563+
assert_eq!(Ok(0.555908203125), f64::extended_parse("0x0.8e5"));
489564

490565
assert_eq!(
491566
Ok(ExtendedBigDecimal::BigDecimal(

0 commit comments

Comments
 (0)