Skip to content

Commit bd68eb8

Browse files
committed
uucore: format: num_parser: Parse exponent part of floating point numbers
Parse numbers like 123.15e15 and 0xfp-2, and add some tests for that. `parse` is becoming more and more of a monster: we should consider splitting it into multiple parts. Fixes #7474.
1 parent 55773e9 commit bd68eb8

File tree

1 file changed

+89
-14
lines changed

1 file changed

+89
-14
lines changed

src/uucore/src/lib/features/format/num_parser.rs

Lines changed: 89 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,8 @@ fn parse_special_value(
210210
}
211211
}
212212

213+
// TODO: As highlighted by clippy, this function _is_ high cognitive complexity, jumps
214+
// around between integer and float parsing, and should be split into multiple parts.
213215
#[allow(clippy::cognitive_complexity)]
214216
fn parse(
215217
input: &str,
@@ -267,21 +269,51 @@ fn parse(
267269
let mut chars = rest.chars().enumerate().fuse().peekable();
268270
let mut digits = BigUint::zero();
269271
let mut scale = 0i64;
272+
let mut exponent = 0i64;
270273
while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) {
271274
chars.next();
272275
digits = digits * base as u8 + d;
273276
}
274277

275-
// Parse the fractional part of the number if there can be one and the input contains
276-
// a '.' decimal separator.
277-
if matches!(chars.peek(), Some(&(_, '.')))
278-
&& matches!(base, Base::Decimal | Base::Hexadecimal)
279-
&& !integral_only
280-
{
281-
chars.next();
282-
while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) {
278+
// Parse fractional/exponent part of the number for supported bases.
279+
if matches!(base, Base::Decimal | Base::Hexadecimal) && !integral_only {
280+
// Parse the fractional part of the number if there can be one and the input contains
281+
// a '.' decimal separator.
282+
if matches!(chars.peek(), Some(&(_, '.'))) {
283+
chars.next();
284+
while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) {
285+
chars.next();
286+
(digits, scale) = (digits * base as u8 + d, scale + 1);
287+
}
288+
}
289+
290+
let exp_char = match base {
291+
Base::Decimal => 'e',
292+
Base::Hexadecimal => 'p',
293+
_ => unreachable!(),
294+
};
295+
296+
// Parse the exponent part; its digits are always decimal, even for hexadecimal numbers.
297+
if chars.peek().is_some_and(|&(_, c)| c == exp_char) {
283298
chars.next();
284-
(digits, scale) = (digits * base as u8 + d, scale + 1);
299+
let exp_negative = match chars.peek() {
300+
Some((_, '-')) => {
301+
chars.next();
302+
true
303+
}
304+
Some((_, '+')) => {
305+
chars.next();
306+
false
307+
}
308+
_ => false, // Something else, or nothing at all: keep going.
309+
};
310+
while let Some(d) = chars.peek().and_then(|&(_, c)| Base::Decimal.digit(c)) {
311+
chars.next();
312+
exponent = exponent * 10 + d as i64;
313+
}
314+
if exp_negative {
315+
exponent = -exponent;
316+
}
285317
}
286318
}
287319

@@ -300,14 +332,23 @@ fn parse(
300332
} else {
301333
let sign = if negative { Sign::Minus } else { Sign::Plus };
302334
let signed_digits = BigInt::from_biguint(sign, digits);
303-
let bd = if scale == 0 {
335+
let bd = if scale == 0 && exponent == 0 {
304336
BigDecimal::from_bigint(signed_digits, 0)
305337
} else if base == Base::Decimal {
306-
BigDecimal::from_bigint(signed_digits, scale)
338+
BigDecimal::from_bigint(signed_digits, scale - exponent)
339+
} else if base == Base::Hexadecimal {
340+
// Base is 16, init at scale 0 then divide by base**scale.
341+
let bd = BigDecimal::from_bigint(signed_digits, 0)
342+
/ BigDecimal::from_bigint(BigInt::from(16).pow(scale as u32), 0);
343+
// Confusingly, exponent is in base 2 for hex floating point numbers.
344+
if exponent >= 0 {
345+
bd * 2u64.pow(exponent as u32)
346+
} else {
347+
bd / 2u64.pow(-exponent as u32)
348+
}
307349
} else {
308-
// Base is not 10, init at scale 0 then divide by base**scale.
309-
BigDecimal::from_bigint(signed_digits, 0)
310-
/ BigDecimal::from_bigint(BigInt::from(base as u32).pow(scale as u32), 0)
350+
// scale != 0 or exponent != 0, which means that integral_only is not set, so only base 10 and 16 are allowed.
351+
unreachable!();
311352
};
312353
ExtendedBigDecimal::BigDecimal(bd)
313354
};
@@ -350,6 +391,10 @@ mod tests {
350391
u64::extended_parse("123.15"),
351392
Err(ExtendedParserError::PartialMatch(123, ".15"))
352393
));
394+
assert!(matches!(
395+
u64::extended_parse("123e10"),
396+
Err(ExtendedParserError::PartialMatch(123, "e10"))
397+
));
353398
}
354399

355400
#[test]
@@ -371,6 +416,10 @@ mod tests {
371416
i64::extended_parse(&format!("{}", i64::MAX as u64 + 1)),
372417
Err(ExtendedParserError::Overflow)
373418
));
419+
assert!(matches!(
420+
i64::extended_parse("-123e10"),
421+
Err(ExtendedParserError::PartialMatch(-123, "e10"))
422+
));
374423
}
375424

376425
#[test]
@@ -397,12 +446,18 @@ mod tests {
397446
assert_eq!(Ok(123.15), f64::extended_parse("0123.15"));
398447
assert_eq!(Ok(123.15), f64::extended_parse("+0123.15"));
399448
assert_eq!(Ok(-123.15), f64::extended_parse("-0123.15"));
449+
assert_eq!(Ok(12315000.0), f64::extended_parse("123.15e5"));
450+
assert_eq!(Ok(-12315000.0), f64::extended_parse("-123.15e5"));
451+
assert_eq!(Ok(12315000.0), f64::extended_parse("123.15e+5"));
452+
assert_eq!(Ok(0.0012315), f64::extended_parse("123.15e-5"));
400453
assert_eq!(
401454
Ok(0.15),
402455
f64::extended_parse(".150000000000000000000000000231313")
403456
);
404457
assert!(matches!(f64::extended_parse("1.2.3"),
405458
Err(ExtendedParserError::PartialMatch(f, ".3")) if f == 1.2));
459+
assert!(matches!(f64::extended_parse("123.15p5"),
460+
Err(ExtendedParserError::PartialMatch(f, "p5")) if f == 123.15));
406461
// Minus zero. 0.0 == -0.0 so we explicitly check the sign.
407462
assert_eq!(Ok(0.0), f64::extended_parse("-0.0"));
408463
assert!(f64::extended_parse("-0.0").unwrap().is_sign_negative());
@@ -444,6 +499,20 @@ mod tests {
444499
)),
445500
ExtendedBigDecimal::extended_parse("123.15")
446501
);
502+
assert_eq!(
503+
Ok(ExtendedBigDecimal::BigDecimal(BigDecimal::from_bigint(
504+
12315.into(),
505+
-98
506+
))),
507+
ExtendedBigDecimal::extended_parse("123.15e100")
508+
);
509+
assert_eq!(
510+
Ok(ExtendedBigDecimal::BigDecimal(BigDecimal::from_bigint(
511+
12315.into(),
512+
102
513+
))),
514+
ExtendedBigDecimal::extended_parse("123.15e-100")
515+
);
447516
// Very high precision that would not fit in a f64.
448517
assert_eq!(
449518
Ok(ExtendedBigDecimal::BigDecimal(
@@ -488,6 +557,12 @@ mod tests {
488557
assert_eq!(Ok(0.5), f64::extended_parse("0x.8"));
489558
assert_eq!(Ok(0.0625), f64::extended_parse("0x.1"));
490559
assert_eq!(Ok(15.007_812_5), f64::extended_parse("0xf.02"));
560+
assert_eq!(Ok(16.0), f64::extended_parse("0x0.8p5"));
561+
assert_eq!(Ok(0.0625), f64::extended_parse("0x1p-4"));
562+
563+
// We cannot really check that 'e' is not a valid exponent indicator for hex floats...
564+
// but we can check that the number still gets parsed properly: 0x0.8e5 is 0x8e5 / 16**3
565+
assert_eq!(Ok(0.555908203125), f64::extended_parse("0x0.8e5"));
491566

492567
assert_eq!(
493568
Ok(ExtendedBigDecimal::BigDecimal(

0 commit comments

Comments
 (0)