Skip to content

Commit 83679a3

Browse files
authored
Merge pull request #7790 from drinkcat/num_parser_simplify
uucore: num_parser: Operate on slices, instead of iterator
2 parents aacd6d4 + ddbd995 commit 83679a3

File tree

1 file changed

+114
-94
lines changed

1 file changed

+114
-94
lines changed

src/uucore/src/lib/features/parser/num_parser.rs

Lines changed: 114 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
//! Utilities for parsing numbers in various formats
77
8-
// spell-checker:ignore powf copysign prec inity infinit bigdecimal extendedbigdecimal biguint underflowed
8+
// spell-checker:ignore powf copysign prec inity infinit infs bigdecimal extendedbigdecimal biguint underflowed
99

1010
use bigdecimal::{
1111
BigDecimal, Context,
@@ -35,7 +35,7 @@ enum Base {
3535

3636
impl Base {
3737
/// Return the digit value of a character in the given base
38-
pub fn digit(&self, c: char) -> Option<u64> {
38+
fn digit(&self, c: char) -> Option<u64> {
3939
fn from_decimal(c: char) -> u64 {
4040
u64::from(c) - u64::from('0')
4141
}
@@ -50,6 +50,34 @@ impl Base {
5050
},
5151
}
5252
}
53+
54+
/// Greedily parse as many digits as possible from the string
55+
/// Returns parsed digits (if any), and the rest of the string.
56+
fn parse_digits<'a>(&self, str: &'a str) -> (Option<BigUint>, &'a str) {
57+
let (digits, _, rest) = self.parse_digits_count(str, None);
58+
(digits, rest)
59+
}
60+
61+
/// Greedily parse as many digits as possible from the string, adding to already parsed digits.
62+
/// This is meant to be used (directly) for the part after a decimal point.
63+
/// Returns parsed digits (if any), the number of parsed digits, and the rest of the string.
64+
fn parse_digits_count<'a>(
65+
&self,
66+
str: &'a str,
67+
digits: Option<BigUint>,
68+
) -> (Option<BigUint>, u64, &'a str) {
69+
let mut digits: Option<BigUint> = digits;
70+
let mut count: u64 = 0;
71+
let mut rest = str;
72+
while let Some(d) = rest.chars().next().and_then(|c| self.digit(c)) {
73+
(digits, count) = (
74+
Some(digits.unwrap_or_default() * *self as u8 + d),
75+
count + 1,
76+
);
77+
rest = &rest[1..];
78+
}
79+
(digits, count, rest)
80+
}
5381
}
5482

5583
/// Type returned if a number could not be parsed in its entirety
@@ -235,10 +263,69 @@ impl ExtendedParser for ExtendedBigDecimal {
235263
}
236264
}
237265

266+
fn parse_digits(base: Base, str: &str, fractional: bool) -> (Option<BigUint>, u64, &str) {
267+
// Parse the integral part of the number
268+
let (digits, rest) = base.parse_digits(str);
269+
270+
// If allowed, parse the fractional part of the number if there can be one and the
271+
// input contains a '.' decimal separator.
272+
if fractional {
273+
if let Some(rest) = rest.strip_prefix('.') {
274+
return base.parse_digits_count(rest, digits);
275+
}
276+
}
277+
278+
(digits, 0, rest)
279+
}
280+
281+
fn parse_exponent(base: Base, str: &str) -> (Option<BigInt>, &str) {
282+
let exp_chars = match base {
283+
Base::Decimal => ['e', 'E'],
284+
Base::Hexadecimal => ['p', 'P'],
285+
_ => unreachable!(),
286+
};
287+
288+
// Parse the exponent part, only decimal numbers are allowed.
289+
// We only update `rest` if an exponent is actually parsed.
290+
if let Some(rest) = str.strip_prefix(exp_chars) {
291+
let (sign, rest) = if let Some(rest) = rest.strip_prefix('-') {
292+
(Sign::Minus, rest)
293+
} else if let Some(rest) = rest.strip_prefix('+') {
294+
(Sign::Plus, rest)
295+
} else {
296+
// Something else, or nothing at all: keep going.
297+
(Sign::Plus, rest) // No explicit sign is equivalent to `+`.
298+
};
299+
300+
let (exp_uint, rest) = Base::Decimal.parse_digits(rest);
301+
if let Some(exp_uint) = exp_uint {
302+
return (Some(BigInt::from_biguint(sign, exp_uint)), rest);
303+
}
304+
}
305+
306+
// Nothing parsed
307+
(None, str)
308+
}
309+
310+
// Parse a multiplier from allowed suffixes (e.g. s/m/h).
311+
fn parse_suffix_multiplier<'a>(str: &'a str, allowed_suffixes: &[(char, u32)]) -> (u32, &'a str) {
312+
if let Some(ch) = str.chars().next() {
313+
if let Some(mul) = allowed_suffixes
314+
.iter()
315+
.find_map(|(c, t)| (ch == *c).then_some(*t))
316+
{
317+
return (mul, &str[1..]);
318+
}
319+
}
320+
321+
// No suffix, just return 1 and intact string
322+
(1, str)
323+
}
324+
238325
fn parse_special_value<'a>(
239326
input: &'a str,
240327
negative: bool,
241-
allowed_suffixes: &'a [(char, u32)],
328+
allowed_suffixes: &[(char, u32)],
242329
) -> Result<ExtendedBigDecimal, ExtendedParserError<'a, ExtendedBigDecimal>> {
243330
let input_lc = input.to_ascii_lowercase();
244331

@@ -255,21 +342,14 @@ fn parse_special_value<'a>(
255342
if negative {
256343
special = -special;
257344
}
258-
let mut match_len = str.len();
259-
if let Some(ch) = input.chars().nth(str.chars().count()) {
260-
if allowed_suffixes.iter().any(|(c, _)| ch == *c) {
261-
// multiplying is unnecessary for these special values, but we have to note that
262-
// we processed the character to avoid a partial match error
263-
match_len += 1;
264-
}
265-
}
266-
return if input.len() == match_len {
345+
346+
// "infs" is a valid duration, so parse suffix multiplier in the original input string, but ignore the multiplier.
347+
let (_, rest) = parse_suffix_multiplier(&input[str.len()..], allowed_suffixes);
348+
349+
return if rest.is_empty() {
267350
Ok(special)
268351
} else {
269-
Err(ExtendedParserError::PartialMatch(
270-
special,
271-
&input[match_len..],
272-
))
352+
Err(ExtendedParserError::PartialMatch(special, rest))
273353
};
274354
}
275355
}
@@ -396,13 +476,10 @@ pub(crate) enum ParseTarget {
396476
Duration,
397477
}
398478

399-
// TODO: As highlighted by clippy, this function _is_ high cognitive complexity, jumps
400-
// around between integer and float parsing, and should be split in multiple parts.
401-
#[allow(clippy::cognitive_complexity)]
402479
pub(crate) fn parse<'a>(
403480
input: &'a str,
404481
target: ParseTarget,
405-
allowed_suffixes: &'a [(char, u32)],
482+
allowed_suffixes: &[(char, u32)],
406483
) -> Result<ExtendedBigDecimal, ExtendedParserError<'a, ExtendedBigDecimal>> {
407484
// Parse the " and ' prefixes separately
408485
if target != ParseTarget::Duration {
@@ -451,78 +528,30 @@ pub(crate) fn parse<'a>(
451528
(Base::Decimal, unsigned)
452529
};
453530

454-
// Parse the integral part of the number
455-
let mut chars = rest.chars().enumerate().fuse().peekable();
456-
let mut digits: Option<BigUint> = None;
457-
let mut scale = 0u64;
458-
let mut exponent: Option<BigInt> = None;
459-
while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) {
460-
chars.next();
461-
digits = Some(digits.unwrap_or_default() * base as u8 + d);
462-
}
463-
464-
// Parse fractional/exponent part of the number for supported bases.
465-
if matches!(base, Base::Decimal | Base::Hexadecimal) && target != ParseTarget::Integral {
466-
// Parse the fractional part of the number if there can be one and the input contains
467-
// a '.' decimal separator.
468-
if matches!(chars.peek(), Some(&(_, '.'))) {
469-
chars.next();
470-
while let Some(d) = chars.peek().and_then(|&(_, c)| base.digit(c)) {
471-
chars.next();
472-
(digits, scale) = (Some(digits.unwrap_or_default() * base as u8 + d), scale + 1);
473-
}
474-
}
531+
// We only parse fractional and exponent part of the number in base 10/16 floating point numbers.
532+
let parse_frac_exp =
533+
matches!(base, Base::Decimal | Base::Hexadecimal) && target != ParseTarget::Integral;
475534

476-
let exp_char = match base {
477-
Base::Decimal => 'e',
478-
Base::Hexadecimal => 'p',
479-
_ => unreachable!(),
480-
};
535+
// Parse the integral and fractional (if supported) part of the number
536+
let (digits, scale, rest) = parse_digits(base, rest, parse_frac_exp);
481537

482-
// Parse the exponent part, only decimal numbers are allowed.
483-
if chars
484-
.peek()
485-
.is_some_and(|&(_, c)| c.to_ascii_lowercase() == exp_char)
486-
{
487-
// Save the iterator position in case we do not parse any exponent.
488-
let save_chars = chars.clone();
489-
chars.next();
490-
let exp_negative = match chars.peek() {
491-
Some((_, '-')) => {
492-
chars.next();
493-
true
494-
}
495-
Some((_, '+')) => {
496-
chars.next();
497-
false
498-
}
499-
_ => false, // Something else, or nothing at all: keep going.
500-
};
501-
while let Some(d) = chars.peek().and_then(|&(_, c)| Base::Decimal.digit(c)) {
502-
chars.next();
503-
exponent = Some(exponent.unwrap_or_default() * 10 + d as i64);
504-
}
505-
if let Some(exp) = &exponent {
506-
if exp_negative {
507-
exponent = Some(-exp);
508-
}
509-
} else {
510-
// No exponent actually parsed, reset iterator to return partial match.
511-
chars = save_chars;
512-
}
513-
}
514-
}
538+
// Parse exponent part of the number for supported bases.
539+
let (exponent, rest) = if parse_frac_exp {
540+
parse_exponent(base, rest)
541+
} else {
542+
(None, rest)
543+
};
515544

516545
// If no digit has been parsed, check if this is a special value, or declare the parsing unsuccessful
517546
if digits.is_none() {
518547
// If we trimmed an initial `0x`/`0b`, return a partial match.
519-
if rest != unsigned {
548+
if let Some(partial) = unsigned.strip_prefix("0") {
520549
let ebd = if negative {
521550
ExtendedBigDecimal::MinusZero
522551
} else {
523552
ExtendedBigDecimal::zero()
524553
};
525-
return Err(ExtendedParserError::PartialMatch(ebd, &unsigned[1..]));
554+
return Err(ExtendedParserError::PartialMatch(ebd, partial));
526555
}
527556

528557
return if target == ParseTarget::Integral {
@@ -532,28 +561,19 @@ pub(crate) fn parse<'a>(
532561
};
533562
}
534563

535-
let mut digits = digits.unwrap();
564+
let (mul, rest) = parse_suffix_multiplier(rest, allowed_suffixes);
536565

537-
if let Some((_, ch)) = chars.peek() {
538-
if let Some(times) = allowed_suffixes
539-
.iter()
540-
.find(|(c, _)| ch == c)
541-
.map(|&(_, t)| t)
542-
{
543-
chars.next();
544-
digits *= times;
545-
}
546-
}
566+
let digits = digits.unwrap() * mul;
547567

548568
let ebd_result =
549569
construct_extended_big_decimal(digits, negative, base, scale, exponent.unwrap_or_default());
550570

551571
// Return what has been parsed so far. If there are extra characters, mark the
552572
// parsing as a partial match.
553-
if let Some((first_unparsed, _)) = chars.next() {
573+
if !rest.is_empty() {
554574
Err(ExtendedParserError::PartialMatch(
555575
ebd_result.unwrap_or_else(|e| e.extract()),
556-
&rest[first_unparsed..],
576+
rest,
557577
))
558578
} else {
559579
ebd_result

0 commit comments

Comments
 (0)